From e2ab467b905b57be574e97bc529eddd0a4474633 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 20 May 2014 18:21:51 +0000 Subject: [PATCH 001/906] PR19767: DebugInfo emission of pointer constants. In refactoring DwarfUnit::isUnsignedDIType I restricted it to only work on values with signedness (unsigned or signed), asserting on anything else (which did uncover some bugs). But it turns out that we do need to emit constants of signless data, such as pointer constants - only null pointer constants are known to need this so far, but it's conceivable that there might be non-null pointer constants at some point (hardcoded address offsets for device drivers?). This patch just uses 'unsigned' for signless data such as pointer constants. Arguably we could use signless representations (DW_FORM_dataN) instead, allowing a trinary result from isUnsignedDIType (signed, unsigned, signless), but this seems reasonable for now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209223 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 15 ++++++-- test/DebugInfo/constant-pointers.ll | 51 ++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 test/DebugInfo/constant-pointers.ll diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 8adf78ca8546..6b5417306917 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -749,8 +749,19 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { DIDerivedType DTy(Ty); if (DTy.isDerivedType()) { - if (DIType Deriv = DD->resolve(DTy.getTypeDerivedFrom())) - return isUnsignedDIType(DD, Deriv); + dwarf::Tag T = (dwarf::Tag)Ty.getTag(); + // Encode pointer constants as unsigned bytes. This is used at least for + // null pointer constant emission. Maybe DW_TAG_reference_type should be + // accepted here too, if there are ways to produce compile-time constant + // references. + if (T == dwarf::DW_TAG_pointer_type || + T == dwarf::DW_TAG_ptr_to_member_type) + return true; + assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || + T == dwarf::DW_TAG_volatile_type || + T == dwarf::DW_TAG_restrict_type); + if (DITypeRef Deriv = DTy.getTypeDerivedFrom()) + return isUnsignedDIType(DD, DD->resolve(Deriv)); // FIXME: Enums without a fixed underlying type have unknown signedness // here, leading to incorrectly emitted constants. assert(DTy.getTag() == dwarf::DW_TAG_enumeration_type); diff --git a/test/DebugInfo/constant-pointers.ll b/test/DebugInfo/constant-pointers.ll new file mode 100644 index 000000000000..fdde06d4a2b2 --- /dev/null +++ b/test/DebugInfo/constant-pointers.ll @@ -0,0 +1,51 @@ +; REQUIRES: object-emission + +; RUN: %llc_dwarf -O0 -filetype=obj %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +; Ensure that pointer constants are emitted as unsigned data. Alternatively, +; these could be signless data (dataN). 
+ +; Built with Clang from: +; template +; void func() {} +; template void func(); + +; CHECK: DW_TAG_subprogram +; CHECK: DW_TAG_template_value_parameter +; CHECK: DW_AT_name {{.*}} "V" +; CHECK: DW_AT_const_value [DW_FORM_udata] (0) +; CHECK: DW_TAG_template_value_parameter +; CHECK: DW_AT_name {{.*}} "F" +; CHECK: DW_AT_const_value [DW_FORM_udata] (0) + +; Function Attrs: nounwind uwtable +define weak_odr void @_Z4funcILPv0ELPFvvE0ELi42EEvv() #0 { +entry: + ret void, !dbg !18 +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!15, !16} +!llvm.ident = !{!17} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/constant-pointers.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"constant-pointers.cpp", metadata !"/tmp/dbginfo"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"_Z4funcILPv0ELPFvvE0ELi42EEvv", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z4funcILPv0ELPFvvE0ELi42EEvv, metadata !8, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [func] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/constant-pointers.cpp] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{null} +!8 = metadata !{metadata !9, metadata !11, metadata !13} +!9 = metadata !{i32 786480, null, metadata !"V", metadata !10, i8 0, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ] +!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] +!11 = metadata !{i32 786480, null, metadata !"F", metadata !12, i8 0, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ] +!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] +!13 = metadata !{i32 786480, null, metadata !"i", metadata !14, i32 42, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ] +!14 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!15 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!16 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!17 = metadata !{metadata !"clang version 3.5.0 "} +!18 = metadata !{i32 3, i32 0, metadata !4, null} From 447a7ce76cbec90aaa14b22592babbb4aa0f0993 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Tue, 20 May 2014 18:34:54 +0000 Subject: [PATCH 002/906] Rewrite calculateDbgValueHistory to make it (hopefully) more transparent. This change preserves the original algorithm of generating history for user variables, but makes it more clear. 
High-level description of algorithm: Scan all the machine basic blocks and machine instructions in the order they are emitted to the object file. Do the following: 1) If we see a DBG_VALUE instruction, add it to the history of the corresponding user variable. Keep track of all user variables, whose locations are described by a register. 2) If we see a regular instruction, look at all the registers it clobbers, and terminate the location range for all variables described by these registers. 3) At the end of the basic block, terminate location ranges for all user variables described by some register. Although this change shouldn't be user-visible (the contents of .debug_loc section should be the same), it changes some internal assumptions about the set of instructions used to track the variable locations. Watching the bots. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209225 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AsmPrinter/DbgValueHistoryCalculator.cpp | 251 ++++++++++-------- .../AsmPrinter/DbgValueHistoryCalculator.h | 7 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 6 +- 3 files changed, 142 insertions(+), 122 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index 874b8615b94f..c9bf1ecf9075 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -8,141 +8,160 @@ //===----------------------------------------------------------------------===// #include "DbgValueHistoryCalculator.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetRegisterInfo.h" +#include +#include #define DEBUG_TYPE "dwarfdebug" namespace llvm { -// Return true if debug value, encoded by DBG_VALUE instruction, is in a -// defined reg. -static bool isDbgValueInDefinedReg(const MachineInstr *MI) { - assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); - return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() && - MI->getOperand(0).getReg() && - (MI->getOperand(1).isImm() || - (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == 0U)); +namespace { +// Maps physreg numbers to the variables they describe. +typedef std::map> RegDescribedVarsMap; +} + +// \brief If @MI is a DBG_VALUE with debug value described by a +// defined register, returns the number of this register. +// In the other case, returns 0. +static unsigned isDescribedByReg(const MachineInstr &MI) { + assert(MI.isDebugValue()); + assert(MI.getNumOperands() == 3); + // If location of variable is described using a register (directly or + // indirecltly), this register is always a first operand. + return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; +} + +// \brief Claim that @Var is not described by @RegNo anymore. +static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, + unsigned RegNo, const MDNode *Var) { + const auto &I = RegVars.find(RegNo); + assert(RegNo != 0U && I != RegVars.end()); + auto &VarSet = I->second; + const auto &VarPos = std::find(VarSet.begin(), VarSet.end(), Var); + assert(VarPos != VarSet.end()); + VarSet.erase(VarPos); + // Don't keep empty sets in a map to keep it as small as possible. + if (VarSet.empty()) + RegVars.erase(I); +} + +// \brief Claim that @Var is now described by @RegNo. 
+static void addRegDescribedVar(RegDescribedVarsMap &RegVars, + unsigned RegNo, const MDNode *Var) { + assert(RegNo != 0U); + RegVars[RegNo].push_back(Var); +} + +static void clobberVariableLocation(SmallVectorImpl &VarHistory, + const MachineInstr &ClobberingInstr) { + assert(!VarHistory.empty()); + // DBG_VALUE we're clobbering should belong to the same MBB. + assert(VarHistory.back()->isDebugValue()); + assert(VarHistory.back()->getParent() == ClobberingInstr.getParent()); + VarHistory.push_back(&ClobberingInstr); +} + +// \brief Terminate the location range for variables described by register +// @RegNo by inserting @ClobberingInstr to their history. +static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, + DbgValueHistoryMap &HistMap, + const MachineInstr &ClobberingInstr) { + const auto &I = RegVars.find(RegNo); + if (I == RegVars.end()) + return; + // Iterate over all variables described by this register and add this + // instruction to their history, clobbering it. + for (const auto &Var : I->second) + clobberVariableLocation(HistMap[Var], ClobberingInstr); + RegVars.erase(I); +} + +// \brief Terminate the location range for all variables, described by registers +// clobbered by @MI. +static void clobberRegisterUses(RegDescribedVarsMap &RegVars, + const MachineInstr &MI, + const TargetRegisterInfo *TRI, + DbgValueHistoryMap &HistMap) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.getReg()) + continue; + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); + ++AI) { + unsigned RegNo = *AI; + clobberRegisterUses(RegVars, RegNo, HistMap, MI); + } + } +} + +// \brief Terminate the location range for all register-described variables +// by inserting @ClobberingInstr to their history. +static void clobberAllRegistersUses(RegDescribedVarsMap &RegVars, + DbgValueHistoryMap &HistMap, + const MachineInstr &ClobberingInstr) { + for (const auto &I : RegVars) + for (const auto &Var : I.second) + clobberVariableLocation(HistMap[Var], ClobberingInstr); + RegVars.clear(); +} + +// \brief Update the register that describes location of @Var in @RegVars map. +static void +updateRegForVariable(RegDescribedVarsMap &RegVars, const MDNode *Var, + const SmallVectorImpl &VarHistory, + const MachineInstr &MI) { + if (!VarHistory.empty()) { + const MachineInstr &Prev = *VarHistory.back(); + // Check if Var is currently described by a register by instruction in the + // same basic block. + if (Prev.isDebugValue() && Prev.getDebugVariable() == Var && + Prev.getParent() == MI.getParent()) { + if (unsigned PrevReg = isDescribedByReg(Prev)) + dropRegDescribedVar(RegVars, PrevReg, Var); + } + } + + assert(MI.getDebugVariable() == Var); + if (unsigned MIReg = isDescribedByReg(MI)) + addRegDescribedVar(RegVars, MIReg, Var); } void calculateDbgValueHistory(const MachineFunction *MF, const TargetRegisterInfo *TRI, DbgValueHistoryMap &Result) { - // LiveUserVar - Map physreg numbers to the MDNode they contain. - std::vector LiveUserVar(TRI->getNumRegs()); - - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; - ++I) { - bool AtBlockEntry = true; - for (const auto &MI : *I) { - if (MI.isDebugValue()) { - assert(MI.getNumOperands() > 1 && "Invalid machine instruction!"); - - // Keep track of user variables. - const MDNode *Var = MI.getDebugVariable(); - - // Variable is in a register, we need to check for clobbers. 
- if (isDbgValueInDefinedReg(&MI)) - LiveUserVar[MI.getOperand(0).getReg()] = Var; - - // Check the history of this variable. - SmallVectorImpl &History = Result[Var]; - if (!History.empty()) { - // We have seen this variable before. Try to coalesce DBG_VALUEs. - const MachineInstr *Prev = History.back(); - if (Prev->isDebugValue()) { - // Coalesce identical entries at the end of History. - if (History.size() >= 2 && - Prev->isIdenticalTo(History[History.size() - 2])) { - DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" - << "\t" << *Prev << "\t" - << *History[History.size() - 2] << "\n"); - History.pop_back(); - } - - // Terminate old register assignments that don't reach MI; - MachineFunction::const_iterator PrevMBB = Prev->getParent(); - if (PrevMBB != I && (!AtBlockEntry || std::next(PrevMBB) != I) && - isDbgValueInDefinedReg(Prev)) { - // Previous register assignment needs to terminate at the end of - // its basic block. - MachineBasicBlock::const_iterator LastMI = - PrevMBB->getLastNonDebugInstr(); - if (LastMI == PrevMBB->end()) { - // Drop DBG_VALUE for empty range. - DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n" - << "\t" << *Prev << "\n"); - History.pop_back(); - } else if (std::next(PrevMBB) != PrevMBB->getParent()->end()) - // Terminate after LastMI. - History.push_back(LastMI); - } - } - } - History.push_back(&MI); - } else { - // Not a DBG_VALUE instruction. - if (!MI.isPosition()) - AtBlockEntry = false; - - // Check if the instruction clobbers any registers with debug vars. - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef() || !MO.getReg()) - continue; - for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); - ++AI) { - unsigned Reg = *AI; - const MDNode *Var = LiveUserVar[Reg]; - if (!Var) - continue; - // Reg is now clobbered. - LiveUserVar[Reg] = nullptr; - - // Was MD last defined by a DBG_VALUE referring to Reg? - auto HistI = Result.find(Var); - if (HistI == Result.end()) - continue; - SmallVectorImpl &History = HistI->second; - if (History.empty()) - continue; - const MachineInstr *Prev = History.back(); - // Sanity-check: Register assignments are terminated at the end of - // their block. - if (!Prev->isDebugValue() || Prev->getParent() != MI.getParent()) - continue; - // Is the variable still in Reg? - if (!isDbgValueInDefinedReg(Prev) || - Prev->getOperand(0).getReg() != Reg) - continue; - // Var is clobbered. Make sure the next instruction gets a label. - History.push_back(&MI); - } - } + RegDescribedVarsMap RegVars; + + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (!MI.isDebugValue()) { + // Not a DBG_VALUE instruction. It may clobber registers which describe + // some variables. + clobberRegisterUses(RegVars, MI, TRI, Result); + continue; } - } - } - // Make sure the final register assignments are terminated. - for (auto &I : Result) { - SmallVectorImpl &History = I.second; - if (History.empty()) - continue; + const MDNode *Var = MI.getDebugVariable(); + auto &History = Result[Var]; - const MachineInstr *Prev = History.back(); - if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) { - const MachineBasicBlock *PrevMBB = Prev->getParent(); - MachineBasicBlock::const_iterator LastMI = - PrevMBB->getLastNonDebugInstr(); - if (LastMI == PrevMBB->end()) - // Drop DBG_VALUE for empty range. - History.pop_back(); - else if (PrevMBB != &PrevMBB->getParent()->back()) { - // Terminate after LastMI. 
- History.push_back(LastMI); + if (!History.empty() && History.back()->isIdenticalTo(&MI)) { + DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" + << "\t" << History.back() << "\t" << MI << "\n"); + continue; } + + updateRegForVariable(RegVars, Var, History, MI); + History.push_back(&MI); } + + // Make sure locations for register-described variables are valid only + // until the end of the basic block (unless it's the last basic block, in + // which case let their liveness run off to the end of the function). + if (!MBB.empty() && &MBB != &MF->back()) + clobberAllRegistersUses(RegVars, Result, MBB.back()); } } diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h index 2945c1522de3..db5116df1bb2 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h @@ -20,9 +20,10 @@ class MachineInstr; class MDNode; class TargetRegisterInfo; -// For each user variable, keep a list of DBG_VALUE instructions in order. -// The list can also contain normal instructions that clobber the previous -// DBG_VALUE. The variables are listed in order of appearance. +// For each user variable, keep a list of DBG_VALUE instructions for it +// in the order of appearance. The list can also contain another +// instructions, which are assumed to clobber the previous DBG_VALUE. +// The variables are listed in order of appearance. typedef MapVector> DbgValueHistoryMap; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5688d55fafdc..641ebb68dc21 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1201,10 +1201,10 @@ DwarfDebug::collectVariableInfo(SmallPtrSet &Processed) { const MachineInstr *End = HI[1]; DEBUG(dbgs() << "DotDebugLoc Pair:\n" << "\t" << *Begin << "\t" << *End << "\n"); - if (End->isDebugValue()) + if (End->isDebugValue() && End->getDebugVariable() == DV) SLabel = getLabelBeforeInsn(End); else { - // End is a normal instruction clobbering the range. + // End is clobbering the range. SLabel = getLabelAfterInsn(End); assert(SLabel && "Forgot label after clobber instruction"); ++HI; @@ -1415,7 +1415,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { LabelsBeforeInsn[History.front()] = FunctionBeginSym; for (const MachineInstr *MI : History) { - if (MI->isDebugValue()) + if (MI->isDebugValue() && MI->getDebugVariable() == DV) requestLabelBeforeInsn(MI); else requestLabelAfterInsn(MI); From 1e3493b04a88fe931f73eebc175eb03c0d165e21 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 20 May 2014 18:36:35 +0000 Subject: [PATCH 003/906] Fix test breakage introduced in r209223. Oops, broke the broken enum constants again. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209226 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 6b5417306917..68673f7fb84c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -759,7 +759,8 @@ static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { return true; assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || T == dwarf::DW_TAG_volatile_type || - T == dwarf::DW_TAG_restrict_type); + T == dwarf::DW_TAG_restrict_type || + T == dwarf::DW_TAG_enumeration_type); if (DITypeRef Deriv = DTy.getTypeDerivedFrom()) return isUnsignedDIType(DD, DD->resolve(Deriv)); // FIXME: Enums without a fixed underlying type have unknown signedness From 8598e290c3ef00108d1e59871f7c2665be44273a Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Tue, 20 May 2014 19:00:58 +0000 Subject: [PATCH 004/906] GlobalValue: Automatically reset visibility when setting local linkage r208264 started asserting in `setLinkage()` and `setVisibility()` that visibility and linkage are compatible. There are a few places in clang where visibility is set first, and then linkage later, so the assert fires. In `setLinkage()`, it's clear what the visibility *should* be, so rather than updating all the call sites just automatically fix the visibility. The testcase for this is for *clang*, so it'll follow separately in cfe. PR19760 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209227 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/GlobalValue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h index 0ed302cdb4b8..10df372945a9 100644 --- a/include/llvm/IR/GlobalValue.h +++ b/include/llvm/IR/GlobalValue.h @@ -222,8 +222,8 @@ class GlobalValue : public Constant { bool hasCommonLinkage() const { return isCommonLinkage(Linkage); } void setLinkage(LinkageTypes LT) { - assert((!isLocalLinkage(LT) || hasDefaultVisibility()) && - "local linkage requires default visibility"); + if (isLocalLinkage(LT)) + Visibility = DefaultVisibility; Linkage = LT; } LinkageTypes getLinkage() const { return Linkage; } From 50d4008b471858a5dace11cc046e997bb0cf95eb Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Tue, 20 May 2014 19:25:04 +0000 Subject: [PATCH 005/906] [LSR] Canonicalize reg1 + ... + regN into reg1 + ... + 1*regN. This commit introduces a canonical representation for the formulae. Basically, as soon as a formula has more that one base register, the scaled register field is used for one of them. The register put into the scaled register is preferably a loop variant. The commit refactors how the formulae are built in order to produce such representation. This yields a more accurate, but still perfectible, cost model. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209230 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 558 +++++++++++++------ test/CodeGen/X86/avoid_complex_am.ll | 11 +- test/CodeGen/X86/masked-iv-safe.ll | 6 +- 3 files changed, 385 insertions(+), 190 deletions(-) diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 13e4fceec664..914b56aa8167 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -238,7 +238,15 @@ struct Formula { int64_t Scale; /// BaseRegs - The list of "base" registers for this use. When this is - /// non-empty, + /// non-empty. The canonical representation of a formula is + /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and + /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty(). + /// #1 enforces that the scaled register is always used when at least two + /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2. + /// #2 enforces that 1 * reg is reg. + /// This invariant can be temporarly broken while building a formula. + /// However, every formula inserted into the LSRInstance must be in canonical + /// form. SmallVector BaseRegs; /// ScaledReg - The 'scaled' register for this use. This should be non-null @@ -256,6 +264,12 @@ struct Formula { void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); + bool isCanonical() const; + + void Canonicalize(); + + bool Unscale(); + size_t getNumRegs() const; Type *getType() const; @@ -346,6 +360,52 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { BaseRegs.push_back(Sum); HasBaseReg = true; } + Canonicalize(); +} + +/// \brief Check whether or not this formula statisfies the canonical +/// representation. +/// \see Formula::BaseRegs. +bool Formula::isCanonical() const { + if (ScaledReg) + return Scale != 1 || !BaseRegs.empty(); + return BaseRegs.size() <= 1; +} + +/// \brief Helper method to morph a formula into its canonical representation. +/// \see Formula::BaseRegs. +/// Every formula having more than one base register, must use the ScaledReg +/// field. Otherwise, we would have to do special cases everywhere in LSR +/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ... +/// On the other hand, 1*reg should be canonicalized into reg. +void Formula::Canonicalize() { + if (isCanonical()) + return; + // So far we did not need this case. This is easy to implement but it is + // useless to maintain dead code. Beside it could hurt compile time. + assert(!BaseRegs.empty() && "1*reg => reg, should not be needed."); + // Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg. + ScaledReg = BaseRegs.back(); + BaseRegs.pop_back(); + Scale = 1; + size_t BaseRegsSize = BaseRegs.size(); + size_t Try = 0; + // If ScaledReg is an invariant, try to find a variant expression. + while (Try < BaseRegsSize && !isa(ScaledReg)) + std::swap(ScaledReg, BaseRegs[Try++]); +} + +/// \brief Get rid of the scale in the formula. +/// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2. +/// \return true if it was possible to get rid of the scale, false otherwise. +/// \note After this operation the formula may not be in the canonical form. 
+bool Formula::Unscale() { + if (Scale != 1) + return false; + Scale = 0; + BaseRegs.push_back(ScaledReg); + ScaledReg = nullptr; + return true; } /// getNumRegs - Return the total number of register operands used by this @@ -776,9 +836,18 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl &DeadInsts) { namespace { class LSRUse; } -// Check if it is legal to fold 2 base registers. -static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU, - const Formula &F); + +/// \brief Check if the addressing mode defined by \p F is completely +/// folded in \p LU at isel time. +/// This includes address-mode folding and special icmp tricks. +/// This function returns true if \p LU can accommodate what \p F +/// defines and up to 1 base + 1 scaled + offset. +/// In other words, if \p F has several base registers, this function may +/// still return true. Therefore, users still need to account for +/// additional base registers and/or unfolded offsets to derive an +/// accurate cost model. +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + const LSRUse &LU, const Formula &F); // Get the cost of the scaling factor used in F for LU. static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F); @@ -922,6 +991,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, ScalarEvolution &SE, DominatorTree &DT, const LSRUse &LU, SmallPtrSet *LoserRegs) { + assert(F.isCanonical() && "Cost is accurate only for canonical formula"); // Tally up the registers. if (const SCEV *ScaledReg = F.ScaledReg) { if (VisitedRegs.count(ScaledReg)) { @@ -945,11 +1015,13 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, } // Determine how many (unfolded) adds we'll need inside the loop. - size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0); + size_t NumBaseParts = F.getNumRegs(); if (NumBaseParts > 1) // Do not count the base and a possible second register if the target // allows to fold 2 registers. - NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F)); + NumBaseAdds += + NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F))); + NumBaseAdds += (F.UnfoldedOffset != 0); // Accumulate non-free scaling amounts. ScaleCost += getScalingFactorCost(TTI, LU, F); @@ -1210,7 +1282,10 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. +/// The formula must be in canonical form. bool LSRUse::InsertFormula(const Formula &F) { + assert(F.isCanonical() && "Invalid canonical representation"); + if (!Formulae.empty() && RigidFormula) return false; @@ -1236,6 +1311,8 @@ bool LSRUse::InsertFormula(const Formula &F) { // Record registers now being used by this use. Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); + if (F.ScaledReg) + Regs.insert(F.ScaledReg); return true; } @@ -1302,12 +1379,10 @@ void LSRUse::dump() const { } #endif -/// isLegalUse - Test whether the use described by AM is "legal", meaning it can -/// be completely folded into the user instruction at isel time. This includes -/// address-mode folding and special icmp tricks. 
-static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind, - Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale) { +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) { switch (Kind) { case LSRUse::Address: return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); @@ -1358,10 +1433,11 @@ static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind, llvm_unreachable("Invalid LSRUse Kind!"); } -static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, - int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, - GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) { +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + int64_t MinOffset, int64_t MaxOffset, + LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) { // Check for overflow. if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) != (MinOffset > 0)) @@ -1372,9 +1448,41 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, return false; MaxOffset = (uint64_t)BaseOffset + MaxOffset; - return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg, - Scale) && - isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale); + return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset, + HasBaseReg, Scale) && + isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset, + HasBaseReg, Scale); +} + +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + int64_t MinOffset, int64_t MaxOffset, + LSRUse::KindType Kind, Type *AccessTy, + const Formula &F) { + // For the purpose of isAMCompletelyFolded either having a canonical formula + // or a scale not equal to zero is correct. + // Problems may arise from non canonical formulae having a scale == 0. + // Strictly speaking it would best to just rely on canonical formulae. + // However, when we generate the scaled formulae, we first check that the + // scaling factor is profitable before computing the actual ScaledReg for + // compile time sake. + assert((F.isCanonical() || F.Scale != 0)); + return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, + F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale); +} + +/// isLegalUse - Test whether we know how to expand the current formula. +static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, + int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) { + // We know how to expand completely foldable formulae. + return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, + BaseOffset, HasBaseReg, Scale) || + // Or formulae that use a base register produced by a sum of base + // registers. + (Scale == 1 && + isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, + BaseGV, BaseOffset, true, 0)); } static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, @@ -1384,36 +1492,23 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, F.BaseOffset, F.HasBaseReg, F.Scale); } -static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU, - const Formula &F) { - // If F is used as an Addressing Mode, it may fold one Base plus one - // scaled register. 
If the scaled register is nil, do as if another - // element of the base regs is a 1-scaled register. - // This is possible if BaseRegs has at least 2 registers. - - // If this is not an address calculation, this is not an addressing mode - // use. - if (LU.Kind != LSRUse::Address) - return false; - - // F is already scaled. - if (F.Scale != 0) - return false; - - // We need to keep one register for the base and one to scale. - if (F.BaseRegs.size() < 2) - return false; - - return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, - F.BaseGV, F.BaseOffset, F.HasBaseReg, 1); - } +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + const LSRUse &LU, const Formula &F) { + return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg, + F.Scale); +} static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F) { if (!F.Scale) return 0; - assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, F) && "Illegal formula in use."); + + // If the use is not completely folded in that instruction, we will have to + // pay an extra cost only for scale != 1. + if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, F)) + return F.Scale != 1; switch (LU.Kind) { case LSRUse::Address: { @@ -1432,12 +1527,10 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, return std::max(ScaleCostMinOffset, ScaleCostMaxOffset); } case LSRUse::ICmpZero: - // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg. - // Therefore, return 0 in case F.Scale == -1. - return F.Scale != -1; - case LSRUse::Basic: case LSRUse::Special: + // The use is completely folded, i.e., everything is folded into the + // instruction. return 0; } @@ -1462,7 +1555,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI, HasBaseReg = true; } - return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); + return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset, + HasBaseReg, Scale); } static bool isAlwaysFoldable(const TargetTransformInfo &TTI, @@ -1487,8 +1581,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI, // base and a scale. int64_t Scale = Kind == LSRUse::ICmpZero ? 
-1 : 1; - return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, - BaseOffset, HasBaseReg, Scale); + return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, + BaseOffset, HasBaseReg, Scale); } namespace { @@ -1644,8 +1738,19 @@ class LSRInstance { void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base, unsigned Depth = 0); + + void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, unsigned Depth, + size_t Idx, bool IsScaledReg = false); void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, size_t Idx, + bool IsScaledReg = false); void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, + const SmallVectorImpl &Worklist, + size_t Idx, bool IsScaledReg = false); void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base); void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base); @@ -2148,23 +2253,25 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, // the uses will have all its uses outside the loop, for example. if (LU.Kind != Kind) return false; + + // Check for a mismatched access type, and fall back conservatively as needed. + // TODO: Be less conservative when the type is similar and can use the same + // addressing modes. + if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) + NewAccessTy = Type::getVoidTy(AccessTy->getContext()); + // Conservatively assume HasBaseReg is true for now. if (NewOffset < LU.MinOffset) { - if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr, + if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr, LU.MaxOffset - NewOffset, HasBaseReg)) return false; NewMinOffset = NewOffset; } else if (NewOffset > LU.MaxOffset) { - if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr, + if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr, NewOffset - LU.MinOffset, HasBaseReg)) return false; NewMaxOffset = NewOffset; } - // Check for a mismatched access type, and fall back conservatively as needed. - // TODO: Be less conservative when the type is similar and can use the same - // addressing modes. - if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) - NewAccessTy = Type::getVoidTy(AccessTy->getContext()); // Update the use. LU.MinOffset = NewMinOffset; @@ -2994,6 +3101,9 @@ void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) { /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { + // Do not insert formula that we will not be able to expand. + assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) && + "Formula is illegal"); if (!LU.InsertFormula(F)) return false; @@ -3149,84 +3259,104 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, return S; } -/// GenerateReassociations - Split out subexpressions from adds and the bases of -/// addrecs. -void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, - Formula Base, - unsigned Depth) { - // Arbitrarily cap recursion to protect compile time. 
- if (Depth >= 3) return; - - for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { - const SCEV *BaseReg = Base.BaseRegs[i]; +/// \brief Helper function for LSRInstance::GenerateReassociations. +void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, + unsigned Depth, size_t Idx, + bool IsScaledReg) { + const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; + SmallVector AddOps; + const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE); + if (Remainder) + AddOps.push_back(Remainder); + + if (AddOps.size() == 1) + return; - SmallVector AddOps; - const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE); - if (Remainder) - AddOps.push_back(Remainder); + for (SmallVectorImpl::const_iterator J = AddOps.begin(), + JE = AddOps.end(); + J != JE; ++J) { - if (AddOps.size() == 1) continue; + // Loop-variant "unknown" values are uninteresting; we won't be able to + // do anything meaningful with them. + if (isa(*J) && !SE.isLoopInvariant(*J, L)) + continue; - for (SmallVectorImpl::const_iterator J = AddOps.begin(), - JE = AddOps.end(); J != JE; ++J) { + // Don't pull a constant into a register if the constant could be folded + // into an immediate field. + if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, *J, Base.getNumRegs() > 1)) + continue; - // Loop-variant "unknown" values are uninteresting; we won't be able to - // do anything meaningful with them. - if (isa(*J) && !SE.isLoopInvariant(*J, L)) - continue; + // Collect all operands except *J. + SmallVector InnerAddOps( + ((const SmallVector &)AddOps).begin(), J); + InnerAddOps.append(std::next(J), + ((const SmallVector &)AddOps).end()); + + // Don't leave just a constant behind in a register if the constant could + // be folded into an immediate field. + if (InnerAddOps.size() == 1 && + isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1)) + continue; - // Don't pull a constant into a register if the constant could be folded - // into an immediate field. - if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, *J, Base.getNumRegs() > 1)) - continue; + const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); + if (InnerSum->isZero()) + continue; + Formula F = Base; - // Collect all operands except *J. - SmallVector InnerAddOps( - ((const SmallVector &)AddOps).begin(), J); - InnerAddOps.append(std::next(J), - ((const SmallVector &)AddOps).end()); - - // Don't leave just a constant behind in a register if the constant could - // be folded into an immediate field. - if (InnerAddOps.size() == 1 && - isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1)) - continue; + // Add the remaining pieces of the add back into the new formula. + const SCEVConstant *InnerSumSC = dyn_cast(InnerSum); + if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && + TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + InnerSumSC->getValue()->getZExtValue())) { + F.UnfoldedOffset = + (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue(); + if (IsScaledReg) + F.ScaledReg = nullptr; + else + F.BaseRegs.erase(F.BaseRegs.begin() + Idx); + } else if (IsScaledReg) + F.ScaledReg = InnerSum; + else + F.BaseRegs[Idx] = InnerSum; + + // Add J as its own register, or an unfolded immediate. 
+ const SCEVConstant *SC = dyn_cast(*J); + if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && + TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + SC->getValue()->getZExtValue())) + F.UnfoldedOffset = + (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue(); + else + F.BaseRegs.push_back(*J); + // We may have changed the number of register in base regs, adjust the + // formula accordingly. + F.Canonicalize(); + + if (InsertFormula(LU, LUIdx, F)) + // If that formula hadn't been seen before, recurse to find more like + // it. + GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1); + } +} - const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); - if (InnerSum->isZero()) - continue; - Formula F = Base; +/// GenerateReassociations - Split out subexpressions from adds and the bases of +/// addrecs. +void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, + Formula Base, unsigned Depth) { + assert(Base.isCanonical() && "Input must be in the canonical form"); + // Arbitrarily cap recursion to protect compile time. + if (Depth >= 3) + return; - // Add the remaining pieces of the add back into the new formula. - const SCEVConstant *InnerSumSC = dyn_cast(InnerSum); - if (InnerSumSC && - SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && - TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + - InnerSumSC->getValue()->getZExtValue())) { - F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + - InnerSumSC->getValue()->getZExtValue(); - F.BaseRegs.erase(F.BaseRegs.begin() + i); - } else - F.BaseRegs[i] = InnerSum; - - // Add J as its own register, or an unfolded immediate. - const SCEVConstant *SC = dyn_cast(*J); - if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && - TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + - SC->getValue()->getZExtValue())) - F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + - SC->getValue()->getZExtValue(); - else - F.BaseRegs.push_back(*J); + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) + GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i); - if (InsertFormula(LU, LUIdx, F)) - // If that formula hadn't been seen before, recurse to find more like - // it. - GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1); - } - } + if (Base.Scale == 1) + GenerateReassociationsImpl(LU, LUIdx, Base, Depth, + /* Idx */ -1, /* IsScaledReg */ true); } /// GenerateCombinations - Generate a formula consisting of all of the @@ -3234,8 +3364,12 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base) { // This method is only interesting on a plurality of registers. - if (Base.BaseRegs.size() <= 1) return; + if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1) + return; + // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before + // processing the formula. + Base.Unscale(); Formula F = Base; F.BaseRegs.clear(); SmallVector Ops; @@ -3255,29 +3389,87 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, // rather than proceed with zero in a register. if (!Sum->isZero()) { F.BaseRegs.push_back(Sum); + F.Canonicalize(); (void)InsertFormula(LU, LUIdx, F); } } } +/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets. +void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, size_t Idx, + bool IsScaledReg) { + const SCEV *G = IsScaledReg ? 
Base.ScaledReg : Base.BaseRegs[Idx]; + GlobalValue *GV = ExtractSymbol(G, SE); + if (G->isZero() || !GV) + return; + Formula F = Base; + F.BaseGV = GV; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) + return; + if (IsScaledReg) + F.ScaledReg = G; + else + F.BaseRegs[Idx] = G; + (void)InsertFormula(LU, LUIdx, F); +} + /// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets. void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base) { // We can't add a symbolic offset if the address already contains one. if (Base.BaseGV) return; - for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { - const SCEV *G = Base.BaseRegs[i]; - GlobalValue *GV = ExtractSymbol(G, SE); - if (G->isZero() || !GV) - continue; + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) + GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i); + if (Base.Scale == 1) + GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1, + /* IsScaledReg */ true); +} + +/// \brief Helper function for LSRInstance::GenerateConstantOffsets. +void LSRInstance::GenerateConstantOffsetsImpl( + LSRUse &LU, unsigned LUIdx, const Formula &Base, + const SmallVectorImpl &Worklist, size_t Idx, bool IsScaledReg) { + const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; + for (SmallVectorImpl::const_iterator I = Worklist.begin(), + E = Worklist.end(); + I != E; ++I) { Formula F = Base; - F.BaseGV = GV; - if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) - continue; - F.BaseRegs[i] = G; - (void)InsertFormula(LU, LUIdx, F); + F.BaseOffset = (uint64_t)Base.BaseOffset - *I; + if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, + LU.AccessTy, F)) { + // Add the offset to the base register. + const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); + // If it cancelled out, drop the base register, otherwise update it. + if (NewG->isZero()) { + if (IsScaledReg) { + F.Scale = 0; + F.ScaledReg = nullptr; + } else + F.DeleteBaseReg(F.BaseRegs[Idx]); + F.Canonicalize(); + } else if (IsScaledReg) + F.ScaledReg = NewG; + else + F.BaseRegs[Idx] = NewG; + + (void)InsertFormula(LU, LUIdx, F); + } } + + int64_t Imm = ExtractImmediate(G, SE); + if (G->isZero() || Imm == 0) + return; + Formula F = Base; + F.BaseOffset = (uint64_t)F.BaseOffset + Imm; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) + return; + if (IsScaledReg) + F.ScaledReg = G; + else + F.BaseRegs[Idx] = G; + (void)InsertFormula(LU, LUIdx, F); } /// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets. @@ -3290,38 +3482,11 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, if (LU.MaxOffset != LU.MinOffset) Worklist.push_back(LU.MaxOffset); - for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { - const SCEV *G = Base.BaseRegs[i]; - - for (SmallVectorImpl::const_iterator I = Worklist.begin(), - E = Worklist.end(); I != E; ++I) { - Formula F = Base; - F.BaseOffset = (uint64_t)Base.BaseOffset - *I; - if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, - LU.AccessTy, F)) { - // Add the offset to the base register. - const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); - // If it cancelled out, drop the base register, otherwise update it. 
- if (NewG->isZero()) { - std::swap(F.BaseRegs[i], F.BaseRegs.back()); - F.BaseRegs.pop_back(); - } else - F.BaseRegs[i] = NewG; - - (void)InsertFormula(LU, LUIdx, F); - } - } - - int64_t Imm = ExtractImmediate(G, SE); - if (G->isZero() || Imm == 0) - continue; - Formula F = Base; - F.BaseOffset = (uint64_t)F.BaseOffset + Imm; - if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) - continue; - F.BaseRegs[i] = G; - (void)InsertFormula(LU, LUIdx, F); - } + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) + GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i); + if (Base.Scale == 1) + GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1, + /* IsScaledReg */ true); } /// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up @@ -3421,7 +3586,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { if (!IntTy) return; // If this Formula already has a scaled register, we can't add another one. - if (Base.Scale != 0) return; + // Try to unscale the formula to generate a better scale. + if (Base.Scale != 0 && !Base.Unscale()) + return; + + assert(Base.Scale == 0 && "Unscale did not did its job!"); // Check each interesting stride. for (SmallSetVector::const_iterator @@ -3462,6 +3631,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { Formula F = Base; F.ScaledReg = Quotient; F.DeleteBaseReg(F.BaseRegs[i]); + // The canonical representation of 1*reg is reg, which is already in + // Base. In that case, do not try to insert the formula, it will be + // rejected anyway. + if (F.Scale == 1 && F.BaseRegs.empty()) + continue; (void)InsertFormula(LU, LUIdx, F); } } @@ -3626,7 +3800,12 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // TODO: Use a more targeted data structure. for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) { - const Formula &F = LU.Formulae[L]; + Formula F = LU.Formulae[L]; + // FIXME: The code for the scaled and unscaled registers looks + // very similar but slightly different. Investigate if they + // could be merged. That way, we would not have to unscale the + // Formula. + F.Unscale(); // Use the immediate in the scaled register. if (F.ScaledReg == OrigReg) { int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale; @@ -3652,6 +3831,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { continue; // OK, looks good. + NewF.Canonicalize(); (void)InsertFormula(LU, LUIdx, NewF); } else { // Use the immediate in a base register. @@ -3685,6 +3865,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { goto skip_formula; // Ok, looks good. + NewF.Canonicalize(); (void)InsertFormula(LU, LUIdx, NewF); break; skip_formula:; @@ -3938,7 +4119,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { for (SmallVectorImpl::const_iterator I = LU.Formulae.begin(), E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; - if (F.BaseOffset == 0 || F.Scale != 0) + if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1)) continue; LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU); @@ -4399,25 +4580,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF, Loops, SE, DT); if (LU.Kind == LSRUse::ICmpZero) { - // An interesting way of "folding" with an icmp is to use a negated - // scale, which we'll implement by inserting it into the other operand - // of the icmp. 
- assert(F.Scale == -1 && - "The only scale supported by ICmpZero uses is -1!"); - ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP); + // Expand ScaleReg as if it was part of the base regs. + if (F.Scale == 1) + Ops.push_back( + SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP))); + else { + // An interesting way of "folding" with an icmp is to use a negated + // scale, which we'll implement by inserting it into the other operand + // of the icmp. + assert(F.Scale == -1 && + "The only scale supported by ICmpZero uses is -1!"); + ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP); + } } else { // Otherwise just expand the scaled register and an explicit scale, // which is expected to be matched as part of the address. // Flush the operand list to suppress SCEVExpander hoisting address modes. - if (!Ops.empty() && LU.Kind == LSRUse::Address) { + // Unless the addressing mode will not be folded. + if (!Ops.empty() && LU.Kind == LSRUse::Address && + isAMCompletelyFolded(TTI, LU, F)) { Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)); - ScaledS = SE.getMulExpr(ScaledS, - SE.getConstant(ScaledS->getType(), F.Scale)); + if (F.Scale != 1) + ScaledS = + SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale)); Ops.push_back(ScaledS); } } @@ -4495,7 +4685,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } CI->setOperand(1, ICmpScaledV); } else { - assert(F.Scale == 0 && + // A scale of 1 means that the scale has been expanded as part of the + // base regs. + assert((F.Scale == 0 || F.Scale == 1) && "ICmp does not support folding a global value and " "a scale at the same time!"); Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), diff --git a/test/CodeGen/X86/avoid_complex_am.ll b/test/CodeGen/X86/avoid_complex_am.ll index 0b7a13d3c091..7f095190ab8f 100644 --- a/test/CodeGen/X86/avoid_complex_am.ll +++ b/test/CodeGen/X86/avoid_complex_am.ll @@ -1,6 +1,9 @@ ; RUN: opt -S -loop-reduce < %s | FileCheck %s ; Complex addressing mode are costly. ; Make loop-reduce prefer unscaled accesses. +; On X86, reg1 + 1*reg2 has the same cost as reg1 + 8*reg2. +; Therefore, LSR currently prefers to fold as much computation as possible +; in the addressing mode. ; target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx" @@ -18,8 +21,8 @@ for.body: ; preds = %for.body, %entry %tmp = add nsw i64 %indvars.iv, -1 %arrayidx = getelementptr inbounds double* %b, i64 %tmp %tmp1 = load double* %arrayidx, align 8 -; The induction variable should carry the scaling factor: 1 * 8 = 8. -; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 8 +; The induction variable should carry the scaling factor: 1. +; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 1 %indvars.iv.next = add i64 %indvars.iv, 1 %arrayidx2 = getelementptr inbounds double* %c, i64 %indvars.iv.next %tmp2 = load double* %arrayidx2, align 8 @@ -27,8 +30,8 @@ for.body: ; preds = %for.body, %entry %arrayidx4 = getelementptr inbounds double* %a, i64 %indvars.iv store double %mul, double* %arrayidx4, align 8 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 -; Comparison should be 19 * 8 = 152. -; CHECK: icmp eq i32 {{%[^,]+}}, 152 +; Comparison should be 19 * 1 = 19. 
+; CHECK: icmp eq i32 {{%[^,]+}}, 19 %exitcond = icmp eq i32 %lftr.wideiv, 20 br i1 %exitcond, label %for.end, label %for.body diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll index 7f61e10f5f68..9ddc84708d5b 100644 --- a/test/CodeGen/X86/masked-iv-safe.ll +++ b/test/CodeGen/X86/masked-iv-safe.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: count_up ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: addq $8, +; CHECK: incq ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @count_up(double* %d, i64 %n) nounwind { @@ -71,7 +71,7 @@ return: ; CHECK-LABEL: count_up_signed ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: addq $8, +; CHECK: incq ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @count_up_signed(double* %d, i64 %n) nounwind { @@ -242,7 +242,7 @@ return: ; CHECK-LABEL: another_count_down_signed ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: addq $-8, +; CHECK: decq ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @another_count_down_signed(double* %d, i64 %n) nounwind { From e3f74564ffaab3d7f9aad010a38a8a044599a540 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Tue, 20 May 2014 20:32:18 +0000 Subject: [PATCH 006/906] =?UTF-8?q?Update=20MachOObjectFile::getSymbolAddr?= =?UTF-8?q?ess=20so=20it=20returns=20UnknownAddressOrSize=20for=20undefine?= =?UTF-8?q?d=20symbols.=20=20Allowing=20llvm-nm=20to=20print=20spaces=20in?= =?UTF-8?q?stead=20of=200=E2=80=99s=20for=20the=20value=20of=20undefined?= =?UTF-8?q?=20symbols=20in=20Mach-O=20files.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209235 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 12 ++++++++++-- test/Object/nm-trivial-object.test | 8 ++++---- test/Object/nm-universal-binary.test | 2 +- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 0951460ccbb2..9c581d378419 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -472,10 +472,18 @@ error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const { if (is64Bit()) { MachO::nlist_64 Entry = getSymbol64TableEntry(Symb); - Res = Entry.n_value; + if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF && + Entry.n_value == 0) + Res = UnknownAddressOrSize; + else + Res = Entry.n_value; } else { MachO::nlist Entry = getSymbolTableEntry(Symb); - Res = Entry.n_value; + if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF && + Entry.n_value == 0) + Res = UnknownAddressOrSize; + else + Res = Entry.n_value; } return object_error::success; } diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test index 111749289807..20ac6621e728 100644 --- a/test/Object/nm-trivial-object.test +++ b/test/Object/nm-trivial-object.test @@ -55,14 +55,14 @@ WEAK-ELF64: 0000000000000000 V x2 ABSOLUTE-ELF64: 0000000000000123 a a1 ABSOLUTE-ELF64: 0000000000000123 A a2 -macho: 00000000 U _SomeOtherFunction +macho: U _SomeOtherFunction macho: 00000000 T _main -macho: 00000000 U _puts +macho: U _puts macho64: 0000000000000028 s L_.str -macho64: 0000000000000000 U _SomeOtherFunction +macho64: U _SomeOtherFunction macho64: 0000000000000000 T _main -macho64: 0000000000000000 U _puts +macho64: U _puts Test that nm uses addresses even with ELF .o files. 
diff --git a/test/Object/nm-universal-binary.test b/test/Object/nm-universal-binary.test index faf4812e5378..c20c733dcd8b 100644 --- a/test/Object/nm-universal-binary.test +++ b/test/Object/nm-universal-binary.test @@ -13,7 +13,7 @@ CHECK-AR: 0000000000000068 s EH_frame0 CHECK-AR: 000000000000003b s L_.str CHECK-AR: 0000000000000000 T _main CHECK-AR: 0000000000000080 S _main.eh -CHECK-AR: 0000000000000000 U _printf +CHECK-AR: U _printf CHECK-AR: macho-universal-archive.x86_64.i386:i386:foo.o: CHECK-AR: 00000008 S _bar CHECK-AR: 00000000 T _foo From 95aa960b715315bf99918544211d0639b77c0f3a Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Tue, 20 May 2014 21:10:15 +0000 Subject: [PATCH 007/906] Revert r209235 as it broke two tests: Failing Tests (2): LLVM :: ExecutionEngine/MCJIT/stubs-sm-pic.ll LLVM :: ExecutionEngine/MCJIT/stubs.ll git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209236 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 12 ++---------- test/Object/nm-trivial-object.test | 8 ++++---- test/Object/nm-universal-binary.test | 2 +- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 9c581d378419..0951460ccbb2 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -472,18 +472,10 @@ error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const { if (is64Bit()) { MachO::nlist_64 Entry = getSymbol64TableEntry(Symb); - if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF && - Entry.n_value == 0) - Res = UnknownAddressOrSize; - else - Res = Entry.n_value; + Res = Entry.n_value; } else { MachO::nlist Entry = getSymbolTableEntry(Symb); - if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF && - Entry.n_value == 0) - Res = UnknownAddressOrSize; - else - Res = Entry.n_value; + Res = Entry.n_value; } return object_error::success; } diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test index 20ac6621e728..111749289807 100644 --- a/test/Object/nm-trivial-object.test +++ b/test/Object/nm-trivial-object.test @@ -55,14 +55,14 @@ WEAK-ELF64: 0000000000000000 V x2 ABSOLUTE-ELF64: 0000000000000123 a a1 ABSOLUTE-ELF64: 0000000000000123 A a2 -macho: U _SomeOtherFunction +macho: 00000000 U _SomeOtherFunction macho: 00000000 T _main -macho: U _puts +macho: 00000000 U _puts macho64: 0000000000000028 s L_.str -macho64: U _SomeOtherFunction +macho64: 0000000000000000 U _SomeOtherFunction macho64: 0000000000000000 T _main -macho64: U _puts +macho64: 0000000000000000 U _puts Test that nm uses addresses even with ELF .o files. diff --git a/test/Object/nm-universal-binary.test b/test/Object/nm-universal-binary.test index c20c733dcd8b..faf4812e5378 100644 --- a/test/Object/nm-universal-binary.test +++ b/test/Object/nm-universal-binary.test @@ -13,7 +13,7 @@ CHECK-AR: 0000000000000068 s EH_frame0 CHECK-AR: 000000000000003b s L_.str CHECK-AR: 0000000000000000 T _main CHECK-AR: 0000000000000080 S _main.eh -CHECK-AR: U _printf +CHECK-AR: 0000000000000000 U _printf CHECK-AR: macho-universal-archive.x86_64.i386:i386:foo.o: CHECK-AR: 00000008 S _bar CHECK-AR: 00000000 T _foo From 6a9366c0c633c4518d55685d4f3b289ef99c0bd1 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 20 May 2014 21:25:34 +0000 Subject: [PATCH 008/906] Move the function and data section flags into the options struct and make the functions to set them non-static. 
Move and rename the llvm specific backend options to avoid conflicting with the clang option. Paired with a backend commit to update. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209238 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/CommandFlags.h | 11 +++++ include/llvm/Target/TargetMachine.h | 8 ++-- include/llvm/Target/TargetOptions.h | 14 ++++-- lib/Target/TargetMachine.cpp | 21 +++----- test/CodeGen/X86/global-sections.ll | 74 ++++++++++++++--------------- test/DebugInfo/X86/cu-ranges.ll | 2 +- 6 files changed, 69 insertions(+), 61 deletions(-) diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h index ac789e4af8a6..2956ad8ea33f 100644 --- a/include/llvm/CodeGen/CommandFlags.h +++ b/include/llvm/CodeGen/CommandFlags.h @@ -193,6 +193,15 @@ cl::opt StartAfter("start-after", cl::value_desc("pass-name"), cl::init("")); +cl::opt DataSections("data-sections", + cl::desc("Emit data into separate sections"), + cl::init(false)); + +cl::opt +FunctionSections("function-sections", + cl::desc("Emit functions into separate sections"), + cl::init(false)); + // Common utility function tightly tied to the options listed here. Initializes // a TargetOptions object with CodeGen flags and returns it. static inline TargetOptions InitTargetOptionsFromCodeGenFlags() { @@ -215,6 +224,8 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() { Options.TrapFuncName = TrapFuncName; Options.PositionIndependentExecutable = EnablePIE; Options.UseInitArray = UseInitArray; + Options.DataSections = DataSections; + Options.FunctionSections = FunctionSections; Options.MCOptions = InitMCTargetOptionsFromFlags(); diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index bf6963b79659..17ebd07e5d78 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -195,18 +195,18 @@ class TargetMachine { /// getDataSections - Return true if data objects should be emitted into their /// own section, corresponds to -fdata-sections. - static bool getDataSections(); + bool getDataSections() const; /// getFunctionSections - Return true if functions should be emitted into /// their own section, corresponding to -ffunction-sections. - static bool getFunctionSections(); + bool getFunctionSections() const; /// setDataSections - Set if the data are emit into separate sections. - static void setDataSections(bool); + void setDataSections(bool); /// setFunctionSections - Set if the functions are emit into separate /// sections. - static void setFunctionSections(bool); + void setFunctionSections(bool); /// \brief Register analysis passes for this target with a pass manager. 
virtual void addAnalysisPasses(PassManagerBase &) {} diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index 188395968bb5..636eaf5c05e9 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -50,10 +50,10 @@ namespace llvm { JITEmitDebugInfoToDisk(false), GuaranteedTailCallOpt(false), DisableTailCalls(false), StackAlignmentOverride(0), EnableFastISel(false), PositionIndependentExecutable(false), - UseInitArray(false), - DisableIntegratedAS(false), CompressDebugSections(false), - TrapUnreachable(false), - TrapFuncName(""), FloatABIType(FloatABI::Default), + UseInitArray(false), DisableIntegratedAS(false), + CompressDebugSections(false), FunctionSections(false), + DataSections(false), TrapUnreachable(false), TrapFuncName(""), + FloatABIType(FloatABI::Default), AllowFPOpFusion(FPOpFusion::Standard) {} /// PrintMachineCode - This flag is enabled when the -print-machineinstrs @@ -164,6 +164,12 @@ namespace llvm { /// Compress DWARF debug sections. unsigned CompressDebugSections : 1; + /// Emit functions into separate sections. + unsigned FunctionSections : 1; + + /// Emit data into separate sections. + unsigned DataSections : 1; + /// Emit target-specific trap instruction for 'unreachable' IR instructions. unsigned TrapUnreachable : 1; diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index f79cdfd0a791..4ccf5194947b 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -36,15 +36,6 @@ namespace llvm { bool AsmVerbosityDefault(false); } -static cl::opt -DataSections("fdata-sections", - cl::desc("Emit data into separate sections"), - cl::init(false)); -static cl::opt -FunctionSections("ffunction-sections", - cl::desc("Emit functions into separate sections"), - cl::init(false)); - //--------------------------------------------------------------------------- // TargetMachine Class // @@ -179,20 +170,20 @@ void TargetMachine::setAsmVerbosityDefault(bool V) { AsmVerbosityDefault = V; } -bool TargetMachine::getFunctionSections() { - return FunctionSections; +bool TargetMachine::getFunctionSections() const { + return Options.FunctionSections; } -bool TargetMachine::getDataSections() { - return DataSections; +bool TargetMachine::getDataSections() const { + return Options.DataSections; } void TargetMachine::setFunctionSections(bool V) { - FunctionSections = V; + Options.FunctionSections = V; } void TargetMachine::setDataSections(bool V) { - DataSections = V; + Options.DataSections = V; } void TargetMachine::getNameWithPrefix(SmallVectorImpl &Name, diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll index 7f123c106727..c763f3947e59 100644 --- a/test/CodeGen/X86/global-sections.ll +++ b/test/CodeGen/X86/global-sections.ll @@ -2,8 +2,8 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin9.7 | FileCheck %s -check-prefix=DARWIN ; RUN: llc < %s -mtriple=i386-apple-darwin10 -relocation-model=static | FileCheck %s -check-prefix=DARWIN-STATIC ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=DARWIN64 -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -fdata-sections | FileCheck %s -check-prefix=LINUX-SECTIONS -; RUN: llc < %s -mtriple=i686-pc-win32 -fdata-sections -ffunction-sections | FileCheck %s -check-prefix=WIN32-SECTIONS +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -data-sections | FileCheck %s -check-prefix=LINUX-SECTIONS +; RUN: llc < %s -mtriple=i686-pc-win32 -data-sections -function-sections | FileCheck %s 
-check-prefix=WIN32-SECTIONS define void @F1() { ret void @@ -18,13 +18,13 @@ define void @F1() { ; LINUX: .type G1,@object ; LINUX: .comm G1,4,4 -; DARWIN: .comm _G1,4,2 +; DARWIN: .comm _G1,4,2 ; const int G2 __attribute__((weak)) = 42; -@G2 = weak_odr unnamed_addr constant i32 42 +@G2 = weak_odr unnamed_addr constant i32 42 ; TODO: linux drops this into .rodata, we drop it into ".gnu.linkonce.r.G2" @@ -85,25 +85,25 @@ define void @F1() { ; PR4584 @"foo bar" = linkonce global i32 42 -; LINUX: .type "foo bar",@object +; LINUX: .type "foo bar",@object ; LINUX: .section ".data.foo bar","aGw",@progbits,"foo bar",comdat -; LINUX: .weak "foo bar" +; LINUX: .weak "foo bar" ; LINUX: "foo bar": -; DARWIN: .section __DATA,__datacoal_nt,coalesced -; DARWIN: .globl "_foo bar" -; DARWIN: .weak_definition "_foo bar" +; DARWIN: .section __DATA,__datacoal_nt,coalesced +; DARWIN: .globl "_foo bar" +; DARWIN: .weak_definition "_foo bar" ; DARWIN: "_foo bar": ; PR4650 @G6 = weak_odr unnamed_addr constant [1 x i8] c"\01" -; LINUX: .type G6,@object -; LINUX: .section .rodata.G6,"aG",@progbits,G6,comdat -; LINUX: .weak G6 +; LINUX: .type G6,@object +; LINUX: .section .rodata.G6,"aG",@progbits,G6,comdat +; LINUX: .weak G6 ; LINUX: G6: -; LINUX: .byte 1 -; LINUX: .size G6, 1 +; LINUX: .byte 1 +; LINUX: .size G6, 1 ; DARWIN: .section __TEXT,__const_coal,coalesced ; DARWIN: .globl _G6 @@ -114,58 +114,58 @@ define void @F1() { @G7 = unnamed_addr constant [10 x i8] c"abcdefghi\00" -; DARWIN: __TEXT,__cstring,cstring_literals -; DARWIN: .globl _G7 +; DARWIN: __TEXT,__cstring,cstring_literals +; DARWIN: .globl _G7 ; DARWIN: _G7: -; DARWIN: .asciz "abcdefghi" +; DARWIN: .asciz "abcdefghi" -; LINUX: .section .rodata.str1.1,"aMS",@progbits,1 -; LINUX: .globl G7 +; LINUX: .section .rodata.str1.1,"aMS",@progbits,1 +; LINUX: .globl G7 ; LINUX: G7: -; LINUX: .asciz "abcdefghi" +; LINUX: .asciz "abcdefghi" ; LINUX-SECTIONS: .section .rodata.G7,"aMS",@progbits,1 -; LINUX-SECTIONS: .globl G7 +; LINUX-SECTIONS: .globl G7 ; WIN32-SECTIONS: .section .rdata,"rd",one_only,_G7 -; WIN32-SECTIONS: .globl _G7 +; WIN32-SECTIONS: .globl _G7 @G8 = unnamed_addr constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ] -; DARWIN: .section __TEXT,__const -; DARWIN: .globl _G8 +; DARWIN: .section __TEXT,__const +; DARWIN: .globl _G8 ; DARWIN: _G8: -; LINUX: .section .rodata.str2.2,"aMS",@progbits,2 -; LINUX: .globl G8 +; LINUX: .section .rodata.str2.2,"aMS",@progbits,2 +; LINUX: .globl G8 ; LINUX:G8: @G9 = unnamed_addr constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ] -; DARWIN: .globl _G9 +; DARWIN: .globl _G9 ; DARWIN: _G9: -; LINUX: .section .rodata.str4.4,"aMS",@progbits,4 -; LINUX: .globl G9 +; LINUX: .section .rodata.str4.4,"aMS",@progbits,4 +; LINUX: .globl G9 ; LINUX:G9 @G10 = weak global [100 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=0] -; DARWIN: .section __DATA,__datacoal_nt,coalesced +; DARWIN: .section __DATA,__datacoal_nt,coalesced ; DARWIN: .globl _G10 -; DARWIN: .weak_definition _G10 -; DARWIN: .align 5 +; DARWIN: .weak_definition _G10 +; DARWIN: .align 5 ; DARWIN: _G10: -; DARWIN: .space 400 +; DARWIN: .space 400 -; LINUX: .bss -; LINUX: .weak G10 -; LINUX: .align 32 +; LINUX: .bss +; LINUX: .weak G10 +; LINUX: .align 32 ; LINUX: G10: -; LINUX: .zero 400 +; LINUX: .zero 400 diff --git a/test/DebugInfo/X86/cu-ranges.ll b/test/DebugInfo/X86/cu-ranges.ll index e6dc17e2d50e..405a498155f5 100644 --- a/test/DebugInfo/X86/cu-ranges.ll +++ b/test/DebugInfo/X86/cu-ranges.ll @@ -1,4 +1,4 @@ -; RUN: llc -split-dwarf=Enable -O0 %s 
-ffunction-sections -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t +; RUN: llc -split-dwarf=Enable -O0 %s -function-sections -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t ; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck --check-prefix=FUNCTION-SECTIONS %s ; RUN: llvm-readobj --relocations %t | FileCheck --check-prefix=FUNCTION-SECTIONS-RELOCS %s From e94103adcde704725ccfcd1481035bbc301f755a Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 20 May 2014 21:40:13 +0000 Subject: [PATCH 009/906] Unbreak the sanitizer buildbots after r209226 due to SROA issue described in http://reviews.llvm.org/D3714 Undecided whether this should include a test case - SROA produces bad dbg.value metadata describing a value for a reference that is actually the value of the thing the reference refers to. For now, loosening the assert lets this not assert, but it's still bogus/wrong output... If someone wants to tell me to add a test, I'm willing/able, just undecided. Hopefully we'll get SROA fixed soon & we can tighten up this assertion again. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209240 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 68673f7fb84c..f09cb6ade638 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -751,11 +751,14 @@ static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { if (DTy.isDerivedType()) { dwarf::Tag T = (dwarf::Tag)Ty.getTag(); // Encode pointer constants as unsigned bytes. This is used at least for - // null pointer constant emission. Maybe DW_TAG_reference_type should be - // accepted here too, if there are ways to produce compile-time constant - // references. + // null pointer constant emission. + // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed + // here, but accept them for now due to a bug in SROA producing bogus + // dbg.values. if (T == dwarf::DW_TAG_pointer_type || - T == dwarf::DW_TAG_ptr_to_member_type) + T == dwarf::DW_TAG_ptr_to_member_type || + T == dwarf::DW_TAG_reference_type || + T == dwarf::DW_TAG_rvalue_reference_type) return true; assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || T == dwarf::DW_TAG_volatile_type || From adf1668bec523d96a6ddc9fffcc7ae092e919197 Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Tue, 20 May 2014 21:47:07 +0000 Subject: [PATCH 010/906] [ARM64] PR19792: Fix cycle in DAG after performPostLD1Combine Povray and dealII currently assert with "Overran sorted position" in AssignTopologicalOrder. The problem is that performPostLD1Combine can introduce cycles. Consider: (insert_vector_elt (INSERT_SUBREG undef, (load (add %vreg0, Constant<8>), undef), <= A TargetConstant<2>), (load %vreg0, undef), <= B Constant<1>) This is turned into a LD1LANEpost node. However the address in A is not a valid user of the post-incremented address of B in LD1LANEpost. 
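In other words, the combine must also reject an increment that the vector operand itself (transitively) depends on: after the combine the increment's users are rewired to the LD1LANEpost result, so such a dependence would make the node feed its own operand. A condensed sketch of the legality checks, using the names from the diff below (illustrative only; the authoritative version is the patch itself):

  SDValue Vector = N->getOperand(0);
  if (User->isPredecessorOf(LD) || LD->isPredecessorOf(User))
    continue;  // existing check: user and load must not already depend on each other
  if (User->isPredecessorOf(Vector.getNode()))
    continue;  // new check: the vector operand must not depend on the increment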
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209242 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/ARM64ISelLowering.cpp | 7 +++- test/CodeGen/ARM64/indexed-vector-ldst-2.ll | 40 +++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/ARM64/indexed-vector-ldst-2.ll diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp index 538360cf39dc..385373116de8 100644 --- a/lib/Target/ARM64/ARM64ISelLowering.cpp +++ b/lib/Target/ARM64/ARM64ISelLowering.cpp @@ -7298,6 +7298,7 @@ static SDValue performPostLD1Combine(SDNode *N, } SDValue Addr = LD->getOperand(1); + SDValue Vector = N->getOperand(0); // Search for a use of the address operand that is an increment. for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE = Addr.getNode()->use_end(); UI != UE; ++UI) { @@ -7310,6 +7311,10 @@ static SDValue performPostLD1Combine(SDNode *N, // would create a cycle. if (User->isPredecessorOf(LD) || LD->isPredecessorOf(User)) continue; + // Also check that add is not used in the vector operand. This would also + // create a cycle. + if (User->isPredecessorOf(Vector.getNode())) + continue; // If the increment is a constant, it must match the memory ref size. SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); @@ -7324,7 +7329,7 @@ static SDValue performPostLD1Combine(SDNode *N, SmallVector Ops; Ops.push_back(LD->getOperand(0)); // Chain if (IsLaneOp) { - Ops.push_back(N->getOperand(0)); // The vector to be inserted + Ops.push_back(Vector); // The vector to be inserted Ops.push_back(N->getOperand(2)); // The lane to be inserted in the vector } Ops.push_back(Addr); diff --git a/test/CodeGen/ARM64/indexed-vector-ldst-2.ll b/test/CodeGen/ARM64/indexed-vector-ldst-2.ll new file mode 100644 index 000000000000..654f96acc646 --- /dev/null +++ b/test/CodeGen/ARM64/indexed-vector-ldst-2.ll @@ -0,0 +1,40 @@ +; RUN: llc %s + +; This used to assert with "Overran sorted position" in AssignTopologicalOrder +; due to a cycle created in performPostLD1Combine. 
+ +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-ios7.0.0" + +; Function Attrs: nounwind ssp +define void @f(double* %P1) #0 { +entry: + %arrayidx4 = getelementptr inbounds double* %P1, i64 1 + %0 = load double* %arrayidx4, align 8, !tbaa !1 + %1 = load double* %P1, align 8, !tbaa !1 + %2 = insertelement <2 x double> undef, double %0, i32 0 + %3 = insertelement <2 x double> %2, double %1, i32 1 + %4 = fsub <2 x double> zeroinitializer, %3 + %5 = fmul <2 x double> undef, %4 + %6 = extractelement <2 x double> %5, i32 0 + %cmp168 = fcmp olt double %6, undef + br i1 %cmp168, label %if.then172, label %return + +if.then172: ; preds = %cond.end90 + %7 = tail call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 false) + br label %return + +return: ; preds = %if.then172, %cond.end90, %entry + ret void +} + +; Function Attrs: nounwind readnone +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #1 + +attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!1 = metadata !{metadata !2, metadata !2, i64 0} +!2 = metadata !{metadata !"double", metadata !3, i64 0} +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} +!4 = metadata !{metadata !"Simple C/C++ TBAA"} From ed3abe4994fa5cbbae19257232bef87f83354b2d Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 20 May 2014 22:33:09 +0000 Subject: [PATCH 011/906] Revert "DebugInfo: Assume all subprogram DIEs have been created before any abstract subprograms are constructed." This reverts commit r209178. This seems to be asserting in an LTO build on some internal Apple buildbots. No upstream reproduction (and I don't have an LLVM-aware gold built right now to reproduce it personally) but it's a small patch & the failure's semi-plausible so I'm going to revert first while I try to reproduce this. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209251 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 641ebb68dc21..cb5824b35728 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -530,11 +530,11 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, if (!ProcessedSPNodes.insert(Sub)) return; - DIE *ScopeDIE = TheCU.getDIE(Sub); - assert(ScopeDIE); - AbstractSPDies.insert(std::make_pair(Sub, ScopeDIE)); - TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); - createAndAddScopeChildren(TheCU, Scope, *ScopeDIE); + if (DIE *ScopeDIE = TheCU.getDIE(Sub)) { + AbstractSPDies.insert(std::make_pair(Sub, ScopeDIE)); + TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); + createAndAddScopeChildren(TheCU, Scope, *ScopeDIE); + } } DIE &DwarfDebug::constructSubprogramScopeDIE(DwarfCompileUnit &TheCU, From acf621545dace3f78904072f88fb05dd983dab2e Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Tue, 20 May 2014 22:40:31 +0000 Subject: [PATCH 012/906] Fix test added in r209242: llc shouldn't create files in source tree git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209252 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/ARM64/indexed-vector-ldst-2.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CodeGen/ARM64/indexed-vector-ldst-2.ll b/test/CodeGen/ARM64/indexed-vector-ldst-2.ll index 654f96acc646..c118f109289b 100644 --- a/test/CodeGen/ARM64/indexed-vector-ldst-2.ll +++ b/test/CodeGen/ARM64/indexed-vector-ldst-2.ll @@ -1,4 +1,4 @@ -; RUN: llc %s +; RUN: llc < %s ; This used to assert with "Overran sorted position" in AssignTopologicalOrder ; due to a cycle created in performPostLD1Combine. From b38059ab25cdabc9bcf4819cd82ab5955e8d3e3e Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Tue, 20 May 2014 23:04:47 +0000 Subject: [PATCH 013/906] =?UTF-8?q?Update=20MachOObjectFile::getSymbolAddr?= =?UTF-8?q?ess=20so=20it=20returns=20UnknownAddressOrSize=20for=20undefine?= =?UTF-8?q?d=20symbols,=20so=20it=20matches=20what=20COFFObjectFile::getSy?= =?UTF-8?q?mbolAddress=20does.=20=20This=20allows=20llvm-nm=20to=20print?= =?UTF-8?q?=20spaces=20instead=20of=200=E2=80=99s=20for=20the=20value=20of?= =?UTF-8?q?=20undefined=20symbols=20in=20Mach-O=20files.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make this change other uses of MachOObjectFile::getSymbolAddress are updated to handle when the Value is returned as UnknownAddressOrSize. Which is needed to keep two of the ExecutionEngine tests working for example. 
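For illustration, a minimal caller-side sketch of the new contract (an assumption about typical nm-style use, not code from this patch): printers of symbol values have to special-case the sentinel, which is how llvm-nm ends up emitting blanks for undefined Mach-O symbols.

  uint64_t Address;
  if (Symbol.getAddress(Address))          // SymbolRef::getAddress, error_code-returning at this revision
    return;
  if (Address == object::UnknownAddressOrSize)
    outs() << "                ";          // undefined symbol: print spaces, not zeros
  else
    outs() << format("%016" PRIx64, Address);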
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209253 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 20 +++++++++++++++++--- test/Object/nm-trivial-object.test | 8 ++++---- test/Object/nm-universal-binary.test | 2 +- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 0951460ccbb2..c6bab03d018e 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -472,10 +472,18 @@ error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const { if (is64Bit()) { MachO::nlist_64 Entry = getSymbol64TableEntry(Symb); - Res = Entry.n_value; + if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF && + Entry.n_value == 0) + Res = UnknownAddressOrSize; + else + Res = Entry.n_value; } else { MachO::nlist Entry = getSymbolTableEntry(Symb); - Res = Entry.n_value; + if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF && + Entry.n_value == 0) + Res = UnknownAddressOrSize; + else + Res = Entry.n_value; } return object_error::success; } @@ -501,6 +509,10 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, nlist_base Entry = getSymbolTableEntryBase(this, DRI); uint64_t Value; getSymbolAddress(DRI, Value); + if (Value == UnknownAddressOrSize) { + Result = UnknownAddressOrSize; + return object_error::success; + } BeginOffset = Value; @@ -519,6 +531,8 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, DataRefImpl DRI = Symbol.getRawDataRefImpl(); Entry = getSymbolTableEntryBase(this, DRI); getSymbolAddress(DRI, Value); + if (Value == UnknownAddressOrSize) + continue; if (Entry.n_sect == SectionIndex && Value > BeginOffset) if (!EndOffset || Value < EndOffset) EndOffset = Value; @@ -578,7 +592,7 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) { uint64_t Value; getSymbolAddress(DRI, Value); - if (Value) + if (Value && Value != UnknownAddressOrSize) Result |= SymbolRef::SF_Common; } } diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test index 111749289807..20ac6621e728 100644 --- a/test/Object/nm-trivial-object.test +++ b/test/Object/nm-trivial-object.test @@ -55,14 +55,14 @@ WEAK-ELF64: 0000000000000000 V x2 ABSOLUTE-ELF64: 0000000000000123 a a1 ABSOLUTE-ELF64: 0000000000000123 A a2 -macho: 00000000 U _SomeOtherFunction +macho: U _SomeOtherFunction macho: 00000000 T _main -macho: 00000000 U _puts +macho: U _puts macho64: 0000000000000028 s L_.str -macho64: 0000000000000000 U _SomeOtherFunction +macho64: U _SomeOtherFunction macho64: 0000000000000000 T _main -macho64: 0000000000000000 U _puts +macho64: U _puts Test that nm uses addresses even with ELF .o files. diff --git a/test/Object/nm-universal-binary.test b/test/Object/nm-universal-binary.test index faf4812e5378..c20c733dcd8b 100644 --- a/test/Object/nm-universal-binary.test +++ b/test/Object/nm-universal-binary.test @@ -13,7 +13,7 @@ CHECK-AR: 0000000000000068 s EH_frame0 CHECK-AR: 000000000000003b s L_.str CHECK-AR: 0000000000000000 T _main CHECK-AR: 0000000000000080 S _main.eh -CHECK-AR: 0000000000000000 U _printf +CHECK-AR: U _printf CHECK-AR: macho-universal-archive.x86_64.i386:i386:foo.o: CHECK-AR: 00000008 S _bar CHECK-AR: 00000000 T _foo From 4f6d26dbe89d51362b665fe8fb0f206434e52471 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 20 May 2014 23:59:50 +0000 Subject: [PATCH 014/906] Move the verbose asm option to be part of the options struct and set appropriately. 
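Illustrative sketch (an assumption about client code, not part of the patch): asm verbosity now travels with the per-target options rather than a static TargetMachine-wide default, e.g.

  TargetOptions Options = InitTargetOptionsFromCodeGenFlags();  // now honours -asm-verbose
  Options.MCOptions.AsmVerbose = true;  // or force it, as llc below does when the flag is absent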
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209258 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCTargetOptions.h | 4 +++- include/llvm/MC/MCTargetOptionsCommandFlags.h | 4 ++++ include/llvm/Target/TargetMachine.h | 4 ++-- lib/CodeGen/LLVMTargetMachine.cpp | 18 +++--------------- lib/MC/MCTargetOptions.cpp | 2 +- lib/Target/TargetMachine.cpp | 14 +++----------- tools/llc/llc.cpp | 7 ++++--- 7 files changed, 20 insertions(+), 33 deletions(-) diff --git a/include/llvm/MC/MCTargetOptions.h b/include/llvm/MC/MCTargetOptions.h index ad34958ae380..b4f5a979720d 100644 --- a/include/llvm/MC/MCTargetOptions.h +++ b/include/llvm/MC/MCTargetOptions.h @@ -28,6 +28,7 @@ class MCTargetOptions { unsigned MCUseDwarfDirectory : 1; unsigned ShowMCEncoding : 1; unsigned ShowMCInst : 1; + unsigned AsmVerbose : 1; MCTargetOptions(); }; @@ -39,7 +40,8 @@ inline bool operator==(const MCTargetOptions &LHS, const MCTargetOptions &RHS) { ARE_EQUAL(MCSaveTempLabels) && ARE_EQUAL(MCUseDwarfDirectory) && ARE_EQUAL(ShowMCEncoding) && - ARE_EQUAL(ShowMCInst)); + ARE_EQUAL(ShowMCInst) && + ARE_EQUAL(AsmVerbose)); #undef ARE_EQUAL } diff --git a/include/llvm/MC/MCTargetOptionsCommandFlags.h b/include/llvm/MC/MCTargetOptionsCommandFlags.h index 1edf8f75b139..55ac14215724 100644 --- a/include/llvm/MC/MCTargetOptionsCommandFlags.h +++ b/include/llvm/MC/MCTargetOptionsCommandFlags.h @@ -47,6 +47,9 @@ cl::opt ShowMCEncoding("show-mc-encoding", cl::Hidden, cl::opt ShowMCInst("show-mc-inst", cl::Hidden, cl::desc("Show instruction structure in .s output")); +cl::opt AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), + cl::init(false)); + static inline MCTargetOptions InitMCTargetOptionsFromFlags() { MCTargetOptions Options; Options.SanitizeAddress = @@ -57,6 +60,7 @@ static inline MCTargetOptions InitMCTargetOptionsFromFlags() { Options.MCSaveTempLabels = SaveTempLabels; Options.ShowMCEncoding = ShowMCEncoding; Options.ShowMCInst = ShowMCInst; + Options.AsmVerbose = AsmVerbose; return Options; } diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index 17ebd07e5d78..b263c571d9e6 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -187,11 +187,11 @@ class TargetMachine { /// getAsmVerbosityDefault - Returns the default value of asm verbosity. /// - static bool getAsmVerbosityDefault(); + bool getAsmVerbosityDefault() const ; /// setAsmVerbosityDefault - Set the default value of asm verbosity. Default /// is false. - static void setAsmVerbosityDefault(bool); + void setAsmVerbosityDefault(bool); /// getDataSections - Return true if data objects should be emitted into their /// own section, corresponds to -fdata-sections. 
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index c8211b76be4e..a5ac0578ab88 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -43,19 +43,6 @@ static cl::opt EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); -static cl::opt -AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), - cl::init(cl::BOU_UNSET)); - -static bool getVerboseAsm() { - switch (AsmVerbose) { - case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault(); - case cl::BOU_TRUE: return true; - case cl::BOU_FALSE: return false; - } - llvm_unreachable("Invalid verbose asm state"); -} - void LLVMTargetMachine::initAsmInfo() { MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), TargetTriple); @@ -188,8 +175,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); MCStreamer *S = getTarget().createAsmStreamer( - *Context, Out, getVerboseAsm(), Options.MCOptions.MCUseDwarfDirectory, - InstPrinter, MCE, MAB, Options.MCOptions.ShowMCInst); + *Context, Out, Options.MCOptions.AsmVerbose, + Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB, + Options.MCOptions.ShowMCInst); AsmStreamer.reset(S); break; } diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp index e1b6a5889d89..8e946d57f7fb 100644 --- a/lib/MC/MCTargetOptions.cpp +++ b/lib/MC/MCTargetOptions.cpp @@ -14,6 +14,6 @@ namespace llvm { MCTargetOptions::MCTargetOptions() : SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false), MCSaveTempLabels(false), MCUseDwarfDirectory(false), - ShowMCEncoding(false), ShowMCInst(false) {} + ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false) {} } // end namespace llvm diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 4ccf5194947b..dbd433d5240c 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -28,14 +28,6 @@ #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; -//--------------------------------------------------------------------------- -// Command-line options that tend to be useful on more than one back-end. -// - -namespace llvm { - bool AsmVerbosityDefault(false); -} - //--------------------------------------------------------------------------- // TargetMachine Class // @@ -162,12 +154,12 @@ void TargetMachine::setOptLevel(CodeGenOpt::Level Level) const { CodeGenInfo->setOptLevel(Level); } -bool TargetMachine::getAsmVerbosityDefault() { - return AsmVerbosityDefault; +bool TargetMachine::getAsmVerbosityDefault() const { + return Options.MCOptions.AsmVerbose; } void TargetMachine::setAsmVerbosityDefault(bool V) { - AsmVerbosityDefault = V; + Options.MCOptions.AsmVerbose = V; } bool TargetMachine::getFunctionSections() const { diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp index 269a5df90414..abdc1ab634cf 100644 --- a/tools/llc/llc.cpp +++ b/tools/llc/llc.cpp @@ -273,6 +273,10 @@ static int compileModule(char **argv, LLVMContext &Context) { TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); Options.DisableIntegratedAS = NoIntegratedAssembler; + // Override default to generate verbose assembly unless we've seen the flag. 
+ if (AsmVerbose.getNumOccurrences() == 0) + Options.MCOptions.AsmVerbose = true; + std::unique_ptr target( TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr, Options, RelocModel, CMModel, OLvl)); @@ -309,9 +313,6 @@ static int compileModule(char **argv, LLVMContext &Context) { mod->setDataLayout(DL); PM.add(new DataLayoutPass(mod)); - // Override default to generate verbose assembly. - Target.setAsmVerbosityDefault(true); - if (RelaxAll.getNumOccurrences() > 0 && FileType != TargetMachine::CGFT_ObjectFile) errs() << argv[0] From 3d5f46bb8ad0e6767520c9b60a300977955810ce Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 20 May 2014 23:59:54 +0000 Subject: [PATCH 015/906] Move this test to the backend from the frontend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209259 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/DebugInfo/X86/debug-dead-local-var.ll | 50 ++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 test/DebugInfo/X86/debug-dead-local-var.ll diff --git a/test/DebugInfo/X86/debug-dead-local-var.ll b/test/DebugInfo/X86/debug-dead-local-var.ll new file mode 100644 index 000000000000..798b6df6a81e --- /dev/null +++ b/test/DebugInfo/X86/debug-dead-local-var.ll @@ -0,0 +1,50 @@ +; RUN: llc -mtriple=x86_64-linux-gnu %s -filetype=obj -o %t +; RUN: llvm-dwarfdump %t | FileCheck %s + +; static void foo() { +; struct X { int a; int b; } xyz; +; } + +; int bar() { +; foo(); +; return 1; +; } + +; Check that we still have the structure type for X even though we're not +; going to emit a low/high_pc for foo. +; CHECK: DW_TAG_structure_type + +; Function Attrs: nounwind readnone uwtable +define i32 @bar() #0 { +entry: + ret i32 1, !dbg !21 +} + +attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!18, !19} +!llvm.ident = !{!20} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 (trunk 209255) (llvm/trunk 209253)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/debug-dead-local-var.c] [DW_LANG_C99] +!1 = metadata !{metadata !"debug-dead-local-var.c", metadata !"/usr/local/google/home/echristo"} +!2 = metadata !{} +!3 = metadata !{metadata !4, metadata !9} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 11, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @bar, null, null, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [bar] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/debug-dead-local-var.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 6, metadata !10, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, 
null, null, metadata !12, i32 6} ; [ DW_TAG_subprogram ] [line 6] [local] [def] [foo] +!10 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!11 = metadata !{null} +!12 = metadata !{metadata !13} +!13 = metadata !{i32 786688, metadata !9, metadata !"xyz", metadata !5, i32 8, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xyz] [line 8] +!14 = metadata !{i32 786451, metadata !1, metadata !9, metadata !"X", i32 8, i64 64, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [X] [line 8, size 64, align 32, offset 0] [def] [from ] +!15 = metadata !{metadata !16, metadata !17} +!16 = metadata !{i32 786445, metadata !1, metadata !14, metadata !"a", i32 8, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 8, size 32, align 32, offset 0] [from int] +!17 = metadata !{i32 786445, metadata !1, metadata !14, metadata !"b", i32 8, i64 32, i64 32, i64 32, i32 0, metadata !8} ; [ DW_TAG_member ] [b] [line 8, size 32, align 32, offset 32] [from int] +!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!19 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!20 = metadata !{metadata !"clang version 3.5.0 (trunk 209255) (llvm/trunk 209253)"} +!21 = metadata !{i32 13, i32 0, metadata !4, null} From 110260377ad3156467a107f6f7b2544ed3b2d173 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 21 May 2014 00:02:50 +0000 Subject: [PATCH 016/906] Add a comment here. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209262 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/DebugInfo/X86/debug-dead-local-var.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/test/DebugInfo/X86/debug-dead-local-var.ll b/test/DebugInfo/X86/debug-dead-local-var.ll index 798b6df6a81e..64f0b2a9e40f 100644 --- a/test/DebugInfo/X86/debug-dead-local-var.ll +++ b/test/DebugInfo/X86/debug-dead-local-var.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=x86_64-linux-gnu %s -filetype=obj -o %t ; RUN: llvm-dwarfdump %t | FileCheck %s +; Reconstruct this via clang and -O2. ; static void foo() { ; struct X { int a; int b; } xyz; ; } From e648d75c607378d55765103c2ac554a2964b6280 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 21 May 2014 00:20:01 +0000 Subject: [PATCH 017/906] This command line option is only used in one place. Move it there and rename it to something more descriptive. 
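Concretely, the user-visible change is a rename of the llvm-mc flag from -L to -save-temp-labels (the updated RUN line in test/MC/MachO/temp-labels.s below reflects this); the option is now declared directly in tools/llvm-mc/llvm-mc.cpp instead of MCTargetOptionsCommandFlags.h.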
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209263 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCTargetOptionsCommandFlags.h | 3 --- test/MC/MachO/temp-labels.s | 2 +- tools/llvm-mc/llvm-mc.cpp | 3 +++ 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/llvm/MC/MCTargetOptionsCommandFlags.h b/include/llvm/MC/MCTargetOptionsCommandFlags.h index 55ac14215724..24e683f61723 100644 --- a/include/llvm/MC/MCTargetOptionsCommandFlags.h +++ b/include/llvm/MC/MCTargetOptionsCommandFlags.h @@ -40,8 +40,6 @@ cl::opt EnableDwarfDirectory( cl::opt NoExecStack("mc-no-exec-stack", cl::desc("File doesn't need an exec stack")); -cl::opt SaveTempLabels("L", cl::desc("Don't discard temporary labels")); - cl::opt ShowMCEncoding("show-mc-encoding", cl::Hidden, cl::desc("Show encoding in .s output")); cl::opt ShowMCInst("show-mc-inst", cl::Hidden, @@ -57,7 +55,6 @@ static inline MCTargetOptions InitMCTargetOptionsFromFlags() { Options.MCRelaxAll = RelaxAll; Options.MCUseDwarfDirectory = EnableDwarfDirectory; Options.MCNoExecStack = NoExecStack; - Options.MCSaveTempLabels = SaveTempLabels; Options.ShowMCEncoding = ShowMCEncoding; Options.ShowMCInst = ShowMCInst; Options.AsmVerbose = AsmVerbose; diff --git a/test/MC/MachO/temp-labels.s b/test/MC/MachO/temp-labels.s index b7382b7d2c82..ac0f6203aef1 100644 --- a/test/MC/MachO/temp-labels.s +++ b/test/MC/MachO/temp-labels.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -L -o - | macho-dump --dump-section-data | FileCheck %s +// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -save-temp-labels -o - | macho-dump --dump-section-data | FileCheck %s // CHECK: # Load Command 1 // CHECK: (('command', 2) diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index e87f1eef2829..02dcfcd2dd0b 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -156,6 +156,9 @@ static cl::opt MainFileName("main-file-name", cl::desc("Specifies the name we should consider the input file")); +static cl::opt SaveTempLabels("save-temp-labels", + cl::desc("Don't discard temporary labels")); + enum ActionType { AC_AsLex, AC_Assemble, From 91bbc253bd8748562e082da37232c8d283f6f3b5 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 21 May 2014 01:25:24 +0000 Subject: [PATCH 018/906] ARM: correct bundle generation for MOV32T relocations Although the previous code would construct a bundle and add the correct elements to it, it would not finalise the bundle. This resulted in the InternalRead markers not being added to the MachineOperands nor, more importantly, the externally visible defs to the bundle itself. So, although the bundle was not exposing the def, the generated code would be correct because there was no optimisations being performed. When optimisations were enabled, the post register allocator would kick in, and the hazard recognizer would reorder operations around the load which would define the value being operated upon. Rather than manually constructing the bundle, simply construct and finalise the bundle via the finaliseBundle call after both MIs have been emitted. This improves the code generation with optimisations where IMAGE_REL_ARM_MOV32T relocations are emitted. The changes to the other tests are the result of the bundle generation preventing the scheduler from hoisting the moves across the loads. The net effect of the generated code is equivalent, but, is much more identical to what is actually being lowered. 
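For reference, the shape of the new expansion, simplified from the diff below (not a drop-in snippet; operand and predication setup is elided):

  LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
  HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc));
  // ... add the immediate/global-address operands and predication as before ...
  if (RequiresBundling)
    finalizeBundle(MBB, &*LO16, &*MBBI);  // builds the BUNDLE and records internal reads and external defs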
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209267 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 10 +++----- test/CodeGen/ARM/Windows/chkstk.ll | 2 +- test/CodeGen/ARM/Windows/memset.ll | 4 +-- test/CodeGen/ARM/Windows/mov32t-bundling.ll | 28 +++++++++++++++++++++ 4 files changed, 34 insertions(+), 10 deletions(-) create mode 100644 test/CodeGen/ARM/Windows/mov32t-bundling.ll diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 93357fe525ad..6045738e2e34 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -22,6 +22,7 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" @@ -697,9 +698,6 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, HI16Opc = ARM::MOVTi16; } - if (RequiresBundling) - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(TargetOpcode::BUNDLE)); - LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) @@ -735,10 +733,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, LO16.addImm(Pred).addReg(PredReg); HI16.addImm(Pred).addReg(PredReg); - if (RequiresBundling) { - LO16->bundleWithPred(); - HI16->bundleWithPred(); - } + if (RequiresBundling) + finalizeBundle(MBB, &*LO16, &*MBBI); TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); diff --git a/test/CodeGen/ARM/Windows/chkstk.ll b/test/CodeGen/ARM/Windows/chkstk.ll index 9c58fa08d378..cb787e14b5ba 100644 --- a/test/CodeGen/ARM/Windows/chkstk.ll +++ b/test/CodeGen/ARM/Windows/chkstk.ll @@ -16,9 +16,9 @@ entry: ; CHECK-DEFAULT-CODE-MODEL: sub.w sp, sp, r4 ; CHECK-LARGE-CODE-MODEL: check_watermark: -; CHECK-LARGE-CODE-MODEL: movw r4, #1024 ; CHECK-LARGE-CODE-MODEL: movw r12, :lower16:__chkstk ; CHECK-LARGE-CODE-MODEL: movt r12, :upper16:__chkstk +; CHECK-LARGE-CODE-MODEL: movw r4, #1024 ; CHECK-LARGE-CODE-MODEL: blx r12 ; CHECK-LARGE-CODE-MODEL: sub.w sp, sp, r4 diff --git a/test/CodeGen/ARM/Windows/memset.ll b/test/CodeGen/ARM/Windows/memset.ll index bcf744c909df..500e25e259c6 100644 --- a/test/CodeGen/ARM/Windows/memset.ll +++ b/test/CodeGen/ARM/Windows/memset.ll @@ -10,9 +10,9 @@ entry: unreachable } -; CHECK: movs r1, #0 -; CHECK: mov.w r2, #512 ; CHECK: movw r0, :lower16:source ; CHECK: movt r0, :upper16:source +; CHECK: movs r1, #0 +; CHECK: mov.w r2, #512 ; CHECK: memset diff --git a/test/CodeGen/ARM/Windows/mov32t-bundling.ll b/test/CodeGen/ARM/Windows/mov32t-bundling.ll new file mode 100644 index 000000000000..5f838378fa87 --- /dev/null +++ b/test/CodeGen/ARM/Windows/mov32t-bundling.ll @@ -0,0 +1,28 @@ +; RUN: llc -mtriple thumbv7-windows-itanium -filetype asm -o - %s | FileCheck %s + +@_begin = external global i8 +@_end = external global i8 + +declare arm_aapcs_vfpcc void @force_emission() + +define arm_aapcs_vfpcc void @bundle() { +entry: + br i1 icmp uge (i32 sub (i32 ptrtoint (i8* @_end to i32), i32 ptrtoint (i8* @_begin to i32)), i32 4), label %if.then, label %if.end + +if.then: + tail call arm_aapcs_vfpcc void @force_emission() + br label %if.end + +if.end: + ret void +} + +; CHECK-LABEL: bundle +; CHECK-NOT: subs r0, r1, r0 +; CHECK: movw r0, 
:lower16:_begin +; CHECK-NEXT: movt r0, :upper16:_begin +; CHECK-NEXT: movw r1, :lower16:_end +; CHECK-NEXT: movt r1, :upper16:_end +; CHECK-NEXT: subs r0, r1, r0 +; CHECK-NEXT: cmp r0, #4 + From 651f680b79947b345a5cd6ddb177483365e3b551 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 21 May 2014 02:46:14 +0000 Subject: [PATCH 019/906] [modules] Add module maps for LLVM. These are not quite ready for prime-time yet, but only a few more Clang patches need to land. (I have 'ninja check' passing locally.) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209269 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm-c/module.modulemap | 5 + include/llvm/CMakeLists.txt | 6 + include/llvm/module.modulemap | 177 ++++++++++++++++++++++++++++ include/llvm/module.modulemap.build | 5 + lib/AsmParser/module.modulemap | 1 + lib/Bitcode/module.modulemap | 1 + lib/CodeGen/module.modulemap | 1 + lib/DebugInfo/module.modulemap | 1 + lib/IR/module.modulemap | 1 + lib/TableGen/module.modulemap | 1 + utils/TableGen/module.modulemap | 4 + 11 files changed, 203 insertions(+) create mode 100644 include/llvm-c/module.modulemap create mode 100644 include/llvm/module.modulemap create mode 100644 include/llvm/module.modulemap.build create mode 100644 lib/AsmParser/module.modulemap create mode 100644 lib/Bitcode/module.modulemap create mode 100644 lib/CodeGen/module.modulemap create mode 100644 lib/DebugInfo/module.modulemap create mode 100644 lib/IR/module.modulemap create mode 100644 lib/TableGen/module.modulemap create mode 100644 utils/TableGen/module.modulemap diff --git a/include/llvm-c/module.modulemap b/include/llvm-c/module.modulemap new file mode 100644 index 000000000000..2bcdbc17a5c3 --- /dev/null +++ b/include/llvm-c/module.modulemap @@ -0,0 +1,5 @@ +module LLVM_C { + requires cplusplus + umbrella "." + module * { export * } +} diff --git a/include/llvm/CMakeLists.txt b/include/llvm/CMakeLists.txt index 0f5c63ded67a..ca4fd1338ed7 100644 --- a/include/llvm/CMakeLists.txt +++ b/include/llvm/CMakeLists.txt @@ -12,3 +12,9 @@ if( MSVC_IDE OR XCODE ) set_target_properties(llvm_headers_do_not_build PROPERTIES FOLDER "Misc" EXCLUDE_FROM_DEFAULT_BUILD ON) endif() + +# If we're doing an out-of-tree build, copy a module map for generated +# header files into the build area. +if (NOT "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") + configure_file(module.modulemap.build module.modulemap COPYONLY) +endif (NOT "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap new file mode 100644 index 000000000000..1790a7242e5b --- /dev/null +++ b/include/llvm/module.modulemap @@ -0,0 +1,177 @@ +module LLVM_Analysis { + requires cplusplus + umbrella "Analysis" + module * { export * } + exclude header "Analysis/BlockFrequencyInfoImpl.h" +} + +module LLVM_AsmParser { requires cplusplus umbrella "AsmParser" module * { export * } } + +// A module covering CodeGen/ and Target/. These are intertwined +// and codependent, and thus notionally form a single module. +module LLVM_Backend { + requires cplusplus + + module CodeGen { + umbrella "CodeGen" + module * { export * } + + // FIXME: Why is this excluded? + exclude header "CodeGen/MachineValueType.h" + + // Exclude these; they're intended to be included into only a single + // translation unit (or none) and aren't part of this module. 
+ exclude header "CodeGen/CommandFlags.h" + exclude header "CodeGen/LinkAllAsmWriterComponents.h" + exclude header "CodeGen/LinkAllCodegenComponents.h" + } + + module Target { + umbrella "Target" + module * { export * } + } + + // FIXME: Where should this go? + module Analysis_BlockFrequencyInfoImpl { + header "Analysis/BlockFrequencyInfoImpl.h" + export * + } +} + +module LLVM_Bitcode { requires cplusplus umbrella "Bitcode" module * { export * } } +module LLVM_Config { requires cplusplus umbrella "Config" module * { export * } } +module LLVM_DebugInfo { requires cplusplus umbrella "DebugInfo" module * { export * } } +module LLVM_ExecutionEngine { + requires cplusplus + + umbrella "ExecutionEngine" + module * { export * } + + // Exclude this; it's an optional component of the ExecutionEngine. + exclude header "ExecutionEngine/OProfileWrapper.h" + + // Exclude these; they're intended to be included into only a single + // translation unit (or none) and aren't part of this module. + exclude header "ExecutionEngine/JIT.h" + exclude header "ExecutionEngine/MCJIT.h" + exclude header "ExecutionEngine/Interpreter.h" +} + +module LLVM_IR { + requires cplusplus + + // FIXME: Is this the right place for these? + module Pass { header "Pass.h" export * } + module PassSupport { header "PassSupport.h" export * } + module PassAnalysisSupport { header "PassAnalysisSupport.h" export * } + module PassRegistry { header "PassRegistry.h" export * } + module InitializePasses { header "InitializePasses.h" export * } + + umbrella "IR" + module * { export * } + + // We cannot have llvm/PassManager.h and llvm/IR/PassManager.h in the same TU, + // so we can't include llvm/IR/PassManager.h in the IR module. + exclude header "IR/PassManager.h" + exclude header "IR/LegacyPassManager.h" + + // Exclude this; it's intended for (repeated) textual inclusion. + exclude header "IR/Instruction.def" +} + +module LLVM_LegacyPassManager { + requires cplusplus + module CompatInterface { header "PassManager.h" export * } + module Implementation { header "IR/LegacyPassManager.h" export * } +} + +module LLVM_IR_PassManager { + requires cplusplus + // FIXME PR19358: This doesn't work! conflict LLVM_LegacyPassManager, "cannot use legacy pass manager and new pass manager in same file" + header "IR/PassManager.h" + export * +} + +module LLVM_IRReader { requires cplusplus umbrella "IRReader" module * { export * } } +module LLVM_LineEditor { requires cplusplus umbrella "LineEditor" module * { export * } } +module LLVM_LTO { requires cplusplus umbrella "LTO" module * { export * } } + +module LLVM_MC { + requires cplusplus + + // FIXME: Mislayered? + module Support_TargetRegistry { + header "Support/TargetRegistry.h" + export * + } + + umbrella "MC" + module * { export * } + + // Exclude this; it's fundamentally non-modular. + exclude header "MC/MCTargetOptionsCommandFlags.h" +} + +module LLVM_Object { requires cplusplus umbrella "Object" module * { export * } } +module LLVM_Option { requires cplusplus umbrella "Option" module * { export * } } +module LLVM_TableGen { requires cplusplus umbrella "TableGen" module * { export * } } + +module LLVM_Transforms { + requires cplusplus + umbrella "Transforms" + module * { export * } + + // FIXME: Excluded because it does bad things with the legacy pass manager. + exclude header "Transforms/IPO/PassManagerBuilder.h" +} + +// A module covering ADT/ and Support/. These are intertwined and +// codependent, and notionally form a single module. 
+module LLVM_Utils { + module ADT { + requires cplusplus + + umbrella "ADT" + module * { export * } + } + + module Support { + requires cplusplus + + umbrella "Support" + module * { export * } + + // Exclude this; it's only included on Solaris. + exclude header "Support/Solaris.h" + + // Exclude this; it's only included on AIX and fundamentally non-modular. + exclude header "Support/AIXDataTypesFix.h" + + // Exclude this; it's fundamentally non-modular. + exclude header "Support/Debug.h" + + // Exclude this; it's fundamentally non-modular. + exclude header "Support/PluginLoader.h" + + // Exclude this; it's a weirdly-factored part of llvm-gcov and conflicts + // with the Analysis module (which also defines an llvm::GCOVOptions). + exclude header "Support/GCOV.h" + + // FIXME: Mislayered? + exclude header "Support/TargetRegistry.h" + } +} + +module LLVM_CodeGen_MachineValueType { + requires cplusplus + header "CodeGen/MachineValueType.h" + export * +} + +// This is used for a $src == $build compilation. Otherwise we use +// LLVM_Support_DataTypes_Build, defined in a module map that is +// copied into the build area. +module LLVM_Support_DataTypes_Src { + header "llvm/Support/DataTypes.h" + export * +} diff --git a/include/llvm/module.modulemap.build b/include/llvm/module.modulemap.build new file mode 100644 index 000000000000..7150fe93935f --- /dev/null +++ b/include/llvm/module.modulemap.build @@ -0,0 +1,5 @@ +// This is copied into the build area for a $src != $build compilation. +module LLVM_Support_DataTypes { + header "Support/DataTypes.h" + export * +} diff --git a/lib/AsmParser/module.modulemap b/lib/AsmParser/module.modulemap new file mode 100644 index 000000000000..cc300060b3f5 --- /dev/null +++ b/lib/AsmParser/module.modulemap @@ -0,0 +1 @@ +module AsmParser { requires cplusplus umbrella "." module * { export * } } diff --git a/lib/Bitcode/module.modulemap b/lib/Bitcode/module.modulemap new file mode 100644 index 000000000000..7df1a0a3c721 --- /dev/null +++ b/lib/Bitcode/module.modulemap @@ -0,0 +1 @@ +module Bitcode { requires cplusplus umbrella "." module * { export * } } diff --git a/lib/CodeGen/module.modulemap b/lib/CodeGen/module.modulemap new file mode 100644 index 000000000000..d4f68bcc6eed --- /dev/null +++ b/lib/CodeGen/module.modulemap @@ -0,0 +1 @@ +module CodeGen { requires cplusplus umbrella "." module * { export * } } diff --git a/lib/DebugInfo/module.modulemap b/lib/DebugInfo/module.modulemap new file mode 100644 index 000000000000..1fe5ab130fd7 --- /dev/null +++ b/lib/DebugInfo/module.modulemap @@ -0,0 +1 @@ +module DebugInfo { requires cplusplus umbrella "." module * { export * } } diff --git a/lib/IR/module.modulemap b/lib/IR/module.modulemap new file mode 100644 index 000000000000..9698e9178349 --- /dev/null +++ b/lib/IR/module.modulemap @@ -0,0 +1 @@ +module IR { requires cplusplus umbrella "." module * { export * } } diff --git a/lib/TableGen/module.modulemap b/lib/TableGen/module.modulemap new file mode 100644 index 000000000000..8dac0a22c142 --- /dev/null +++ b/lib/TableGen/module.modulemap @@ -0,0 +1 @@ +module TableGen { requires cplusplus umbrella "." module * { export * } } diff --git a/utils/TableGen/module.modulemap b/utils/TableGen/module.modulemap new file mode 100644 index 000000000000..8871bbfd4a2f --- /dev/null +++ b/utils/TableGen/module.modulemap @@ -0,0 +1,4 @@ +module TableGen { + umbrella "." 
+ module * { export * } +} From 8235ed1808c65ca8ebcc811c166387c24ff63083 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 21 May 2014 05:15:01 +0000 Subject: [PATCH 020/906] MC: mark COFF .drectve section as REMOVE The .drectve section should be marked as IMAGE_SCN_LNK_REMOVE. This matches what the MSVC toolchain does and accurately reflects that this section should not be emitted into the final binary. This section is merely information for the linker, comprising of additional linker directives. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209273 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCObjectFileInfo.cpp | 2 +- .../COFF/directive-section-characteristics.ll | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 test/MC/COFF/directive-section-characteristics.ll diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index e4bcfa257f69..e5377890da5b 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -739,7 +739,7 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { DrectveSection = Ctx->getCOFFSection(".drectve", - COFF::IMAGE_SCN_LNK_INFO, + COFF::IMAGE_SCN_LNK_INFO | COFF::IMAGE_SCN_LNK_REMOVE, SectionKind::getMetadata()); PDataSection = diff --git a/test/MC/COFF/directive-section-characteristics.ll b/test/MC/COFF/directive-section-characteristics.ll new file mode 100644 index 000000000000..ca8102af641d --- /dev/null +++ b/test/MC/COFF/directive-section-characteristics.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple i686-windows -filetype obj -o - %s | llvm-readobj -sections \ +; RUN: | FileCheck %s + +define dllexport void @function() { +entry: + ret void +} + +; CHECK: Section { +; CHECK: Name: .drectve +; CHECK: Characteristics [ +; CHECK: IMAGE_SCN_ALIGN_1BYTES +; CHECK: IMAGE_SCN_LNK_INFO +; CHECK: IMAGE_SCN_LNK_REMOVE +; CHECK: ] +; CHECK: } + From c0e669384dce6767a75a4aaa0d4bfb8e806cedbb Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Wed, 21 May 2014 08:14:24 +0000 Subject: [PATCH 021/906] [asan] Fix x86-32 asm instrumentation to preserve flags. Patch by Yuri Gorshenin. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209280 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp index 85c7bf009726..f3e6b3f4193b 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp @@ -161,8 +161,7 @@ void X86AddressSanitizer32::InstrumentMemOperandImpl( MCSymbolRefExpr::Create(FuncSym, MCSymbolRefExpr::VK_PLT, Ctx); EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FuncExpr)); } - EmitInstruction(Out, MCInstBuilder(X86::ADD32ri).addReg(X86::ESP) - .addReg(X86::ESP).addImm(4)); + EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX)); EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX)); } From 3046dcbce4645fddd379030295fccb09e9b87b1d Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 21 May 2014 10:11:24 +0000 Subject: [PATCH 022/906] [asm matcher] Fix incorrect assertion when there are exactly 32 SubtargetFeatures Summary: The minimal type needs to hold a value of '1ULL << 31' but getMinimalTypeForRange() is called with a value of '1ULL << 32'. This patch will also reduce the size of the matcher table when there are 8 or 16 SubtargetFeatures. 
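A worked example makes the off-by-one concrete (values taken from the code below):

  // With exactly 32 subtarget features the flag indices are 0..31, so the largest flag is
  //   1ULL << 31 == 0x80000000, which fits in uint32_t.
  // The old code asked for getMinimalTypeForRange(1ULL << NumFeatures), i.e. 1ULL << 32
  //   == 0x100000000, tripping the "Enum too large" assertion even though uint32_t suffices.
  // getMinimalRequiredFeaturesType() now passes 1ULL << (NumFeatures - 1) instead, and the
  //   boundary check treats 0xFFFFFFFF itself as a 32-bit value.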
Also added a dump of the SubtargetFeatures to the -debug output and corrected getMinimalTypeInRange() to consider 0xffffffffull to be a 32-bit value. The testcase is that no existing code is broken and that LLVM still successfully compiles after adding MIPS64r6 CodeGen support. Reviewers: rafael Reviewed By: rafael Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D3787 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209288 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/TableGen/AsmMatcherEmitter.cpp | 40 +++++++++++++++++++--------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index bb32cf427ee8..3d72741c77c4 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -573,6 +573,11 @@ struct SubtargetFeatureInfo { std::string getEnumName() const { return "Feature_" + TheDef->getName(); } + + void dump() { + errs() << getEnumName() << " " << Index << "\n"; + TheDef->dump(); + } }; struct OperandMatchEntry { @@ -1324,6 +1329,7 @@ void AsmMatcherInfo::buildInfo() { unsigned FeatureNo = SubtargetFeatures.size(); SubtargetFeatures[Pred] = new SubtargetFeatureInfo(Pred, FeatureNo); + DEBUG(SubtargetFeatures[Pred]->dump()); assert(FeatureNo < 32 && "Too many subtarget features!"); } @@ -2199,18 +2205,35 @@ static void emitMatchRegisterName(CodeGenTarget &Target, Record *AsmParser, OS << "}\n\n"; } +static const char *getMinimalTypeForRange(uint64_t Range) { + assert(Range <= 0xFFFFFFFFULL && "Enum too large"); + if (Range > 0xFFFF) + return "uint32_t"; + if (Range > 0xFF) + return "uint16_t"; + return "uint8_t"; +} + +static const char *getMinimalRequiredFeaturesType(const AsmMatcherInfo &Info) { + uint64_t MaxIndex = Info.SubtargetFeatures.size(); + if (MaxIndex > 0) + MaxIndex--; + return getMinimalTypeForRange(1ULL << MaxIndex); +} + /// emitSubtargetFeatureFlagEnumeration - Emit the subtarget feature flag /// definitions. 
static void emitSubtargetFeatureFlagEnumeration(AsmMatcherInfo &Info, raw_ostream &OS) { OS << "// Flags for subtarget features that participate in " << "instruction matching.\n"; - OS << "enum SubtargetFeatureFlag {\n"; + OS << "enum SubtargetFeatureFlag : " << getMinimalRequiredFeaturesType(Info) + << " {\n"; for (std::map::const_iterator it = Info.SubtargetFeatures.begin(), ie = Info.SubtargetFeatures.end(); it != ie; ++it) { SubtargetFeatureInfo &SFI = *it->second; - OS << " " << SFI.getEnumName() << " = (1 << " << SFI.Index << "),\n"; + OS << " " << SFI.getEnumName() << " = (1U << " << SFI.Index << "),\n"; } OS << " Feature_None = 0\n"; OS << "};\n\n"; @@ -2446,15 +2469,6 @@ static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info, return true; } -static const char *getMinimalTypeForRange(uint64_t Range) { - assert(Range < 0xFFFFFFFFULL && "Enum too large"); - if (Range > 0xFFFF) - return "uint32_t"; - if (Range > 0xFF) - return "uint16_t"; - return "uint8_t"; -} - static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target, const AsmMatcherInfo &Info, StringRef ClassName, StringToOffsetTable &StringTable, @@ -2469,7 +2483,7 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target, // Emit the static custom operand parsing table; OS << "namespace {\n"; OS << " struct OperandMatchEntry {\n"; - OS << " " << getMinimalTypeForRange(1ULL << Info.SubtargetFeatures.size()) + OS << " " << getMinimalRequiredFeaturesType(Info) << " RequiredFeatures;\n"; OS << " " << getMinimalTypeForRange(MaxMnemonicIndex) << " Mnemonic;\n"; @@ -2805,7 +2819,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " uint16_t Opcode;\n"; OS << " " << getMinimalTypeForRange(Info.Matchables.size()) << " ConvertFn;\n"; - OS << " " << getMinimalTypeForRange(1ULL << Info.SubtargetFeatures.size()) + OS << " " << getMinimalRequiredFeaturesType(Info) << " RequiredFeatures;\n"; OS << " " << getMinimalTypeForRange(Info.Classes.size()) << " Classes[" << MaxNumOperands << "];\n"; From 820861536c125a3da403b2bb7e9d9399fdf0fcdd Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 21 May 2014 12:56:39 +0000 Subject: [PATCH 023/906] [mips][mips64r6] Add bc[12](eq|ne)z Summary: Depends on D3691 Reviewers: jkolek, zoran.jovanovic, vmedic Reviewed By: vmedic Differential Revision: http://reviews.llvm.org/D3760 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209292 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Mips/Disassembler/MipsDisassembler.cpp | 17 ++++++++++ lib/Target/Mips/Mips32r6InstrFormats.td | 29 ++++++++++++++++ lib/Target/Mips/Mips32r6InstrInfo.td | 33 ++++++++++++++++--- test/MC/Mips/mips32r6/valid.s | 8 +++++ test/MC/Mips/mips64r6/valid.s | 8 +++++ 5 files changed, 91 insertions(+), 4 deletions(-) diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 21abe6c5857d..aeafe92d674a 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -195,6 +195,11 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, @@ -846,6 +851,18 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus 
DecodeCOP2RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::COP2RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td index fc656d7beea9..926181b9aa4d 100644 --- a/lib/Target/Mips/Mips32r6InstrFormats.td +++ b/lib/Target/Mips/Mips32r6InstrFormats.td @@ -24,6 +24,7 @@ class MipsR6Inst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, //===----------------------------------------------------------------------===// def OPGROUP_COP1 { bits<6> Value = 0b010001; } +def OPGROUP_COP2 { bits<6> Value = 0b010010; } def OPGROUP_AUI { bits<6> Value = 0b001111; } def OPGROUP_DAUI { bits<6> Value = 0b011101; } def OPGROUP_PCREL { bits<6> Value = 0b111011; } @@ -45,6 +46,10 @@ def OPCODE5_ALUIPC : OPCODE5<0b11111>; def OPCODE5_AUIPC : OPCODE5<0b11110>; def OPCODE5_DAHI : OPCODE5<0b00110>; def OPCODE5_DATI : OPCODE5<0b11110>; +def OPCODE5_BC1EQZ : OPCODE5<0b01001>; +def OPCODE5_BC1NEZ : OPCODE5<0b01101>; +def OPCODE5_BC2EQZ : OPCODE5<0b01001>; +def OPCODE5_BC2NEZ : OPCODE5<0b01101>; class OPCODE6 Val> { bits<6> Value = Val; @@ -138,6 +143,30 @@ class COP1_3R_FM funct, FIELD_FMT Format> : MipsR6Inst { let Inst{5-0} = funct; } +class COP1_BCCZ_FM : MipsR6Inst { + bits<5> ft; + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_COP1.Value; + let Inst{25-21} = Operation.Value; + let Inst{20-16} = ft; + let Inst{15-0} = offset; +} + +class COP2_BCCZ_FM : MipsR6Inst { + bits<5> ct; + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_COP2.Value; + let Inst{25-21} = Operation.Value; + let Inst{20-16} = ct; + let Inst{15-0} = offset; +} + class PCREL16_FM : MipsR6Inst { bits<5> rs; bits<16> imm; diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 4d2d103f88e8..7599f07c8a8a 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -107,6 +107,11 @@ class BEQZC_ENC : CMP_BRANCH_OFF21_FM<0b110110>; class BGEZALC_ENC : CMP_BRANCH_OFF16_FM<0b000110>; class BNEZC_ENC : CMP_BRANCH_OFF21_FM<0b111110>; +class BC1EQZ_ENC : COP1_BCCZ_FM; +class BC1NEZ_ENC : COP1_BCCZ_FM; +class BC2EQZ_ENC : COP2_BCCZ_FM; +class BC2NEZ_ENC : COP2_BCCZ_FM; + class JIALC_ENC : JMP_IDX_COMPACT_FM<0b111110>; class JIC_ENC : JMP_IDX_COMPACT_FM<0b110110>; @@ -326,6 +331,26 @@ class BGTZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgtzc", brtarget, GPR32Opnd>; class BEQZC_DESC : CMP_CBR_EQNE_Z_DESC_BASE<"beqzc", brtarget21, GPR32Opnd>; class BNEZC_DESC : CMP_CBR_EQNE_Z_DESC_BASE<"bnezc", brtarget21, GPR32Opnd>; +class COP1_BCCZ_DESC_BASE : BRANCH_DESC_BASE { + dag InOperandList = (ins FGR64Opnd:$ft, brtarget:$offset); + dag OutOperandList = (outs); + string AsmString = instr_asm; + bit hasDelaySlot = 1; +} + +class BC1EQZ_DESC : COP1_BCCZ_DESC_BASE<"bc1eqz $ft, $offset">; +class BC1NEZ_DESC : COP1_BCCZ_DESC_BASE<"bc1nez $ft, $offset">; + +class COP2_BCCZ_DESC_BASE : BRANCH_DESC_BASE { + dag InOperandList = (ins COP2Opnd:$ct, brtarget:$offset); + dag OutOperandList = (outs); + string AsmString = instr_asm; + bit hasDelaySlot = 1; +} + +class BC2EQZ_DESC : COP2_BCCZ_DESC_BASE<"bc2eqz $ct, $offset">; +class BC2NEZ_DESC : COP2_BCCZ_DESC_BASE<"bc2nez $ct, $offset">; + class 
JMP_IDX_COMPACT_DESC_BASE { dag InOperandList = (ins GPROpnd:$rt, opnd:$offset); @@ -484,10 +509,10 @@ def ALUIPC : ALUIPC_ENC, ALUIPC_DESC, ISA_MIPS32R6; def AUI : AUI_ENC, AUI_DESC, ISA_MIPS32R6; def AUIPC : AUIPC_ENC, AUIPC_DESC, ISA_MIPS32R6; def BALC : BALC_ENC, BALC_DESC, ISA_MIPS32R6; -def BC1EQZ; -def BC1NEZ; -def BC2EQZ; -def BC2NEZ; +def BC1EQZ : BC1EQZ_ENC, BC1EQZ_DESC, ISA_MIPS32R6; +def BC1NEZ : BC1NEZ_ENC, BC1NEZ_DESC, ISA_MIPS32R6; +def BC2EQZ : BC2EQZ_ENC, BC2EQZ_DESC, ISA_MIPS32R6; +def BC2NEZ : BC2NEZ_ENC, BC2NEZ_DESC, ISA_MIPS32R6; def BC : BC_ENC, BC_DESC, ISA_MIPS32R6; def BEQC : BEQC_ENC, BEQC_DESC, ISA_MIPS32R6; def BEQZALC : BEQZALC_ENC, BEQZALC_DESC, ISA_MIPS32R6; diff --git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s index e276c8eb9838..33965c171ab8 100644 --- a/test/MC/Mips/mips32r6/valid.s +++ b/test/MC/Mips/mips32r6/valid.s @@ -11,6 +11,14 @@ auipc $3, -1 # CHECK: auipc $3, -1 # encoding: [0xec,0x7e,0xff,0xff] balc 14572256 # CHECK: balc 14572256 # encoding: [0xe8,0x37,0x96,0xb8] bc 14572256 # CHECK: bc 14572256 # encoding: [0xc8,0x37,0x96,0xb8] + bc1eqz $f0,4 # CHECK: bc1eqz $f0, 4 # encoding: [0x45,0x20,0x00,0x01] + bc1eqz $f31,4 # CHECK: bc1eqz $f31, 4 # encoding: [0x45,0x3f,0x00,0x01] + bc1nez $f0,4 # CHECK: bc1nez $f0, 4 # encoding: [0x45,0xa0,0x00,0x01] + bc1nez $f31,4 # CHECK: bc1nez $f31, 4 # encoding: [0x45,0xbf,0x00,0x01] + bc2eqz $0,8 # CHECK: bc2eqz $0, 8 # encoding: [0x49,0x20,0x00,0x02] + bc2eqz $31,8 # CHECK: bc2eqz $31, 8 # encoding: [0x49,0x3f,0x00,0x02] + bc2nez $0,8 # CHECK: bc2nez $0, 8 # encoding: [0x49,0xa0,0x00,0x02] + bc2nez $31,8 # CHECK: bc2nez $31, 8 # encoding: [0x49,0xbf,0x00,0x02] beqc $5, $6, 256 # CHECK: beqc $5, $6, 256 # encoding: [0x20,0xa6,0x00,0x40] beqzalc $2, 1332 # CHECK: beqzalc $2, 1332 # encoding: [0x20,0x02,0x01,0x4d] bnec $5, $6, 256 # CHECK: bnec $5, $6, 256 # encoding: [0x60,0xa6,0x00,0x40] diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s index 4b66eb8db9fa..6d7ffbd771f5 100644 --- a/test/MC/Mips/mips64r6/valid.s +++ b/test/MC/Mips/mips64r6/valid.s @@ -11,6 +11,14 @@ auipc $3, -1 # CHECK: auipc $3, -1 # encoding: [0xec,0x7e,0xff,0xff] balc 14572256 # CHECK: balc 14572256 # encoding: [0xe8,0x37,0x96,0xb8] bc 14572256 # CHECK: bc 14572256 # encoding: [0xc8,0x37,0x96,0xb8] + bc1eqz $f0,4 # CHECK: bc1eqz $f0, 4 # encoding: [0x45,0x20,0x00,0x01] + bc1eqz $f31,4 # CHECK: bc1eqz $f31, 4 # encoding: [0x45,0x3f,0x00,0x01] + bc1nez $f0,4 # CHECK: bc1nez $f0, 4 # encoding: [0x45,0xa0,0x00,0x01] + bc1nez $f31,4 # CHECK: bc1nez $f31, 4 # encoding: [0x45,0xbf,0x00,0x01] + bc2eqz $0,8 # CHECK: bc2eqz $0, 8 # encoding: [0x49,0x20,0x00,0x02] + bc2eqz $31,8 # CHECK: bc2eqz $31, 8 # encoding: [0x49,0x3f,0x00,0x02] + bc2nez $0,8 # CHECK: bc2nez $0, 8 # encoding: [0x49,0xa0,0x00,0x02] + bc2nez $31,8 # CHECK: bc2nez $31, 8 # encoding: [0x49,0xbf,0x00,0x02] beqc $5, $6, 256 # CHECK: beqc $5, $6, 256 # encoding: [0x20,0xa6,0x00,0x40] beqzalc $2, 1332 # CHECK: beqzalc $2, 1332 # encoding: [0x20,0x02,0x01,0x4d] bnec $5, $6, 256 # CHECK: bnec $5, $6, 256 # encoding: [0x60,0xa6,0x00,0x40] From c12c3d075383806be3b7dd2a5166124c6de8de34 Mon Sep 17 00:00:00 2001 From: Dave Estes Date: Wed, 21 May 2014 16:19:51 +0000 Subject: [PATCH 024/906] Test comment commit. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209306 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/ARM64/misched-basic-A53.ll | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/CodeGen/ARM64/misched-basic-A53.ll b/test/CodeGen/ARM64/misched-basic-A53.ll index b87a523a30be..d69b097a9b58 100644 --- a/test/CodeGen/ARM64/misched-basic-A53.ll +++ b/test/CodeGen/ARM64/misched-basic-A53.ll @@ -110,9 +110,8 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"= attributes #1 = { nounwind } -; Regression Test for Bug 19761 -; - [ARM64] Cortex-a53 schedule mode can't handle NEON post-increment load -; - http://llvm.org/bugs/show_bug.cgi?id=19761 +; Regression Test for PR19761 +; [ARM64] Cortex-a53 schedule mode can't handle NEON post-increment load ; ; Nothing explicit to check other than llc not crashing. define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) { From 03b003b1cb2f0ab2b6bd05b879c2624522b18672 Mon Sep 17 00:00:00 2001 From: Greg Fitzgerald Date: Wed, 21 May 2014 16:44:03 +0000 Subject: [PATCH 025/906] Use llvm-lit if LLVM source tree is unavailable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209308 91177308-0d34-0410-b5e6-96231b3b80d8 --- cmake/modules/AddLLVM.cmake | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index df4aef623256..69ffa5b6606a 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -632,11 +632,12 @@ function(add_lit_target target comment) if (NOT CMAKE_CFG_INTDIR STREQUAL ".") list(APPEND LIT_ARGS --param build_mode=${CMAKE_CFG_INTDIR}) endif () - set(LIT_COMMAND - ${PYTHON_EXECUTABLE} - ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py - ${LIT_ARGS} - ) + if (LLVM_MAIN_SRC_DIR) + set (LIT_COMMAND ${PYTHON_EXECUTABLE} ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) + else() + find_program(LIT_COMMAND llvm-lit) + endif () + list(APPEND LIT_COMMAND ${LIT_ARGS}) foreach(param ${ARG_PARAMS}) list(APPEND LIT_COMMAND --param ${param}) endforeach() From 5dd4b9f43b4457ea481b1b649777badb79c575d5 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 21 May 2014 17:53:18 +0000 Subject: [PATCH 026/906] MC: loosen an overzealous assertion Permit active macro expansions when terminating the assembler if there were errors during the expansion. This would only trigger on invalid input when built with assertions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209309 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 3 ++- test/MC/AsmParser/invalid-input-assertion.s | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 test/MC/AsmParser/invalid-input-assertion.s diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 633d10124816..ec2094d6715b 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -528,7 +528,8 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, } AsmParser::~AsmParser() { - assert(ActiveMacros.empty() && "Unexpected active macro instantiation!"); + assert((HadError || ActiveMacros.empty()) && + "Unexpected active macro instantiation!"); // Destroy any macros. 
for (StringMap::iterator it = MacroMap.begin(), diff --git a/test/MC/AsmParser/invalid-input-assertion.s b/test/MC/AsmParser/invalid-input-assertion.s new file mode 100644 index 000000000000..68846fe6a81a --- /dev/null +++ b/test/MC/AsmParser/invalid-input-assertion.s @@ -0,0 +1,10 @@ +// RUN: not llvm-mc -triple i686-linux -o /dev/null %s +// REQUIRES: asserts + + .macro macro parameter=0 + .if \parameter + .else + .endm + + macro 1 + From 49323774b4063965141d8614981264c8a430d9da Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 21 May 2014 18:03:59 +0000 Subject: [PATCH 027/906] Use cast<> instead of unchecked dyn_cast git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209310 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 04924cf130a3..008e1311b737 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -591,7 +591,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, unsigned Size = TD->getTypeAllocSize(EltType); unsigned Alignment = TD->getPrefTypeAlignment(EltType); - const GlobalVariable *Var = dyn_cast(GV); + const GlobalVariable *Var = cast(GV); const Constant *Init = Var->getInitializer(); int FI = FrameInfo->CreateStackObject(Size, Alignment, false); SDValue InitPtr = DAG.getFrameIndex(FI, From c10a1edf3f6f86f8df973fe9de9432d5d4da6219 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 21 May 2014 18:04:33 +0000 Subject: [PATCH 028/906] DebugInfo: Simplify subprogram declaration creation/references and accidentally refix PR11300. Also simplifies the linkage name handling a little too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209311 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 39 ++++++++++------------ test/DebugInfo/X86/DW_AT_linkage_name.ll | 4 +-- test/DebugInfo/X86/DW_AT_specification.ll | 8 ++--- test/DebugInfo/X86/concrete_out_of_line.ll | 15 ++++----- test/DebugInfo/X86/pr11300.ll | 8 +++-- 5 files changed, 36 insertions(+), 38 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index f09cb6ade638..8382990a6c38 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1384,40 +1384,37 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) { if (DIE *SPDie = getDIE(SP)) return SPDie; - DISubprogram SPDecl = SP.getFunctionDeclaration(); - if (SPDecl.isSubprogram()) + DIE *DeclDie = nullptr; + StringRef DeclLinkageName; + if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { // Add subprogram definitions to the CU die directly. ContextDIE = &getUnitDie(); + DeclDie = getOrCreateSubprogramDIE(SPDecl); + DeclLinkageName = SPDecl.getLinkageName(); + } // DW_TAG_inlined_subroutine may refer to this DIE. DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP); - DIE *DeclDie = nullptr; - if (SPDecl.isSubprogram()) - DeclDie = getOrCreateSubprogramDIE(SPDecl); // Add function template parameters. addTemplateParams(SPDie, SP.getTemplateParams()); - if (DeclDie) - // Refer function declaration directly. - addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie); - // Add the linkage name if we have one and it isn't in the Decl. 
StringRef LinkageName = SP.getLinkageName(); - if (!LinkageName.empty()) { - if (SPDecl.isSubprogram() && !SPDecl.getLinkageName().empty()) - assert(SPDecl.getLinkageName() == SP.getLinkageName() && - "decl has a linkage name and it is different"); - else - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); - } - - // If this DIE is going to refer declaration info using AT_specification - // then there is no need to add other attributes. - if (DeclDie) + assert(((LinkageName.empty() || DeclLinkageName.empty()) || + LinkageName == DeclLinkageName) && + "decl has a linkage name and it is different"); + if (!LinkageName.empty() && DeclLinkageName.empty()) + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, + GlobalValue::getRealLinkageName(LinkageName)); + + if (DeclDie) { + // Refer to the function declaration where all the other attributes will be + // found. + addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie); return &SPDie; + } // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) diff --git a/test/DebugInfo/X86/DW_AT_linkage_name.ll b/test/DebugInfo/X86/DW_AT_linkage_name.ll index 4aa69f2aa20b..76d3abbe358c 100644 --- a/test/DebugInfo/X86/DW_AT_linkage_name.ll +++ b/test/DebugInfo/X86/DW_AT_linkage_name.ll @@ -18,12 +18,12 @@ ; Test that we do emit a linkage name for a specific instance of it. ; CHECK: DW_TAG_subprogram -; CHECK: [[A:.*]]: DW_TAG_subprogram +; CHECK: [[A_DTOR:.*]]: DW_TAG_subprogram ; CHECK: DW_AT_name {{.*}} "~A" ; CHECK-NOT: DW_AT_MIPS_linkage_name ; CHECK: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_specification {{.*}}[[A]] ; CHECK-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZN1AD2Ev" +; CHECK-NEXT: DW_AT_specification {{.*}}[[A_DTOR]] target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll index 64b00800d63b..b93cdf081df7 100644 --- a/test/DebugInfo/X86/DW_AT_specification.ll +++ b/test/DebugInfo/X86/DW_AT_specification.ll @@ -3,10 +3,10 @@ ; test that the DW_AT_specification is a back edge in the file. 
-; CHECK: DW_TAG_subprogram [{{[0-9]+}}] * -; CHECK: DW_AT_specification [DW_FORM_ref4] (cu + 0x[[OFFSET:[0-9a-f]*]] => {0x0000[[OFFSET]]}) -; CHECK: 0x0000[[OFFSET]]: DW_TAG_subprogram [{{[0-9]+}}] * -; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "bar") +; CHECK: [[BAR_DECL:0x[0-9a-f]*]]: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZN3foo3barEv" +; CHECK: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_specification {{.*}} {[[BAR_DECL]]} @_ZZN3foo3barEvE1x = constant i32 0, align 4 diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll index ae025ad0de33..baa819de85df 100644 --- a/test/DebugInfo/X86/concrete_out_of_line.ll +++ b/test/DebugInfo/X86/concrete_out_of_line.ll @@ -11,8 +11,12 @@ ; CHECK: DW_TAG_subprogram ; CHECK: [[ASSIGN_DECL:0x........]]: DW_TAG_subprogram +; CHECK: DW_TAG_class_type +; CHECK: [[RELEASE_DECL:0x........]]: DW_TAG_subprogram +; CHECK: [[DTOR_DECL:0x........]]: DW_TAG_subprogram + ; CHECK: [[RELEASE:0x........]]: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_specification {{.*}} {[[RELEASE_DECL:0x........]]} +; CHECK: DW_AT_specification {{.*}} {[[RELEASE_DECL]]} ; CHECK: DW_TAG_formal_parameter ; CHECK-NOT: NULL ; CHECK-NOT: DW_TAG @@ -30,19 +34,14 @@ ; CHECK: DW_TAG_inlined_subroutine ; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D2_ABS:0x........]]} -; CHECK: DW_TAG_class_type -; CHECK: [[RELEASE_DECL]]: DW_TAG_subprogram -; CHECK: [[DTOR_DECL:0x........]]: DW_TAG_subprogram - - ; CHECK: [[D1_ABS]]: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]} ; CHECK-NEXT: DW_AT_{{.*}}linkage_name +; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]} ; CHECK-NEXT: DW_AT_inline ; CHECK-NOT: DW_AT_inline ; CHECK: [[D2_ABS]]: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]} ; CHECK-NEXT: DW_AT_{{.*}}linkage_name +; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]} ; CHECK-NEXT: DW_AT_inline ; CHECK-NOT: DW_AT_inline ; CHECK: DW_TAG diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll index 772861a69bbb..b3c911252d83 100644 --- a/test/DebugInfo/X86/pr11300.ll +++ b/test/DebugInfo/X86/pr11300.ll @@ -3,11 +3,13 @@ ; test that the DW_AT_specification is a back edge in the file. +; Skip the definition of zed(foo*) ; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "zed") +; CHECK: DW_TAG_class_type +; CHECK: [[BAR_DECL:0x[0-9a-f]*]]: DW_TAG_subprogram +; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN3foo3barEv" ; CHECK: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_specification [DW_FORM_ref4] (cu + {{.*}} => {[[BACK:0x[0-9a-f]*]]}) -; CHECK: [[BACK]]: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_specification {{.*}} {[[BAR_DECL]]} %struct.foo = type { i8 } From 9a3798632893c9fdec7e29afbdd98be6ef82a72a Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 21 May 2014 21:05:05 +0000 Subject: [PATCH 029/906] Make a couple of command lines static and remove an unnecessary initialization. 
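A minimal sketch of the resulting pattern (the option name here is hypothetical, not
taken from the patch): a file-local option can be declared static and, for booleans,
can rely on value-initialization instead of an explicit cl::init(false), which is the
initialization the patch removes.

  static cl::opt<bool> DisableFoo("disable-foo", cl::Hidden,
                                  cl::desc("Disable the hypothetical foo step"));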
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209320 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llc/llc.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp index abdc1ab634cf..47be4a8b71ad 100644 --- a/tools/llc/llc.cpp +++ b/tools/llc/llc.cpp @@ -75,13 +75,11 @@ OptLevel("O", static cl::opt TargetTriple("mtriple", cl::desc("Override target triple for module")); -cl::opt NoVerify("disable-verify", cl::Hidden, - cl::desc("Do not verify input module")); +static cl::opt NoVerify("disable-verify", cl::Hidden, + cl::desc("Do not verify input module")); -cl::opt -DisableSimplifyLibCalls("disable-simplify-libcalls", - cl::desc("Disable simplify-libcalls"), - cl::init(false)); +static cl::opt DisableSimplifyLibCalls("disable-simplify-libcalls", + cl::desc("Disable simplify-libcalls")); static int compileModule(char**, LLVMContext&); From d1b5bdaebdcdfc85854e6dac538bcc273b6a486a Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 21 May 2014 21:05:09 +0000 Subject: [PATCH 030/906] Move MCOptions that aren't shared between programs into their specific program and have them initialize the MCOptions struct explicitly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209321 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCTargetOptionsCommandFlags.h | 20 ------------------- test/DebugInfo/2010-03-19-DbgDeclare.ll | 2 +- test/MC/ELF/noexec.s | 2 +- tools/llc/llc.cpp | 20 ++++++++++++++----- tools/llvm-mc/llvm-mc.cpp | 3 +++ 5 files changed, 20 insertions(+), 27 deletions(-) diff --git a/include/llvm/MC/MCTargetOptionsCommandFlags.h b/include/llvm/MC/MCTargetOptionsCommandFlags.h index 24e683f61723..17a117a2a3bd 100644 --- a/include/llvm/MC/MCTargetOptionsCommandFlags.h +++ b/include/llvm/MC/MCTargetOptionsCommandFlags.h @@ -33,31 +33,11 @@ cl::opt RelaxAll("mc-relax-all", cl::desc("When used with filetype=obj, " "relax all fixups in the emitted object file")); -cl::opt EnableDwarfDirectory( - "enable-dwarf-directory", cl::Hidden, - cl::desc("Use .file directives with an explicit directory.")); - -cl::opt NoExecStack("mc-no-exec-stack", - cl::desc("File doesn't need an exec stack")); - -cl::opt ShowMCEncoding("show-mc-encoding", cl::Hidden, - cl::desc("Show encoding in .s output")); -cl::opt ShowMCInst("show-mc-inst", cl::Hidden, - cl::desc("Show instruction structure in .s output")); - -cl::opt AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), - cl::init(false)); - static inline MCTargetOptions InitMCTargetOptionsFromFlags() { MCTargetOptions Options; Options.SanitizeAddress = (AsmInstrumentation == MCTargetOptions::AsmInstrumentationAddress); Options.MCRelaxAll = RelaxAll; - Options.MCUseDwarfDirectory = EnableDwarfDirectory; - Options.MCNoExecStack = NoExecStack; - Options.ShowMCEncoding = ShowMCEncoding; - Options.ShowMCInst = ShowMCInst; - Options.AsmVerbose = AsmVerbose; return Options; } diff --git a/test/DebugInfo/2010-03-19-DbgDeclare.ll b/test/DebugInfo/2010-03-19-DbgDeclare.ll index 0c0a4dcb63e8..1ff7fa88bdc3 100644 --- a/test/DebugInfo/2010-03-19-DbgDeclare.ll +++ b/test/DebugInfo/2010-03-19-DbgDeclare.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -verify -S -asm-verbose | FileCheck %s +; RUN: llvm-as < %s | opt -verify -S | FileCheck %s ; CHECK: lang 0x8001 diff --git a/test/MC/ELF/noexec.s b/test/MC/ELF/noexec.s index 33cb8ae3452b..28f50cb7f692 100644 --- a/test/MC/ELF/noexec.s +++ b/test/MC/ELF/noexec.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -mc-no-exec-stack -filetype=obj 
-triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck %s +// RUN: llvm-mc -no-exec-stack -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck %s // CHECK: Section { // CHECK: Index: 4 diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp index 47be4a8b71ad..09ff4613b972 100644 --- a/tools/llc/llc.cpp +++ b/tools/llc/llc.cpp @@ -81,7 +81,18 @@ static cl::opt NoVerify("disable-verify", cl::Hidden, static cl::opt DisableSimplifyLibCalls("disable-simplify-libcalls", cl::desc("Disable simplify-libcalls")); -static int compileModule(char**, LLVMContext&); +static cl::opt ShowMCEncoding("show-mc-encoding", cl::Hidden, + cl::desc("Show encoding in .s output")); + +static cl::opt EnableDwarfDirectory( + "enable-dwarf-directory", cl::Hidden, + cl::desc("Use .file directives with an explicit directory.")); + +static cl::opt AsmVerbose("asm-verbose", + cl::desc("Add comments to directives."), + cl::init(true)); + +static int compileModule(char **, LLVMContext &); // GetFileNameRoot - Helper function to get the basename of a filename. static inline std::string @@ -270,10 +281,9 @@ static int compileModule(char **argv, LLVMContext &Context) { TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); Options.DisableIntegratedAS = NoIntegratedAssembler; - - // Override default to generate verbose assembly unless we've seen the flag. - if (AsmVerbose.getNumOccurrences() == 0) - Options.MCOptions.AsmVerbose = true; + Options.MCOptions.ShowMCEncoding = ShowMCEncoding; + Options.MCOptions.MCUseDwarfDirectory = EnableDwarfDirectory; + Options.MCOptions.AsmVerbose = AsmVerbose; std::unique_ptr target( TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr, diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index 02dcfcd2dd0b..84d578b4d008 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -159,6 +159,9 @@ MainFileName("main-file-name", static cl::opt SaveTempLabels("save-temp-labels", cl::desc("Don't discard temporary labels")); +static cl::opt NoExecStack("no-exec-stack", + cl::desc("File doesn't need an exec stack")); + enum ActionType { AC_AsLex, AC_Assemble, From fd0096a42c6d21e922e99669b1752a03987ebc84 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Wed, 21 May 2014 22:00:39 +0000 Subject: [PATCH 031/906] [X86] Fix a bug in the lowering of BLENDI introduced in r209043. ISD::VSELECT mask uses 1 to identify the first argument and 0 to identify the second argument. On the other hand, BLENDI uses 0 to identify the first argument and 1 to identify the second argument. Fix the generation of the blend mask to account for this difference. The bug did not show up with r209043, because we were not checking for the actual arguments of the blend instruction! This commit also fixes the test cases. Note: The same mask works for the BLENDr variant because the arguments are swapped during instruction selection (see the BLENDXXrr patterns). 
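For illustration only (this helper is hypothetical and not part of the patch), the
per-lane inversion that BUILD_VECTORtoBlendMask now performs can be summarized as:

  #include <cstdint>
  #include <vector>

  // 'SelectsFirstOp[i]' is the VSELECT view: true means lane i takes operand 0.
  // BLENDI uses the opposite encoding, so each lane's bit is inverted.
  static uint32_t vselectMaskToBlendImm(const std::vector<bool> &SelectsFirstOp) {
    uint32_t Imm = 0;
    for (unsigned i = 0, e = SelectsFirstOp.size(); i != e; ++i)
      Imm |= (SelectsFirstOp[i] ? 0u : 1u) << i;
    return Imm;
  }

  // Example: the mask <i1 true, i1 false, i1 true, i1 false> (0101b = 5 in the
  // VSELECT encoding) becomes the immediate 1010b = 10, matching the updated
  // 'vblendps $10' checks in avx-blend.ll.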
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209324 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 10 +++++--- test/CodeGen/X86/avx-blend.ll | 38 +++++++++++++++++++++++------- test/CodeGen/X86/blend-msb.ll | 12 +++++++++- 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 72743a97abd9..61828759fc2c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7980,7 +7980,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// This function assumes its argument is a BUILD_VECTOR of constand or +// This function assumes its argument is a BUILD_VECTOR of constants or // undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is // true. static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, @@ -8004,9 +8004,13 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, Lane2Cond = !isZero(SndLaneEltCond); if (Lane1Cond == Lane2Cond || Lane2Cond < 0) - MaskValue |= !!Lane1Cond << i; + // Lane1Cond != 0, means we want the first argument. + // Lane1Cond == 0, means we want the second argument. + // The encoding of this argument is 0 for the first argument, 1 + // for the second. Therefore, invert the condition. + MaskValue |= !Lane1Cond << i; else if (Lane1Cond < 0) - MaskValue |= !!Lane2Cond << i; + MaskValue |= !Lane2Cond << i; else return false; } diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll index 8577a616c3cd..4d4f6c1a03ab 100644 --- a/test/CodeGen/X86/avx-blend.ll +++ b/test/CodeGen/X86/avx-blend.ll @@ -3,7 +3,16 @@ ; AVX128 tests: ;CHECK-LABEL: vsel_float: -;CHECK: vblendps $5 +; select mask is . +; Big endian representation is 0101 = 5. +; '1' means takes the first argument, '0' means takes the second argument. +; This is the opposite of the intel syntax, thus we expect +; the inverted mask: 1010 = 10. +; According to the ABI: +; v1 is in xmm0 => first argument is xmm0. +; v2 is in xmm1 => second argument is xmm1. +; result is in xmm0 => destination argument. +;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0 ;CHECK: ret define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { %vsel = select <4 x i1> , <4 x float> %v1, <4 x float> %v2 @@ -12,7 +21,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { ;CHECK-LABEL: vsel_i32: -;CHECK: vblendps $5 +;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0 ;CHECK: ret define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) { %vsel = select <4 x i1> , <4 x i32> %v1, <4 x i32> %v2 @@ -52,7 +61,13 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) { ;CHECK-LABEL: vsel_float8: ;CHECK-NOT: vinsertf128 -;CHECK: vblendps $17 +; +; which translates into the boolean mask (big endian representation): +; 00010001 = 17. +; '1' means takes the first argument, '0' means takes the second argument. +; This is the opposite of the intel syntax, thus we expect +; the inverted mask: 11101110 = 238. 
+;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0 ;CHECK: ret define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) { %vsel = select <8 x i1> , <8 x float> %v1, <8 x float> %v2 @@ -61,7 +76,7 @@ define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) { ;CHECK-LABEL: vsel_i328: ;CHECK-NOT: vinsertf128 -;CHECK: vblendps $17 +;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0 ;CHECK-NEXT: ret define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) { %vsel = select <8 x i1> , <8 x i32> %v1, <8 x i32> %v2 @@ -69,8 +84,15 @@ define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) { } ;CHECK-LABEL: vsel_double8: -;CHECK: vblendpd $1 -;CHECK: vblendpd $1 +; select mask is 2x: 0001 => intel mask: ~0001 = 14 +; ABI: +; v1 is in ymm0 and ymm1. +; v2 is in ymm2 and ymm3. +; result is in ymm0 and ymm1. +; Compute the low part: res.low = blend v1.low, v2.low, blendmask +;CHECK: vblendpd $14, %ymm2, %ymm0, %ymm0 +; Compute the high part. +;CHECK: vblendpd $14, %ymm3, %ymm1, %ymm1 ;CHECK: ret define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) { %vsel = select <8 x i1> , <8 x double> %v1, <8 x double> %v2 @@ -78,8 +100,8 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) { } ;CHECK-LABEL: vsel_i648: -;CHECK: vblendpd $1 -;CHECK: vblendpd $1 +;CHECK: vblendpd $14, %ymm2, %ymm0, %ymm0 +;CHECK: vblendpd $14, %ymm3, %ymm1, %ymm1 ;CHECK: ret define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) { %vsel = select <8 x i1> , <8 x i64> %v1, <8 x i64> %v2 diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll index 4e17a714bf57..34aaf2c31ace 100644 --- a/test/CodeGen/X86/blend-msb.ll +++ b/test/CodeGen/X86/blend-msb.ll @@ -22,7 +22,17 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) { } ;CHECK-LABEL: vsel_8xi16: -;CHECK: pblendw $17 +; The select mask is +; +; which translates into the boolean mask (big endian representation): +; 00010001 = 17. +; '1' means takes the first argument, '0' means takes the second argument. +; This is the opposite of the intel syntax, thus we expect +; the inverted mask: 11101110 = 238. +; According to the ABI: +; v1 is in xmm0 => first argument is xmm0. +; v2 is in xmm1 => second argument is xmm1. +;CHECK: pblendw $238, %xmm1, %xmm0 ;CHECK: ret define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) { %vsel = select <8 x i1> , <8 x i16> %v1, <8 x i16> %v2 From 30436451e28a92642b1d4f56afc9e7df575b7756 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 21 May 2014 22:41:17 +0000 Subject: [PATCH 032/906] DebugInfo: Ensure concrete out of line variables from inlined functions reference their abstract origins. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209327 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 45 ++++++++++++---------- lib/CodeGen/AsmPrinter/DwarfDebug.h | 3 +- test/DebugInfo/X86/concrete_out_of_line.ll | 3 ++ 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index cb5824b35728..e4170f026bda 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1024,6 +1024,7 @@ void DwarfDebug::endModule() { // clean up. SPMap.clear(); + AbstractVariables.clear(); // Reset these for the next Module if we have one. FirstCU = nullptr; @@ -1032,21 +1033,25 @@ void DwarfDebug::endModule() { // Find abstract variable, if any, associated with Var. 
DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV, DebugLoc ScopeLoc) { + return findAbstractVariable(DV, ScopeLoc.getScope(DV->getContext())); +} + +DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV, + const MDNode *ScopeNode) { LLVMContext &Ctx = DV->getContext(); // More then one inlined variable corresponds to one abstract variable. DIVariable Var = cleanseInlinedVariable(DV, Ctx); - DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var); - if (AbsDbgVariable) - return AbsDbgVariable; + auto I = AbstractVariables.find(Var); + if (I != AbstractVariables.end()) + return I->second.get(); - LexicalScope *Scope = LScopes.findAbstractScope(ScopeLoc.getScope(Ctx)); + LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode); if (!Scope) return nullptr; - AbsDbgVariable = new DbgVariable(Var, nullptr, this); - addScopeVariable(Scope, AbsDbgVariable); - AbstractVariables[Var] = AbsDbgVariable; - return AbsDbgVariable; + auto AbsDbgVariable = make_unique(Var, nullptr, this); + addScopeVariable(Scope, AbsDbgVariable.get()); + return (AbstractVariables[Var] = std::move(AbsDbgVariable)).get(); } // If Var is a current function argument then add it to CurrentFnArguments list. @@ -1226,7 +1231,10 @@ DwarfDebug::collectVariableInfo(SmallPtrSet &Processed) { if (!Processed.insert(DV)) continue; if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) - addScopeVariable(Scope, new DbgVariable(DV, nullptr, this)); + addScopeVariable( + Scope, + new DbgVariable(DV, findAbstractVariable(DV, Scope->getScopeNode()), + this)); } } @@ -1516,14 +1524,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { assert(DV && DV.isVariable()); if (!ProcessedVars.insert(DV)) continue; - // Check that DbgVariable for DV wasn't created earlier, when - // findAbstractVariable() was called for inlined instance of DV. - LLVMContext &Ctx = DV->getContext(); - DIVariable CleanDV = cleanseInlinedVariable(DV, Ctx); - if (AbstractVariables.lookup(CleanDV)) - continue; - if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext())) - addScopeVariable(Scope, new DbgVariable(DV, nullptr, this)); + findAbstractVariable(DV, DV.getContext()); } constructAbstractSubprogramScopeDIE(TheCU, AScope); } @@ -1539,12 +1540,16 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { PrevCU = &TheCU; // Clear debug info - for (auto &I : ScopeVariables) - DeleteContainerPointers(I.second); + // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the + // DbgVariables except those that are also in AbstractVariables (since they + // can be used cross-function) + for (const auto &I : ScopeVariables) + for (const auto *Var : I.second) + if (!AbstractVariables.count(Var->getVariable()) || Var->getAbstractVariable()) + delete Var; ScopeVariables.clear(); DeleteContainerPointers(CurrentFnArguments); DbgValues.clear(); - AbstractVariables.clear(); LabelsBeforeInsn.clear(); LabelsAfterInsn.clear(); PrevLabel = nullptr; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index b2e16074f51d..aa18e7c3456b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -199,7 +199,7 @@ class DwarfDebug : public AsmPrinterHandler { ScopeVariablesMap ScopeVariables; // Collection of abstract variables. - DenseMap AbstractVariables; + DenseMap> AbstractVariables; // Collection of DebugLocEntry. Stored in a linked list so that DIELocLists // can refer to them in spite of insertions into this list. 
@@ -336,6 +336,7 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Find abstract variable associated with Var. DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc); + DbgVariable *findAbstractVariable(DIVariable &Var, const MDNode *Scope); /// \brief Find DIE for the given subprogram and attach appropriate /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll index baa819de85df..ad9d3b66696c 100644 --- a/test/DebugInfo/X86/concrete_out_of_line.ll +++ b/test/DebugInfo/X86/concrete_out_of_line.ll @@ -39,6 +39,8 @@ ; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]} ; CHECK-NEXT: DW_AT_inline ; CHECK-NOT: DW_AT_inline +; CHECK-NOT: DW_TAG +; CHECK: [[D1_THIS_ABS:0x........]]: DW_TAG_formal_parameter ; CHECK: [[D2_ABS]]: DW_TAG_subprogram ; CHECK-NEXT: DW_AT_{{.*}}linkage_name ; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]} @@ -52,6 +54,7 @@ ; CHECK: DW_TAG_subprogram ; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D1_ABS]]} ; CHECK: DW_TAG_formal_parameter +; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D1_THIS_ABS]]} ; CHECK: DW_TAG_inlined_subroutine ; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D2_ABS]]} From 9bfc3f592d8a622210cc28c5197a01466b366655 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 21 May 2014 22:42:02 +0000 Subject: [PATCH 033/906] Remove unused member variable from hexagon pass. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209328 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index aa4121f4ed5d..d814e33de2c1 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -47,13 +47,12 @@ using namespace llvm; namespace { class HexagonSplitConst32AndConst64 : public MachineFunctionPass { - const HexagonTargetMachine& QTM; - const HexagonSubtarget &QST; + const HexagonTargetMachine &QTM; public: static char ID; - HexagonSplitConst32AndConst64(const HexagonTargetMachine& TM) - : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {} + HexagonSplitConst32AndConst64(const HexagonTargetMachine &TM) + : MachineFunctionPass(ID), QTM(TM) {} const char *getPassName() const override { return "Hexagon Split Const32s and Const64s"; From 5fe59a2e10175357d43b312bbef5adb1cfb9ac54 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 21 May 2014 22:42:07 +0000 Subject: [PATCH 034/906] Remove getTargetLowering from TargetPassConfig as the target lowering can change depending upon subtarget/subtarget features for a function. 
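A rough sketch of the resulting shape (simplified, not the exact patch text): instead
of consulting the lowering while the pass pipeline is being built, the pass checks it
when it actually runs, at which point the function and its subtarget are known.

  bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
    const auto &TLOF = static_cast<const HexagonTargetObjectFile &>(
        QTM.getTargetLowering()->getObjFileLowering());
    if (TLOF.IsSmallDataEnabled())
      return true; // small-data mode: nothing to split
    // ... split CONST32/CONST64 pseudos as before ...
  }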
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209329 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/Passes.h | 4 ---- lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp | 9 ++++++++- lib/Target/Hexagon/HexagonTargetMachine.cpp | 8 ++------ 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 2deb1aca7617..35210f1ab844 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -133,10 +133,6 @@ class TargetPassConfig : public ImmutablePass { return *static_cast(TM); } - const TargetLowering *getTargetLowering() const { - return TM->getTargetLowering(); - } - // void setInitialized() { Initialized = true; } diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index d814e33de2c1..247207f992dc 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -17,9 +17,10 @@ // //===----------------------------------------------------------------------===// -#include "HexagonTargetMachine.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonTargetObjectFile.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/MachineDominators.h" @@ -66,6 +67,12 @@ char HexagonSplitConst32AndConst64::ID = 0; bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { + const HexagonTargetObjectFile &TLOF = + (const HexagonTargetObjectFile &) + QTM.getTargetLowering()->getObjFileLowering(); + if (TLOF.IsSmallDataEnabled()) + return true; + const TargetInstrInfo *TII = QTM.getInstrInfo(); // Loop over all of the basic blocks diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 2572e11ae9e1..b9237647ff4a 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -150,16 +150,12 @@ bool HexagonPassConfig::addPostRegAlloc() { bool HexagonPassConfig::addPreSched2() { const HexagonTargetMachine &TM = getHexagonTargetMachine(); - const HexagonTargetObjectFile &TLOF = - (const HexagonTargetObjectFile &)getTargetLowering()->getObjFileLowering(); addPass(createHexagonCopyToCombine()); if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); - if (!TLOF.IsSmallDataEnabled()) { - addPass(createHexagonSplitConst32AndConst64(TM)); - printAndVerify("After hexagon split const32/64 pass"); - } + addPass(createHexagonSplitConst32AndConst64(TM)); + printAndVerify("After hexagon split const32/64 pass"); return true; } From 713096fb68aaf51447c6e4fd3c9e4cc60ec7e0ef Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 21 May 2014 22:42:38 +0000 Subject: [PATCH 035/906] R600: Add failing testcases for constant initializers. Constant initializers involving illegal types hit an assertion. 
Patch by: Jan Vesely git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209330 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/R600/gv-const-addrspace-fail.ll | 30 ++++++++++++++++++++ test/CodeGen/R600/gv-const-addrspace.ll | 30 +++++++++++--------- 2 files changed, 47 insertions(+), 13 deletions(-) create mode 100644 test/CodeGen/R600/gv-const-addrspace-fail.ll diff --git a/test/CodeGen/R600/gv-const-addrspace-fail.ll b/test/CodeGen/R600/gv-const-addrspace-fail.ll new file mode 100644 index 000000000000..f217ab5df7fa --- /dev/null +++ b/test/CodeGen/R600/gv-const-addrspace-fail.ll @@ -0,0 +1,30 @@ +; XFAIL: * +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + + +@a = internal addrspace(2) constant [1 x i8] [ i8 7 ], align 1 + +; FUNC-LABEL: @test_i8 +; EG: CF_END +; SI: BUFFER_STORE_BYTE +; SI: S_ENDPGM +define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 { + %arrayidx = getelementptr inbounds [1 x i8] addrspace(2)* @a, i32 0, i32 %s + %1 = load i8 addrspace(2)* %arrayidx, align 1 + store i8 %1, i8 addrspace(1)* %out + ret void +} + +@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2 + +; FUNC-LABEL: @test_i16 +; EG: CF_END +; SI: BUFFER_STORE_SHORT +; SI: S_ENDPGM +define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 { + %arrayidx = getelementptr inbounds [1 x i16] addrspace(2)* @b, i32 0, i32 %s + %1 = load i16 addrspace(2)* %arrayidx, align 2 + store i16 %1, i16 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll index cda7ab1fccd3..a3504df77b3d 100644 --- a/test/CodeGen/R600/gv-const-addrspace.ll +++ b/test/CodeGen/R600/gv-const-addrspace.ll @@ -1,4 +1,8 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + + +@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2 ; XXX: Test on SI once 64-bit adds are supportes. @@ -6,12 +10,12 @@ ; FUNC-LABEL: @float -; R600-DAG: MOV {{\** *}}T2.X -; R600-DAG: MOV {{\** *}}T3.X -; R600-DAG: MOV {{\** *}}T4.X -; R600-DAG: MOV {{\** *}}T5.X -; R600-DAG: MOV {{\** *}}T6.X -; R600: MOVA_INT +; EG-DAG: MOV {{\** *}}T2.X +; EG-DAG: MOV {{\** *}}T3.X +; EG-DAG: MOV {{\** *}}T4.X +; EG-DAG: MOV {{\** *}}T5.X +; EG-DAG: MOV {{\** *}}T6.X +; EG: MOVA_INT define void @float(float addrspace(1)* %out, i32 %index) { entry: @@ -25,12 +29,12 @@ entry: ; FUNC-LABEL: @i32 -; R600-DAG: MOV {{\** *}}T2.X -; R600-DAG: MOV {{\** *}}T3.X -; R600-DAG: MOV {{\** *}}T4.X -; R600-DAG: MOV {{\** *}}T5.X -; R600-DAG: MOV {{\** *}}T6.X -; R600: MOVA_INT +; EG-DAG: MOV {{\** *}}T2.X +; EG-DAG: MOV {{\** *}}T3.X +; EG-DAG: MOV {{\** *}}T4.X +; EG-DAG: MOV {{\** *}}T5.X +; EG-DAG: MOV {{\** *}}T6.X +; EG: MOVA_INT define void @i32(i32 addrspace(1)* %out, i32 %index) { entry: From bd124c85eff93211873cb87191303f520b6dd659 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 21 May 2014 22:42:42 +0000 Subject: [PATCH 036/906] R600: Partially fix constant initializers for structs and vectors. This should extend the current workaround to work with structs that only contain legal, scalar types. 
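For illustration only (a hypothetical helper, not part of the patch): the key point is
that struct fields are lowered at the byte offsets DataLayout computes for them,
rather than assuming array-style element strides.

  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  static void printFieldOffsets(const DataLayout &DL, StructType *ST) {
    const StructLayout *SL = DL.getStructLayout(ST);
    for (unsigned I = 0, N = ST->getNumElements(); I != N; ++I)
      outs() << "field " << I << " at byte offset "
             << SL->getElementOffset(I) << "\n";
  }

  // For the %struct.foo = { float, [5 x i32] } used in the new test, this
  // prints offsets 0 and 4.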
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209331 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 39 +++++++++++++++++--- test/CodeGen/R600/gv-const-addrspace-fail.ll | 28 ++++++++++++++ test/CodeGen/R600/gv-const-addrspace.ll | 27 ++++++++++++++ 3 files changed, 88 insertions(+), 6 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 008e1311b737..e7cc20673737 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -537,16 +537,43 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, TD->getPrefTypeAlignment(CFP->getType())); } - if (Init->getType()->isAggregateType()) { + Type *InitTy = Init->getType(); + if (StructType *ST = dyn_cast(InitTy)) { + const StructLayout *SL = TD->getStructLayout(ST); + + EVT PtrVT = InitPtr.getValueType(); + SmallVector Chains; + + for (unsigned I = 0, N = ST->getNumElements(); I != N; ++I) { + SDValue Offset = DAG.getConstant(SL->getElementOffset(I), PtrVT); + SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset); + + Constant *Elt = Init->getAggregateElement(I); + Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG)); + } + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + } + + if (SequentialType *SeqTy = dyn_cast(InitTy)) { EVT PtrVT = InitPtr.getValueType(); - unsigned NumElements = Init->getType()->getArrayNumElements(); + + unsigned NumElements; + if (ArrayType *AT = dyn_cast(SeqTy)) + NumElements = AT->getNumElements(); + else if (VectorType *VT = dyn_cast(SeqTy)) + NumElements = VT->getNumElements(); + else + llvm_unreachable("Unexpected type"); + + unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType()); SmallVector Chains; for (unsigned i = 0; i < NumElements; ++i) { - SDValue Offset = DAG.getConstant(i * TD->getTypeAllocSize( - Init->getType()->getArrayElementType()), PtrVT); + SDValue Offset = DAG.getConstant(i * EltSize, PtrVT); SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset); - Chains.push_back(LowerConstantInitializer(Init->getAggregateElement(i), - GV, Ptr, Chain, DAG)); + + Constant *Elt = Init->getAggregateElement(i); + Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG)); } return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); diff --git a/test/CodeGen/R600/gv-const-addrspace-fail.ll b/test/CodeGen/R600/gv-const-addrspace-fail.ll index f217ab5df7fa..ebd781107627 100644 --- a/test/CodeGen/R600/gv-const-addrspace-fail.ll +++ b/test/CodeGen/R600/gv-const-addrspace-fail.ll @@ -28,3 +28,31 @@ define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 { store i16 %1, i16 addrspace(1)* %out ret void } + +%struct.bar = type { float, [5 x i8] } + +; The illegal i8s aren't handled +@struct_bar_gv = internal addrspace(2) unnamed_addr constant [1 x %struct.bar] [ %struct.bar { float 16.0, [5 x i8] [i8 0, i8 1, i8 2, i8 3, i8 4] } ] + +; FUNC-LABEL: @struct_bar_gv_load +define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index + %load = load i8 addrspace(2)* %gep, align 1 + store i8 %load, i8 addrspace(1)* %out, align 1 + ret void +} + + +; The private load isn't scalarzied. 
+@array_vector_gv = internal addrspace(2) constant [4 x <4 x i32>] [ <4 x i32> , + <4 x i32> , + <4 x i32> , + <4 x i32> ] + +; FUNC-LABEL: @array_vector_gv_load +define void @array_vector_gv_load(<4 x i32> addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index + %load = load <4 x i32> addrspace(2)* %gep, align 16 + store <4 x i32> %load, <4 x i32> addrspace(1)* %out, align 16 + ret void +} diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll index a3504df77b3d..01760617d3e7 100644 --- a/test/CodeGen/R600/gv-const-addrspace.ll +++ b/test/CodeGen/R600/gv-const-addrspace.ll @@ -43,3 +43,30 @@ entry: store i32 %1, i32 addrspace(1)* %out ret void } + + +%struct.foo = type { float, [5 x i32] } + +@struct_foo_gv = internal addrspace(2) unnamed_addr constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ] + +; FUNC-LABEL: @struct_foo_gv_load + +define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index + %load = load i32 addrspace(2)* %gep, align 4 + store i32 %load, i32 addrspace(1)* %out, align 4 + ret void +} + +@array_v1_gv = internal addrspace(2) constant [4 x <1 x i32>] [ <1 x i32> , + <1 x i32> , + <1 x i32> , + <1 x i32> ] + +; FUNC-LABEL: @array_v1_gv_load +define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index + %load = load <1 x i32> addrspace(2)* %gep, align 4 + store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4 + ret void +} From afd1747bbca94fa666910e42edcf016495a42dad Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 21 May 2014 22:59:17 +0000 Subject: [PATCH 037/906] R600: Add comment describing problems with LowerConstantInitializer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209333 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index e7cc20673737..d63cd1d281c5 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -514,6 +514,16 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N, } } +// FIXME: This implements accesses to initialized globals in the constant +// address space by copying them to private and accessing that. It does not +// properly handle illegal types or vectors. The private vector loads are not +// scalarized, and the illegal scalars hit an assertion. This technique will not +// work well with large initializers, and this should eventually be +// removed. Initialized globals should be placed into a data section that the +// runtime will load into a buffer before the kernel is executed. Uses of the +// global need to be replaced with a pointer loaded from an implicit kernel +// argument into this buffer holding the copy of the data, which will remove the +// need for any of this. 
SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, const GlobalValue *GV, const SDValue &InitPtr, From 111bad385ab8bdaec4a798481be6b66252417dce Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 21 May 2014 23:14:12 +0000 Subject: [PATCH 038/906] DebugInfo: Use the SPMap to find the parent CU of inlined functions as they may not be in the current CU Committed in r209178 then reverted in r209251 due to LTO breakage, here's a proper fix for the case of the missing subprogram DIE. The DIEs were there, just in other compile units. Using the SPMap we can find the right compile unit to search for and produce cross-unit references to describe this kind of inlining. One existing test case needed to be updated because it had a function that wasn't in the CU's subprogram list, so it didn't appear in the SPMap. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209335 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 24 ++-- .../CodeGen/Thumb/2010-07-15-debugOrdering.ll | 2 +- test/DebugInfo/cross-cu-inlining.ll | 124 ++++++++++++++++++ 3 files changed, 136 insertions(+), 14 deletions(-) create mode 100644 test/DebugInfo/cross-cu-inlining.ll diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index e4170f026bda..745b2f043273 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -431,14 +431,10 @@ DwarfDebug::constructInlinedScopeDIE(DwarfCompileUnit &TheCU, assert(Scope->getScopeNode()); DIScope DS(Scope->getScopeNode()); DISubprogram InlinedSP = getDISubprogram(DS); - DIE *OriginDIE = TheCU.getDIE(InlinedSP); - // FIXME: This should be an assert (or possibly a - // getOrCreateSubprogram(InlinedSP)) otherwise we're just failing to emit - // inlining information. - if (!OriginDIE) { - DEBUG(dbgs() << "Unable to find original DIE for an inlined subprogram."); - return nullptr; - } + // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram + // was inlined from another compile unit. + DIE *OriginDIE = SPMap[InlinedSP]->getDIE(InlinedSP); + assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); auto ScopeDIE = make_unique(dwarf::DW_TAG_inlined_subroutine); TheCU.addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE); @@ -530,11 +526,13 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, if (!ProcessedSPNodes.insert(Sub)) return; - if (DIE *ScopeDIE = TheCU.getDIE(Sub)) { - AbstractSPDies.insert(std::make_pair(Sub, ScopeDIE)); - TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); - createAndAddScopeChildren(TheCU, Scope, *ScopeDIE); - } + // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram + // was inlined from another compile unit. 
+ DIE *ScopeDIE = SPMap[Sub]->getDIE(Sub); + assert(ScopeDIE); + AbstractSPDies.insert(std::make_pair(Sub, ScopeDIE)); + TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); + createAndAddScopeChildren(TheCU, Scope, *ScopeDIE); } DIE &DwarfDebug::constructSubprogramScopeDIE(DwarfCompileUnit &TheCU, diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll index b87bf24993a1..ffc9584199cf 100644 --- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll +++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll @@ -151,5 +151,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !98 = metadata !{i32 52, i32 0, metadata !1, null} !101 = metadata !{metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src"} !102 = metadata !{i32 0} -!103 = metadata !{metadata !3} +!103 = metadata !{metadata !3, metadata !77} !104 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/cross-cu-inlining.ll b/test/DebugInfo/cross-cu-inlining.ll new file mode 100644 index 000000000000..ae684ad507c4 --- /dev/null +++ b/test/DebugInfo/cross-cu-inlining.ll @@ -0,0 +1,124 @@ +; REQUIRES: object-emission + +; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +; Build from source: +; $ clang++ a.cpp b.cpp -g -c -emit-llvm +; $ llvm-link a.bc b.bc -o ab.bc +; $ opt -inline ab.bc -o ab-opt.bc +; $ cat a.cpp +; extern int i; +; int func(int); +; int main() { +; return func(i); +; } +; $ cat b.cpp +; int __attribute__((always_inline)) func(int x) { +; return x * 2; +; } + +; Ensure that func inlined into main is described and references the abstract +; definition in b.cpp's CU. 
+ +; CHECK: DW_TAG_compile_unit +; CHECK: DW_AT_name {{.*}} "a.cpp" +; CHECK: DW_TAG_subprogram +; CHECK: DW_TAG_inlined_subroutine +; CHECK-NEXT: DW_AT_abstract_origin {{.*}}[[ABS_FUNC:........]]) +; CHECK: DW_TAG_formal_parameter +; CHECK-NEXT: DW_AT_abstract_origin {{.*}}[[ABS_VAR:........]]) + +; Check the abstract definition is in the 'b.cpp' CU and doesn't contain any +; concrete information (address range or variable location) +; CHECK: DW_TAG_compile_unit +; CHECK: DW_AT_name {{.*}} "b.cpp" +; CHECK: 0x[[ABS_FUNC]]: DW_TAG_subprogram +; CHECK-NOT: DW_AT_low_pc +; CHECK: 0x[[ABS_VAR]]: DW_TAG_formal_parameter +; CHECK-NOT: DW_AT_location + +; Check the concrete out of line definition references the abstract and +; provides the address range and variable location +; CHECK: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {0x[[ABS_FUNC]]} +; CHECK: DW_AT_low_pc +; CHECK: DW_TAG_formal_parameter +; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {0x[[ABS_VAR]]} +; CHECK: DW_AT_location + + +@i = external global i32 + +; Function Attrs: uwtable +define i32 @main() #0 { +entry: + %x.addr.i = alloca i32, align 4 + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %0 = load i32* @i, align 4, !dbg !19 + %1 = bitcast i32* %x.addr.i to i8* + call void @llvm.lifetime.start(i64 4, i8* %1) + store i32 %0, i32* %x.addr.i, align 4 + call void @llvm.dbg.declare(metadata !{i32* %x.addr.i}, metadata !20), !dbg !21 + %2 = load i32* %x.addr.i, align 4, !dbg !22 + %mul.i = mul nsw i32 %2, 2, !dbg !22 + %3 = bitcast i32* %x.addr.i to i8*, !dbg !22 + call void @llvm.lifetime.end(i64 4, i8* %3), !dbg !22 + ret i32 %mul.i, !dbg !19 +} + +; Function Attrs: alwaysinline nounwind uwtable +define i32 @_Z4funci(i32 %x) #1 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !20), !dbg !23 + %0 = load i32* %x.addr, align 4, !dbg !24 + %mul = mul nsw i32 %0, 2, !dbg !24 + ret i32 %mul, !dbg !24 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #2 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #3 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #3 + +attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind } + +!llvm.dbg.cu = !{!0, !9} +!llvm.module.flags = !{!16, !17} +!llvm.ident = !{!18, !18} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/a.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"a.cpp", metadata !"/tmp/dbginfo"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* 
@main, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [main] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/a.cpp] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786449, metadata !10, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !11, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/b.cpp] [DW_LANG_C_plus_plus] +!10 = metadata !{metadata !"b.cpp", metadata !"/tmp/dbginfo"} +!11 = metadata !{metadata !12} +!12 = metadata !{i32 786478, metadata !10, metadata !13, metadata !"func", metadata !"func", metadata !"_Z4funci", i32 1, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4funci, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func] +!13 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ] [/tmp/dbginfo/b.cpp] +!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!15 = metadata !{metadata !8, metadata !8} +!16 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!17 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!18 = metadata !{metadata !"clang version 3.5.0 "} +!19 = metadata !{i32 4, i32 0, metadata !4, null} +!20 = metadata !{i32 786689, metadata !12, metadata !"x", metadata !13, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1] +!21 = metadata !{i32 1, i32 0, metadata !12, metadata !19} +!22 = metadata !{i32 2, i32 0, metadata !12, metadata !19} +!23 = metadata !{i32 1, i32 0, metadata !12, null} +!24 = metadata !{i32 2, i32 0, metadata !12, null} + From 159ccc8f51f7cb090058cfd0dcb64453f5681606 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 21 May 2014 23:17:50 +0000 Subject: [PATCH 039/906] MC: introduce ability to restrict recorded relocations Add support to allow a target specific COFF object writer to restrict the recorded resolutions in the emitted object files. This is motivated by the need in Windows on ARM, where an intermediate relocation needs to be prevented from being emitted in the object file. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209336 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCWinCOFFObjectWriter.h | 1 + lib/MC/WinCOFFObjectWriter.cpp | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/llvm/MC/MCWinCOFFObjectWriter.h b/include/llvm/MC/MCWinCOFFObjectWriter.h index 213481c9090c..dad7bb597039 100644 --- a/include/llvm/MC/MCWinCOFFObjectWriter.h +++ b/include/llvm/MC/MCWinCOFFObjectWriter.h @@ -30,6 +30,7 @@ namespace llvm { virtual unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection) const = 0; + virtual bool recordRelocation(const MCFixup &) const { return true; } }; /// \brief Construct a new Win COFF writer instance. 
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index 2cc027bfa136..961cbc6a8f7d 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -808,7 +808,8 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, } } - coff_section->Relocations.push_back(Reloc); + if (TargetObjectWriter->recordRelocation(Fixup)) + coff_section->Relocations.push_back(Reloc); } void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, From cba7ac7bda58e03d452f7e8ad73978744c101727 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 21 May 2014 23:17:56 +0000 Subject: [PATCH 040/906] MC: correct IMAGE_REL_ARM_MOV32T relocation emission This corrects the emission of IMAGE_REL_ARM_MOV32T relocations. Previously, we were avoiding the high portion of the relocation too early. If there was a section-relative relocation with an offset greater than 16-bits (65535), you would end up truncating the high order bits of the offset. Allow the current relocation representation to flow through out the MC layer to the object writer. Use the new ability to restrict recorded relocations to avoid emitting the relocation into the final object. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209337 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 3 -- .../MCTargetDesc/ARMWinCOFFObjectWriter.cpp | 10 +++-- test/MC/ARM/Windows/mov32t-range.s | 37 +++++++++++++++++++ 3 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 test/MC/ARM/Windows/mov32t-range.s diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 701a6320d487..5b51a52f828a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -1029,9 +1029,6 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, switch (ARM16Expr->getKind()) { default: llvm_unreachable("Unsupported ARMFixup"); case ARMMCExpr::VK_ARM_HI16: - if (Triple(STI.getTargetTriple()).isOSWindows()) - return 0; - Kind = MCFixupKind(isThumb2(STI) ? 
ARM::fixup_t2_movt_hi16 : ARM::fixup_arm_movt_hi16); break; diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index ba9df6e962c9..d31f1f41c697 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -27,6 +27,8 @@ class ARMWinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection) const override; + + bool recordRelocation(const MCFixup &) const override; }; unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, @@ -61,12 +63,14 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, case ARM::fixup_arm_thumb_blx: return COFF::IMAGE_REL_ARM_BLX23T; case ARM::fixup_t2_movw_lo16: - return COFF::IMAGE_REL_ARM_MOV32T; case ARM::fixup_t2_movt_hi16: - llvm_unreachable("High-word for pair-wise relocations are contiguously " - "addressed as an IMAGE_REL_ARM_MOV32T relocation"); + return COFF::IMAGE_REL_ARM_MOV32T; } } + +bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { + return static_cast(Fixup.getKind()) != ARM::fixup_t2_movt_hi16; +} } namespace llvm { diff --git a/test/MC/ARM/Windows/mov32t-range.s b/test/MC/ARM/Windows/mov32t-range.s new file mode 100644 index 000000000000..fef8ff2aca7a --- /dev/null +++ b/test/MC/ARM/Windows/mov32t-range.s @@ -0,0 +1,37 @@ +@ RUN: llvm-mc -triple thumbv7-windows-itanium -filetype obj -o - %s \ +@ RUN: | llvm-readobj -r - | FileCheck -check-prefix CHECK-RELOCATIONS %s + +@ RUN: llvm-mc -triple thumbv7-windows-itanium -filetype obj -o - %s \ +@ RUN: | llvm-objdump -d - | FileCheck -check-prefix CHECK-ENCODING %s + + .syntax unified + .thumb + .text + + .def truncation + .scl 3 + .type 32 + .endef + .align 2 + .thumb_func +truncation: + movw r0, :lower16:.Lerange + movt r0, :upper16:.Lerange + bx lr + + .section .rdata,"rd" +.Lbuffer: + .zero 65536 +.Lerange: + .asciz "-erange" + +@ CHECK-RELOCATIONS: Relocations [ +@ CHECK-RELOCATIONS: .text { +@ CHECK-RELOCATIONS: 0x0 IMAGE_REL_ARM_MOV32T .rdata +@ CHECK-RELOCATIONS-NOT: 0x4 IMAGE_REL_ARM_MOV32T .rdata +@ CHECK-RELOCATIONS: } +@ CHECK-RELOCATIONS: ] + +@ CHECK-ENCODING: 0: 40 f2 00 00 +@ CHECK-ENCODING-NEXT: 4: c0 f2 01 00 + From 8a0240ddf2ed2ebec6fbeaba18a69e19b0abd7b5 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 21 May 2014 23:27:41 +0000 Subject: [PATCH 041/906] Revert "DebugInfo: Don't put fission type units in comdat sections." This reverts commit r208930, r208933, and r208975. It seems not all fission consumers are ready to handle this behavior. Reverting until tools are brought up to spec. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209338 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCObjectFileInfo.h | 5 +---- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 24 ++++++++++-------------- lib/CodeGen/AsmPrinter/DwarfDebug.h | 1 - lib/CodeGen/AsmPrinter/DwarfUnit.h | 4 ---- lib/MC/MCObjectFileInfo.cpp | 14 +++++++------- test/DebugInfo/X86/generate-odr-hash.ll | 20 ++------------------ 6 files changed, 20 insertions(+), 48 deletions(-) diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h index 58f37d437c86..1a56040e4667 100644 --- a/include/llvm/MC/MCObjectFileInfo.h +++ b/include/llvm/MC/MCObjectFileInfo.h @@ -122,7 +122,6 @@ class MCObjectFileInfo { /// These are used for the Fission separate debug information files. 
const MCSection *DwarfInfoDWOSection; - const MCSection *DwarfTypesDWOSection; const MCSection *DwarfAbbrevDWOSection; const MCSection *DwarfStrDWOSection; const MCSection *DwarfLineDWOSection; @@ -271,9 +270,7 @@ class MCObjectFileInfo { return DwarfInfoDWOSection; } const MCSection *getDwarfTypesSection(uint64_t Hash) const; - const MCSection *getDwarfTypesDWOSection() const { - return DwarfTypesDWOSection; - } + const MCSection *getDwarfTypesDWOSection(uint64_t Hash) const; const MCSection *getDwarfAbbrevDWOSection() const { return DwarfAbbrevDWOSection; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 745b2f043273..4a317cf0b561 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1588,12 +1588,9 @@ void DwarfDebug::emitSectionLabels() { // Dwarf sections base addresses. DwarfInfoSectionSym = emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); - if (useSplitDwarf()) { + if (useSplitDwarf()) DwarfInfoDWOSectionSym = emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo"); - DwarfTypesDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo"); - } DwarfAbbrevSectionSym = emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); if (useSplitDwarf()) @@ -2357,9 +2354,9 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, bool TopLevelType = TypeUnitsUnderConstruction.empty(); AddrPool.resetUsedFlag(); - auto OwnedUnit = make_unique( - InfoHolder.getUnits().size() + TypeUnitsUnderConstruction.size(), CU, Asm, - this, &InfoHolder, getDwoLineTable(CU)); + auto OwnedUnit = + make_unique(InfoHolder.getUnits().size(), CU, Asm, this, + &InfoHolder, getDwoLineTable(CU)); DwarfTypeUnit &NewTU = *OwnedUnit; DIE &UnitDie = NewTU.getUnitDie(); TU = &NewTU; @@ -2372,14 +2369,13 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, uint64_t Signature = makeTypeSignature(Identifier); NewTU.setTypeSignature(Signature); - if (useSplitDwarf()) - NewTU.initSection(Asm->getObjFileLowering().getDwarfTypesDWOSection(), - DwarfTypesDWOSectionSym); - else { + if (!useSplitDwarf()) CU.applyStmtList(UnitDie); - NewTU.initSection( - Asm->getObjFileLowering().getDwarfTypesSection(Signature)); - } + + NewTU.initSection( + useSplitDwarf() + ? 
Asm->getObjFileLowering().getDwarfTypesDWOSection(Signature) + : Asm->getObjFileLowering().getDwarfTypesSection(Signature)); NewTU.setType(NewTU.createTypeDIE(CTy)); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index aa18e7c3456b..1b0b1ebafd88 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -253,7 +253,6 @@ class DwarfDebug : public AsmPrinterHandler { MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym; - MCSymbol *DwarfTypesDWOSectionSym; MCSymbol *DwarfStrDWOSectionSym; MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym; diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 78931487986f..e44f256c3a35 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -577,10 +577,6 @@ class DwarfTypeUnit : public DwarfUnit { sizeof(uint32_t); // Type DIE Offset } void initSection(const MCSection *Section); - // Bring in the base function (taking two args, including the section symbol) - // for use when building DWO type units (they don't go in unique comdat - // sections) - using DwarfUnit::initSection; DwarfCompileUnit &getCU() override { return CU; } protected: diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index e5377890da5b..bb1327995047 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -537,9 +537,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { DwarfInfoDWOSection = Ctx->getELFSection(".debug_info.dwo", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); - DwarfTypesDWOSection = - Ctx->getELFSection(".debug_types.dwo", ELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); DwarfAbbrevDWOSection = Ctx->getELFSection(".debug_abbrev.dwo", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); @@ -709,10 +706,6 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { Ctx->getCOFFSection(".debug_info.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); - DwarfTypesDWOSection = - Ctx->getCOFFSection(".debug_types.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); DwarfAbbrevDWOSection = Ctx->getCOFFSection(".debug_abbrev.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_MEM_READ, @@ -814,6 +807,13 @@ const MCSection *MCObjectFileInfo::getDwarfTypesSection(uint64_t Hash) const { SectionKind::getMetadata(), 0, utostr(Hash)); } +const MCSection * +MCObjectFileInfo::getDwarfTypesDWOSection(uint64_t Hash) const { + return Ctx->getELFSection(".debug_types.dwo", ELF::SHT_PROGBITS, + ELF::SHF_GROUP, SectionKind::getMetadata(), 0, + utostr(Hash)); +} + void MCObjectFileInfo::InitEHFrameSection() { if (Env == IsMachO) EHFrameSection = diff --git a/test/DebugInfo/X86/generate-odr-hash.ll b/test/DebugInfo/X86/generate-odr-hash.ll index 7f4c99de2f9c..e713f14c4f5b 100644 --- a/test/DebugInfo/X86/generate-odr-hash.ll +++ b/test/DebugInfo/X86/generate-odr-hash.ll @@ -1,12 +1,10 @@ ; REQUIRES: object-emission -; RUN: llc < %s -o %t -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu +; RUN: llc %s -o %t -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu ; RUN: llvm-dwarfdump %t | FileCheck --check-prefix=CHECK --check-prefix=SINGLE %s -; RUN: llvm-readobj -s -t %t | FileCheck --check-prefix=OBJ_COMMON %s -; RUN: 
llc < %s -split-dwarf=Enable -o %t -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu +; RUN: llc %s -split-dwarf=Enable -o %t -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu ; RUN: llvm-dwarfdump %t | FileCheck --check-prefix=CHECK --check-prefix=FISSION %s -; RUN: llvm-readobj -s -t %t | FileCheck --check-prefix=OBJ_COMMON --check-prefix=OBJ_FISSION %s ; Generated from bar.cpp: @@ -163,20 +161,6 @@ ; CHECK-NEXT: [[FLUFFY]] "echidna::capybara::mongoose::fluffy" ; CHECK-NEXT: [[WALRUS]] "walrus" -; Make sure debug_types are in comdat groups. This could be more rigid to check -; that they're the right comdat groups (each type in a separate comdat group, -; etc) -; OBJ_COMMON: Name: .debug_types ( -; OBJ_COMMON-NOT: } -; OBJ_COMMON: SHF_GROUP - -; Fission type units don't go in comdat groups, since their linker is debug -; aware it's handled using the debug info semantics rather than raw ELF object -; semantics. -; OBJ_FISSION: Name: .debug_types.dwo ( -; OBJ_FISSION-NOT: SHF_GROUP -; OBJ_FISSION: } - %struct.bar = type { i8 } %"class.echidna::capybara::mongoose::fluffy" = type { i32, i32 } %"struct.::walrus" = type { i8 } From 595bdb7e8badc2f71f97c5e3acc4f0ce999a2e97 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 21 May 2014 23:40:18 +0000 Subject: [PATCH 042/906] Group the scheduling functions together. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209339 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetSubtargetInfo.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h index 1b2e06acc2b0..e3febfb40f2b 100644 --- a/include/llvm/Target/TargetSubtargetInfo.h +++ b/include/llvm/Target/TargetSubtargetInfo.h @@ -76,6 +76,11 @@ class TargetSubtargetInfo : public MCSubtargetInfo { MachineInstr *end, unsigned NumRegionInstrs) const {} + // \brief Perform target specific adjustments to the latency of a schedule + // dependency. + virtual void adjustSchedDependency(SUnit *def, SUnit *use, + SDep& dep) const { } + // enablePostRAScheduler - If the target can benefit from post-regalloc // scheduling and the specified optimization level meets the requirement // return true to enable post-register-allocation scheduling. In @@ -84,10 +89,6 @@ class TargetSubtargetInfo : public MCSubtargetInfo { virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const; - // adjustSchedDependency - Perform target specific adjustments to - // the latency of a schedule dependency. - virtual void adjustSchedDependency(SUnit *def, SUnit *use, - SDep& dep) const { } /// \brief Enable use of alias analysis during code generation (during MI /// scheduling, DAGCombine, etc.). From 189fe78e2f8be4e1eb166c495788aa4d4c87517c Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 21 May 2014 23:40:26 +0000 Subject: [PATCH 043/906] Make early if conversion dependent upon the subtarget and add a subtarget hook to enable. Unconditionally add to the pass pipeline for targets that might want to use it. No functional change. 
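For illustration, the shape of the change is an opt-in virtual hook on the subtarget that the
always-scheduled pass queries at runtime. A minimal standalone sketch of that pattern follows;
the class and function names here are stand-ins, not the actual LLVM interfaces (those are in
the diff below):

  #include <cstdio>

  struct SubtargetSketch {
    // Targets override this to opt in; the default keeps the pass a no-op.
    virtual bool enableEarlyIfConversion() const { return false; }
    virtual ~SubtargetSketch() = default;
  };

  struct X86LikeSubtarget : SubtargetSketch {
    bool EarlyIfConvRequested = true; // stands in for the cl::opt plus hasCMov()
    bool enableEarlyIfConversion() const override { return EarlyIfConvRequested; }
  };

  bool runEarlyIfConversionSketch(const SubtargetSketch &ST) {
    // The pass sits unconditionally in the pipeline and bails out per function
    // when the subtarget does not opt in.
    if (!ST.enableEarlyIfConversion())
      return false;
    std::puts("early if-conversion would run here");
    return true;
  }

  int main() {
    SubtargetSketch Generic;
    X86LikeSubtarget X86;
    runEarlyIfConversionSketch(Generic); // skipped: default hook returns false
    runEarlyIfConversionSketch(X86);     // runs: target opted in
    return 0;
  }
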
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209340 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetSubtargetInfo.h | 3 +++ lib/CodeGen/EarlyIfConversion.cpp | 4 ++++ lib/Target/ARM64/ARM64Subtarget.cpp | 8 ++++++++ lib/Target/ARM64/ARM64Subtarget.h | 2 ++ lib/Target/ARM64/ARM64TargetMachine.cpp | 7 +------ lib/Target/PowerPC/PPCSubtarget.h | 2 ++ lib/Target/PowerPC/PPCTargetMachine.cpp | 8 ++------ lib/Target/X86/X86Subtarget.cpp | 12 ++++++++++++ lib/Target/X86/X86Subtarget.h | 2 ++ lib/Target/X86/X86TargetMachine.cpp | 13 ++----------- 10 files changed, 38 insertions(+), 23 deletions(-) diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h index e3febfb40f2b..c0c342b22ec6 100644 --- a/include/llvm/Target/TargetSubtargetInfo.h +++ b/include/llvm/Target/TargetSubtargetInfo.h @@ -94,6 +94,9 @@ class TargetSubtargetInfo : public MCSubtargetInfo { /// scheduling, DAGCombine, etc.). virtual bool useAA() const; + /// \brief Enable the use of the early if conversion pass. + virtual bool enableEarlyIfConversion() const { return false; } + /// \brief Reset the features for the subtarget. virtual void resetSubtargetFeatures(const MachineFunction *MF) { } }; diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index e3190241cd6e..b621e101773a 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -776,6 +776,10 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" << "********** Function: " << MF.getName() << '\n'); + // Only run if conversion if the target wants it. + if (!MF.getTarget().getSubtarget().enableEarlyIfConversion()) + return true; + TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); SchedModel = diff --git a/lib/Target/ARM64/ARM64Subtarget.cpp b/lib/Target/ARM64/ARM64Subtarget.cpp index 528cfc97cbf7..d81e21b51df6 100644 --- a/lib/Target/ARM64/ARM64Subtarget.cpp +++ b/lib/Target/ARM64/ARM64Subtarget.cpp @@ -26,6 +26,10 @@ using namespace llvm; #define GET_SUBTARGETINFO_TARGET_DESC #include "ARM64GenSubtargetInfo.inc" +static cl::opt +EnableEarlyIfConvert("arm64-early-ifcvt", cl::desc("Enable the early if " + "converter pass"), cl::init(true), cl::Hidden); + ARM64Subtarget::ARM64Subtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool LittleEndian) : ARM64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), @@ -105,3 +109,7 @@ void ARM64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, Policy.OnlyTopDown = false; Policy.OnlyBottomUp = false; } + +bool ARM64Subtarget::enableEarlyIfConversion() const override { + return EnableEarlyIfConvert; +} diff --git a/lib/Target/ARM64/ARM64Subtarget.h b/lib/Target/ARM64/ARM64Subtarget.h index 88b9c2e7aa3b..9cea3c387d63 100644 --- a/lib/Target/ARM64/ARM64Subtarget.h +++ b/lib/Target/ARM64/ARM64Subtarget.h @@ -102,6 +102,8 @@ class ARM64Subtarget : public ARM64GenSubtargetInfo { void overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin, MachineInstr *end, unsigned NumRegionInstrs) const override; + + bool enableEarlyIfConversion() const override; }; } // End llvm namespace diff --git a/lib/Target/ARM64/ARM64TargetMachine.cpp b/lib/Target/ARM64/ARM64TargetMachine.cpp index f5c187ceb278..5a8c5c6015d0 100644 --- a/lib/Target/ARM64/ARM64TargetMachine.cpp +++ b/lib/Target/ARM64/ARM64TargetMachine.cpp @@ 
-24,10 +24,6 @@ static cl::opt EnableCCMP("arm64-ccmp", cl::desc("Enable the CCMP formation pass"), cl::init(true), cl::Hidden); -static cl::opt -EnableEarlyIfConvert("arm64-early-ifcvt", cl::desc("Enable the early if " - "converter pass"), cl::init(true), cl::Hidden); - static cl::opt EnableStPairSuppress("arm64-stp-suppress", cl::desc("Suppress STP for ARM64"), cl::init(true), cl::Hidden); @@ -169,8 +165,7 @@ bool ARM64PassConfig::addInstSelector() { bool ARM64PassConfig::addILPOpts() { if (EnableCCMP) addPass(createARM64ConditionalCompares()); - if (EnableEarlyIfConvert) - addPass(&EarlyIfConverterID); + addPass(&EarlyIfConverterID); if (EnableStPairSuppress) addPass(createARM64StorePairSuppressPass()); return true; diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 76f4a318e82d..ee43fd5f807f 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -205,6 +205,8 @@ class PPCSubtarget : public PPCGenSubtargetInfo { TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const override; + bool enableEarlyIfConversion() const override { return hasISEL(); } + // Scheduling customization. bool enableMachineScheduler() const override; void overrideSchedPolicy(MachineSchedPolicy &Policy, diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index fdfb8c9bfc9d..e9c7797b504f 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -148,12 +148,8 @@ bool PPCPassConfig::addPreISel() { } bool PPCPassConfig::addILPOpts() { - if (getPPCSubtarget().hasISEL()) { - addPass(&EarlyIfConverterID); - return true; - } - - return false; + addPass(&EarlyIfConverterID); + return true; } bool PPCPassConfig::addInstSelector() { diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index b94bd712ff72..a6ad386b0a94 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -35,6 +35,13 @@ using namespace llvm; #define GET_SUBTARGETINFO_CTOR #include "X86GenSubtargetInfo.inc" +// Temporary option to control early if-conversion for x86 while adding machine +// models. +static cl::opt +X86EarlyIfConv("x86-early-ifcvt", cl::Hidden, + cl::desc("Enable early if-conversion on X86")); + + /// ClassifyBlockAddressReference - Classify a blockaddress reference for the /// current subtarget according to how we should reference it in a non-pcrel /// context. @@ -310,3 +317,8 @@ X86Subtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel, CriticalPathRCs.clear(); return PostRAScheduler && OptLevel >= CodeGenOpt::Default; } + +bool +X86Subtarget::enableEarlyIfConversion() const override { + return hasCMOV() && X86EarlyIfConv; +} diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 8ec680efff83..703559a47648 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -430,6 +430,8 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool postRAScheduler() const { return PostRAScheduler; } + bool enableEarlyIfConversion() const override; + /// getInstrItins = Return the instruction itineraries based on the /// subtarget selection. 
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index d0449f42320c..dae6d4b2e15c 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -126,12 +126,6 @@ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden, cl::desc("Minimize AVX to SSE transition penalty"), cl::init(true)); -// Temporary option to control early if-conversion for x86 while adding machine -// models. -static cl::opt -X86EarlyIfConv("x86-early-ifcvt", cl::Hidden, - cl::desc("Enable early if-conversion on X86")); - //===----------------------------------------------------------------------===// // X86 Analysis Pass Setup //===----------------------------------------------------------------------===// @@ -192,11 +186,8 @@ bool X86PassConfig::addInstSelector() { } bool X86PassConfig::addILPOpts() { - if (X86EarlyIfConv && getX86Subtarget().hasCMov()) { - addPass(&EarlyIfConverterID); - return true; - } - return false; + addPass(&EarlyIfConverterID); + return true; } bool X86PassConfig::addPreRegAlloc() { From a2f371b8eb440d0fac62b879e82e442938413bbd Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 21 May 2014 23:51:57 +0000 Subject: [PATCH 044/906] Fix compilation issues. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209342 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/EarlyIfConversion.cpp | 4 +++- lib/Target/ARM64/ARM64Subtarget.cpp | 2 +- lib/Target/X86/X86Subtarget.cpp | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index b621e101773a..2d47be71bea0 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -777,7 +777,9 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" << "********** Function: " << MF.getName() << '\n'); // Only run if conversion if the target wants it. 
- if (!MF.getTarget().getSubtarget().enableEarlyIfConversion()) + if (!MF.getTarget() + .getSubtarget() + .enableEarlyIfConversion()) return true; TII = MF.getTarget().getInstrInfo(); diff --git a/lib/Target/ARM64/ARM64Subtarget.cpp b/lib/Target/ARM64/ARM64Subtarget.cpp index d81e21b51df6..624e47483ffa 100644 --- a/lib/Target/ARM64/ARM64Subtarget.cpp +++ b/lib/Target/ARM64/ARM64Subtarget.cpp @@ -110,6 +110,6 @@ void ARM64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, Policy.OnlyBottomUp = false; } -bool ARM64Subtarget::enableEarlyIfConversion() const override { +bool ARM64Subtarget::enableEarlyIfConversion() const { return EnableEarlyIfConvert; } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index a6ad386b0a94..989e0d61b6fc 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Host.h" @@ -319,6 +320,6 @@ X86Subtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel, } bool -X86Subtarget::enableEarlyIfConversion() const override { - return hasCMOV() && X86EarlyIfConv; +X86Subtarget::enableEarlyIfConversion() const { + return hasCMov() && X86EarlyIfConv; } From 107db21c2b8e16dc8c18c5c5e13056b6a9deedcf Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Thu, 22 May 2014 00:02:52 +0000 Subject: [PATCH 045/906] Similar to bitcast, treat addrspacecast as a foldable operand. Added a test sink-addrspacecast.ll to verify this change. Patch by Jingyue Wu. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209343 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CodeGenPrepare.cpp | 2 + .../CodeGenPrepare/X86/sink-addrspacecast.ll | 37 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index dc5f67b8c0b9..6aa60c66b7e0 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -1640,6 +1640,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, static bool MightBeFoldableInst(Instruction *I) { switch (I->getOpcode()) { case Instruction::BitCast: + case Instruction::AddrSpaceCast: // Don't touch identity bitcasts. if (I->getType() == I->getOperand(0)->getType()) return false; @@ -1994,6 +1995,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return MatchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::BitCast: + case Instruction::AddrSpaceCast: // BitCast is always a noop, and we can handle it as long as it is // int->int or pointer->pointer (we don't want int<->fp or something). 
if ((AddrInst->getOperand(0)->getType()->isPointerTy() || diff --git a/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll b/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll new file mode 100644 index 000000000000..a985c36707a9 --- /dev/null +++ b/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll @@ -0,0 +1,37 @@ +; RUN: opt -S -codegenprepare < %s | FileCheck %s + +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: @load_cast_gep +; CHECK: add i64 %sunkaddr, 40 +define void @load_cast_gep(i1 %cond, i64* %base) { +entry: + %addr = getelementptr inbounds i64* %base, i64 5 + %casted = addrspacecast i64* %addr to i32 addrspace(1)* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %v = load i32 addrspace(1)* %casted, align 4 + br label %fallthrough + +fallthrough: + ret void +} + +; CHECK-LABEL: @store_gep_cast +; CHECK: add i64 %sunkaddr, 20 +define void @store_gep_cast(i1 %cond, i64* %base) { +entry: + %casted = addrspacecast i64* %base to i32 addrspace(1)* + %addr = getelementptr inbounds i32 addrspace(1)* %casted, i64 5 + br i1 %cond, label %if.then, label %fallthrough + +if.then: + store i32 0, i32 addrspace(1)* %addr, align 4 + br label %fallthrough + +fallthrough: + ret void +} From facca6e3f3a3b2988cb872834937cb204eb2a686 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Thu, 22 May 2014 00:37:03 +0000 Subject: [PATCH 046/906] Fix a bug in SCEV's backedge taken count computation from my prior fix in Jan. This has to do with the trip count computation for loops with multiple exits, which is quite subtle. Most passes just ask for a single trip count number, so we must be conservative assuming any exit could be taken. Normally, we rely on the "exact" trip count, which was correctly given as "unknown". However, SCEV also gives a "max" back-edge taken count. The loops max BE taken count is conservatively a maximum over the max of each exit's non-exiting iterations count. Note that some exit tests can be skipped so the max loop back-edge taken count can actually exceed the max non-exiting iterations for some exits. However, when we know the loop *latch* cannot be skipped, we can directly use its max taken count disregarding other exits. I previously took the minimum here without checking whether the other exit could be skipped. The correct, and simpler thing to do here is just to directly use the loop latch's max non-exiting iterations as the loops max back-edge count. In the problematic test case, the first loop exit had a max of zero non-exiting iterations, but could be skipped. The loop latch was known not to be skipped but had max of one non-exiting iteration. We incorrectly claimed the loop back-edge could be taken zero times, when it is actually taken one time. Fixes Loop %for.body.i: Unpredictable backedge-taken count. Loop %for.body.i: max backedge-taken count is 1. 
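Put differently: when the latch's exit test is known to execute on every iteration, the latch's
max non-exiting iteration count by itself bounds the loop, and folding in skippable exits with a
minimum can under-count. A small self-contained sketch of the corrected combining rule, using
plain integers and stand-in types in place of SCEV expressions (illustrative only):

  #include <algorithm>
  #include <cassert>
  #include <cstdint>
  #include <cstdio>
  #include <vector>

  struct ExitInfo {
    uint64_t MaxNonExiting; // max iterations on which this exit is not taken
    bool MustExit;          // the exit test is known to execute every iteration
    bool IsLatch;           // this exiting block is the loop latch
  };

  uint64_t maxBackedgeTakenCount(const std::vector<ExitInfo> &Exits) {
    uint64_t Max = 0;
    bool LatchMustExit = false;
    for (const ExitInfo &E : Exits) {
      if (E.MustExit && E.IsLatch) {
        // A latch that must execute its exit test bounds the whole loop by itself.
        Max = E.MaxNonExiting;
        LatchMustExit = true;
      } else if (!LatchMustExit) {
        // Otherwise stay conservative: take the max over the exits.
        Max = std::max(Max, E.MaxNonExiting);
      }
    }
    return Max;
  }

  int main() {
    // Shaped like PR19799: a skippable early exit with max 0 non-exiting
    // iterations and a must-execute latch with max 1. The loop's max
    // backedge-taken count is 1, not 0.
    std::vector<ExitInfo> Exits = {{0, false, false}, {1, true, true}};
    uint64_t Max = maxBackedgeTakenCount(Exits);
    assert(Max == 1);
    std::printf("max backedge-taken count = %llu\n", (unsigned long long)Max);
    return 0;
  }
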
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209358 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 14 +++++----- .../ScalarEvolution/max-trip-count.ll | 26 +++++++++++++++++++ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index cef9966f9e8c..dad8e07dadb1 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -4413,7 +4413,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { const SCEV *MaxBECount = getCouldNotCompute(); bool CouldComputeBECount = true; BasicBlock *Latch = L->getLoopLatch(); // may be NULL. - const SCEV *LatchMaxCount = nullptr; + bool LatchMustExit = false; SmallVector, 4> ExitCounts; for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]); @@ -4431,16 +4431,14 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { // skip some loop tests. Taking the max over the exits is sufficiently // conservative. TODO: We could do better taking into consideration // non-latch exits that dominate the latch. - if (EL.MustExit && ExitingBlocks[i] == Latch) - LatchMaxCount = EL.Max; - else + if (EL.MustExit && ExitingBlocks[i] == Latch) { + MaxBECount = EL.Max; + LatchMustExit = true; + } + else if (!LatchMustExit) MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max); } } - // Be more precise in the easy case of a loop latch that must exit. - if (LatchMaxCount) { - MaxBECount = getUMinFromMismatchedTypes(MaxBECount, LatchMaxCount); - } return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); } diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll index 0cdbdf57a64c..43a54b4f3033 100644 --- a/test/Analysis/ScalarEvolution/max-trip-count.ll +++ b/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -98,3 +98,29 @@ for.end: ; preds = %for.cond.for.end_cr ; CHECK: Determining loop execution counts for: @test ; CHECK-NEXT: backedge-taken count is ; CHECK-NEXT: max backedge-taken count is -1 + +; PR19799: Indvars miscompile due to an incorrect max backedge taken count from SCEV. +; CHECK-LABEL: @pr19799 +; CHECK: Loop %for.body.i: Unpredictable backedge-taken count. +; CHECK: Loop %for.body.i: max backedge-taken count is 1 +@a = common global i32 0, align 4 + +define i32 @pr19799() { +entry: + store i32 -1, i32* @a, align 4 + br label %for.body.i + +for.body.i: ; preds = %for.cond.i, %entry + %storemerge1.i = phi i32 [ -1, %entry ], [ %add.i.i, %for.cond.i ] + %tobool.i = icmp eq i32 %storemerge1.i, 0 + %add.i.i = add nsw i32 %storemerge1.i, 2 + br i1 %tobool.i, label %bar.exit, label %for.cond.i + +for.cond.i: ; preds = %for.body.i + store i32 %add.i.i, i32* @a, align 4 + %cmp.i = icmp slt i32 %storemerge1.i, 0 + br i1 %cmp.i, label %for.body.i, label %bar.exit + +bar.exit: ; preds = %for.cond.i, %for.body.i + ret i32 0 +} From b1a3b17e7896f8f5a3fa840b0caa0edfa7eae877 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 22 May 2014 00:48:36 +0000 Subject: [PATCH 047/906] DebugInfo: Simplify dead variable collection slightly. constructSubprogramDIE was already called for every subprogram in every CU when the module was started - there's no need to call it again at module finalization. 
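Since every subprogram DIE is created when the module is started, finalization only needs a
lookup plus an assert. A rough standalone sketch of that shape, using a hypothetical map-based
stand-in rather than the real DwarfUnit/DIE API:

  #include <cassert>
  #include <map>
  #include <string>

  struct DIESketch { std::string Name; };

  struct UnitSketch {
    std::map<std::string, DIESketch *> DIEs; // filled in when the module begins
    DIESketch *getDIE(const std::string &Key) const {
      auto It = DIEs.find(Key);
      return It == DIEs.end() ? nullptr : It->second;
    }
  };

  // At module finalization the DIE must already exist, so look it up and
  // assert instead of constructing it a second time.
  void attachDeadVariables(const UnitSketch &U, const std::string &SPKey) {
    DIESketch *SPDIE = U.getDIE(SPKey);
    assert(SPDIE && "subprogram DIE should exist from module start");
    (void)SPDIE; // dead variables' DIEs would be added as children here
  }

  int main() {
    DIESketch Func{"func"};
    UnitSketch U;
    U.DIEs["func"] = &Func;
    attachDeadVariables(U, "func");
    return 0;
  }
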
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209372 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 4a317cf0b561..049e9e28c119 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -827,10 +827,8 @@ void DwarfDebug::collectDeadVariables() { if (Variables.getNumElements() == 0) continue; - // FIXME: See the comment in constructSubprogramDIE about duplicate - // subprogram DIEs. - constructSubprogramDIE(*SPCU, SP); DIE *SPDIE = SPCU->getDIE(SP); + assert(SPDIE); for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { DIVariable DV(Variables.getElement(vi)); assert(DV.isVariable()); From 14fb9b60697c9c8d5cebe67aff5779f0f8afd217 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 22 May 2014 01:07:18 +0000 Subject: [PATCH 048/906] Sort includes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209373 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/ARM64StorePairSuppress.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Target/ARM64/ARM64StorePairSuppress.cpp b/lib/Target/ARM64/ARM64StorePairSuppress.cpp index 5416f11510c3..a9501ed92177 100644 --- a/lib/Target/ARM64/ARM64StorePairSuppress.cpp +++ b/lib/Target/ARM64/ARM64StorePairSuppress.cpp @@ -16,10 +16,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineTraceMetrics.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; From 162180cba5e30806f4da6b8442628ac435f030be Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 22 May 2014 01:07:21 +0000 Subject: [PATCH 049/906] Reset the subtarget for DAGToDAG on every iteration of runOnMachineFunction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209374 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/ARM64ISelDAGToDAG.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp index 4a1f9717bf73..45a837e69f66 100644 --- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp +++ b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp @@ -45,7 +45,7 @@ class ARM64DAGToDAGISel : public SelectionDAGISel { public: explicit ARM64DAGToDAGISel(ARM64TargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), TM(tm), - Subtarget(&TM.getSubtarget()), ForCodeSize(false) {} + Subtarget(nullptr), ForCodeSize(false) {} const char *getPassName() const override { return "ARM64 Instruction Selection"; @@ -57,6 +57,7 @@ class ARM64DAGToDAGISel : public SelectionDAGISel { FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) || FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + Subtarget = &TM.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); } From 58d9172bb402c1255b501694fd9170df18f10940 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 22 May 2014 01:07:24 +0000 Subject: [PATCH 050/906] Reset the subtarget for DAGToDAG on every iteration of runOnMachineFunction. This required updating the generated functions and TD file accordingly to be pointers rather than const references. 
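The underlying pattern is to hold the subtarget (and lowering) objects as pointer members and
re-fetch them at the top of runOnMachineFunction, instead of binding const references at pass
construction time. A minimal sketch of that pattern with stand-in types (the real classes and
the TableGen predicate updates are in the diff below):

  #include <cstdio>

  struct SubtargetSketch { bool HasVSX; };

  struct TargetMachineSketch {
    SubtargetSketch ST;
    const SubtargetSketch *getSubtargetImpl() const { return &ST; }
  };

  class ISelSketch {
    const TargetMachineSketch &TM;
    const SubtargetSketch *PPCSubTarget = nullptr; // pointer member, not a reference

  public:
    explicit ISelSketch(const TargetMachineSketch &TM) : TM(TM) {}

    bool runOnMachineFunction(const char *FnName) {
      // Refresh the cached pointer for every function; the subtarget is no
      // longer assumed to be fixed for the lifetime of the pass.
      PPCSubTarget = TM.getSubtargetImpl();
      std::printf("%s: hasVSX=%d\n", FnName, PPCSubTarget->HasVSX ? 1 : 0);
      return false;
    }
  };

  int main() {
    TargetMachineSketch TM{{true}};
    ISelSketch ISel(TM);
    ISel.runOnMachineFunction("foo");
    return 0;
  }
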
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209375 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFastISel.cpp | 20 ++++----- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 62 +++++++++++++------------- lib/Target/PowerPC/PPCInstrAltivec.td | 2 +- lib/Target/PowerPC/PPCInstrInfo.td | 8 ++-- lib/Target/PowerPC/PPCInstrVSX.td | 2 +- 5 files changed, 47 insertions(+), 47 deletions(-) diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index c0c495fa9aec..ed3cb4d3293d 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -86,7 +86,7 @@ class PPCFastISel final : public FastISel { const TargetMachine &TM; const TargetInstrInfo &TII; const TargetLowering &TLI; - const PPCSubtarget &PPCSubTarget; + const PPCSubtarget *PPCSubTarget; LLVMContext *Context; public: @@ -96,9 +96,7 @@ class PPCFastISel final : public FastISel { TM(FuncInfo.MF->getTarget()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - PPCSubTarget( - *((static_cast(&TM))->getSubtargetImpl()) - ), + PPCSubTarget(&TM.getSubtarget()), Context(&FuncInfo.Fn->getContext()) { } // Backend specific FastISel code. @@ -740,7 +738,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, return false; MVT SrcVT = SrcEVT.getSimpleVT(); - if (SrcVT == MVT::i1 && PPCSubTarget.useCRBits()) + if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits()) return false; // See if operand 2 is an immediate encodeable in the compare. @@ -901,7 +899,7 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, if (!IsSigned) { LoadOpc = PPC::LFIWZX; Addr.Offset = 4; - } else if (PPCSubTarget.hasLFIWAX()) { + } else if (PPCSubTarget->hasLFIWAX()) { LoadOpc = PPC::LFIWAX; Addr.Offset = 4; } @@ -942,7 +940,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { // We can only lower an unsigned convert if we have the newer // floating-point conversion operations. - if (!IsSigned && !PPCSubTarget.hasFPCVT()) + if (!IsSigned && !PPCSubTarget->hasFPCVT()) return false; // FIXME: For now we require the newer floating-point conversion operations @@ -950,7 +948,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { // to single-precision float. Otherwise we have to generate a lot of // fiddly code to avoid double rounding. If necessary, the fiddly code // can be found in PPCTargetLowering::LowerINT_TO_FP(). - if (DstVT == MVT::f32 && !PPCSubTarget.hasFPCVT()) + if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT()) return false; // Extend the input if necessary. @@ -1065,7 +1063,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { if (IsSigned) Opc = PPC::FCTIWZ; else - Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; + Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; else Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ; @@ -2002,7 +2000,7 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { // If we're using CR bit registers for i1 values, handle that as a special // case first. 
- if (VT == MVT::i1 && PPCSubTarget.useCRBits()) { + if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { const ConstantInt *CI = cast(C); unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -2176,7 +2174,7 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { // If we're using CR bit registers for i1 values, handle that as a special // case first. - if (VT == MVT::i1 && PPCSubTarget.useCRBits()) { + if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 63dac61f4cdc..f6e075d27193 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -51,23 +51,25 @@ namespace { /// class PPCDAGToDAGISel : public SelectionDAGISel { const PPCTargetMachine &TM; - const PPCTargetLowering &PPCLowering; - const PPCSubtarget &PPCSubTarget; + const PPCTargetLowering *PPCLowering; + const PPCSubtarget *PPCSubTarget; unsigned GlobalBaseReg; public: explicit PPCDAGToDAGISel(PPCTargetMachine &tm) : SelectionDAGISel(tm), TM(tm), - PPCLowering(*TM.getTargetLowering()), - PPCSubTarget(*TM.getSubtargetImpl()) { + PPCLowering(TM.getTargetLowering()), + PPCSubTarget(TM.getSubtargetImpl()) { initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override { // Make sure we re-emit a set of the global base reg if necessary GlobalBaseReg = 0; + PPCLowering = TM.getTargetLowering(); + PPCSubTarget = TM.getSubtargetImpl(); SelectionDAGISel::runOnMachineFunction(MF); - if (!PPCSubTarget.isSVR4ABI()) + if (!PPCSubTarget->isSVR4ABI()) InsertVRSaveCode(MF); return true; @@ -89,7 +91,7 @@ namespace { /// getSmallIPtrImm - Return a target constant of pointer type. inline SDValue getSmallIPtrImm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy()); + return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy()); } /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s @@ -122,7 +124,7 @@ namespace { /// a base register plus a signed 16-bit displacement [r+imm]. bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, false); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false); } /// SelectAddrImmOffs - Return true if the operand is valid for a preinc @@ -142,20 +144,20 @@ namespace { /// represented as an indexed [r+r] operation. Returns false if it can /// be represented by [r+imm], which are preferred. bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG); + return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG); } /// SelectAddrIdxOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG); + return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG); } /// SelectAddrImmX4 - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement that is a multiple of 4. /// Suitable for use by STD and friends. 
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, true); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true); } // Select an address into a single register. @@ -272,7 +274,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { MachineBasicBlock::iterator MBBI = FirstMBB.begin(); DebugLoc dl; - if (PPCLowering.getPointerTy() == MVT::i32) { + if (PPCLowering->getPointerTy() == MVT::i32) { GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); @@ -283,7 +285,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { } } return CurDAG->getRegister(GlobalBaseReg, - PPCLowering.getPointerTy()).getNode(); + PPCLowering->getPointerTy()).getNode(); } /// isIntS16Immediate - This method tests to see if the node is either a 32-bit @@ -580,7 +582,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, Opc = PPC::FCMPUS; } else { assert(LHS.getValueType() == MVT::f64 && "Unknown vt!"); - Opc = PPCSubTarget.hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; + Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; } return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); } @@ -746,7 +748,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); - if (!PPCSubTarget.useCRBits() && + if (!PPCSubTarget->useCRBits() && isInt32Immediate(N->getOperand(1), Imm)) { // We can codegen setcc op, imm very efficiently compared to a brcond. // Check for those cases here. @@ -828,7 +830,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { if (LHS.getValueType().isVector()) { EVT VecVT = LHS.getValueType(); MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy; - unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget.hasVSX()); + unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget->hasVSX()); switch (CC) { case ISD::SETEQ: @@ -839,7 +841,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { case ISD::SETONE: case ISD::SETUNE: { SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? PPC::XXLNOR : + return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR, VecVT, VCmp, VCmp); } @@ -861,9 +863,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); } else { SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); - unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX()); + unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX()); SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? PPC::XXLOR : + return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR : PPC::VOR, VecVT, VCmpGT, VCmpEQ); } @@ -872,9 +874,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { case ISD::SETOLE: case ISD::SETULE: { SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0); - unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX()); + unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX()); SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? 
PPC::XXLOR : + return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR : PPC::VOR, VecVT, VCmpLE, VCmpEQ); } @@ -883,7 +885,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { } } - if (PPCSubTarget.useCRBits()) + if (PPCSubTarget->useCRBits()) return nullptr; bool Inv; @@ -1101,7 +1103,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), - PPCLowering.getPointerTy(), + PPCLowering->getPointerTy(), MVT::Other, Ops); } else { unsigned Opcode; @@ -1136,7 +1138,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Base, Offset, Chain }; return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), - PPCLowering.getPointerTy(), + PPCLowering->getPointerTy(), MVT::Other, Ops); } } @@ -1267,7 +1269,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { bool isPPC64 = (PtrVT == MVT::i64); // If this is a select of i1 operands, we'll pattern match it. - if (PPCSubTarget.useCRBits() && + if (PPCSubTarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1) break; @@ -1338,14 +1340,14 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); } case ISD::VSELECT: - if (PPCSubTarget.hasVSX()) { + if (PPCSubTarget->hasVSX()) { SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) }; return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops); } break; case ISD::VECTOR_SHUFFLE: - if (PPCSubTarget.hasVSX() && (N->getValueType(0) == MVT::v2f64 || + if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64)) { ShuffleVectorSDNode *SVN = cast(N); @@ -1383,7 +1385,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { break; case PPCISD::BDNZ: case PPCISD::BDZ: { - bool IsPPC64 = PPCSubTarget.isPPC64(); + bool IsPPC64 = PPCSubTarget->isPPC64(); SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : @@ -1443,7 +1445,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); } case PPCISD::TOC_ENTRY: { - assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI"); + assert (PPCSubTarget->isPPC64() && "Only supported for 64-bit ABI"); // For medium and large code model, we generate two instructions as // described below. Otherwise we allow SelectCodeCommon to handle this, @@ -1583,7 +1585,7 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { // containing zero. bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { // If we're not using isel, then this does not matter. - if (!PPCSubTarget.hasISEL()) + if (!PPCSubTarget->hasISEL()) return false; for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); @@ -2045,7 +2047,7 @@ void PPCDAGToDAGISel::PeepholeCROps() { void PPCDAGToDAGISel::PeepholePPC64() { // These optimizations are currently supported only for 64-bit SVR4. 
- if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64()) + if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64()) return; SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 2fd4a3eeae7d..f3c2eab9746d 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -223,7 +223,7 @@ class VX2_Int_Ty2 xo, string opc, Intrinsic IntID, ValueType OutTy, //===----------------------------------------------------------------------===// // Instruction Definitions. -def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">; +def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">; let Predicates = [HasAltivec] in { let isCodeGenOnly = 1 in { diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 1d984aba1faf..e421f8e69e65 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -610,10 +610,10 @@ def iaddroff : ComplexPattern; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. -def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">; -def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; -def IsBookE : Predicate<"PPCSubTarget.isBookE()">; -def IsNotBookE : Predicate<"!PPCSubTarget.isBookE()">; +def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">; +def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">; +def IsBookE : Predicate<"PPCSubTarget->isBookE()">; +def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 9cc919ebe383..49bcc4876d33 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -39,7 +39,7 @@ multiclass XX3Form_Rcr opcode, bits<7> xo, dag OOL, dag IOL, } } -def HasVSX : Predicate<"PPCSubTarget.hasVSX()">; +def HasVSX : Predicate<"PPCSubTarget->hasVSX()">; let Predicates = [HasVSX] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. let neverHasSideEffects = 1 in { // VSX instructions don't have side effects. From 6ccae2df5bc9ebee44e2ddc02e97af698b5e4f79 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 22 May 2014 01:21:35 +0000 Subject: [PATCH 051/906] Avoid using subtarget features when initializing the pass pipeline on PPC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209376 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.cpp | 13 ++++++++++++- lib/Target/PowerPC/PPCTargetMachine.cpp | 16 +++++----------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 0c15e2d02320..fd7238401bb0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1813,9 +1813,14 @@ namespace { public: bool runOnMachineFunction(MachineFunction &MF) override { + TM = static_cast(&MF.getTarget()); + // If we don't have VSX then go ahead and return without doing + // anything. 
+ if (!TM->getSubtargetImpl()->hasVSX()) + return false; + LIS = &getAnalysis(); - TM = static_cast(&MF.getTarget()); TII = TM->getInstrInfo(); bool Changed = false; @@ -1966,6 +1971,9 @@ namespace { public: bool runOnMachineFunction(MachineFunction &MF) override { TM = static_cast(&MF.getTarget()); + // If we don't have VSX on the subtarget, don't do anything. + if (!TM->getSubtargetImpl()->hasVSX()) + return false; TII = TM->getInstrInfo(); bool Changed = false; @@ -2040,6 +2048,9 @@ namespace { public: bool runOnMachineFunction(MachineFunction &MF) override { TM = static_cast(&MF.getTarget()); + // If we don't have VSX don't bother doing anything here. + if (!TM->getSubtargetImpl()->hasVSX()) + return false; TII = TM->getInstrInfo(); bool Changed = false; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index e9c7797b504f..2323addd21af 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -161,25 +161,19 @@ bool PPCPassConfig::addInstSelector() { addPass(createPPCCTRLoopsVerify()); #endif - if (getPPCSubtarget().hasVSX()) - addPass(createPPCVSXCopyPass()); - + addPass(createPPCVSXCopyPass()); return false; } bool PPCPassConfig::addPreRegAlloc() { - if (getPPCSubtarget().hasVSX()) { - initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); - insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID, - &PPCVSXFMAMutateID); - } - + initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); + insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID, + &PPCVSXFMAMutateID); return false; } bool PPCPassConfig::addPreSched2() { - if (getPPCSubtarget().hasVSX()) - addPass(createPPCVSXCopyCleanupPass()); + addPass(createPPCVSXCopyCleanupPass()); if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); From 4551b0a80012bf55e0d045a8f731de8e047fd315 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 22 May 2014 01:21:44 +0000 Subject: [PATCH 052/906] Fix typo. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209377 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMTargetTransformInfo.cpp | 2 +- lib/Target/ARM64/ARM64TargetTransformInfo.cpp | 2 +- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 2 +- lib/Target/X86/X86TargetTransformInfo.cpp | 2 +- lib/Target/XCore/XCoreTargetTransformInfo.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index ebb25f49fa44..57df7da7f310 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -25,7 +25,7 @@ using namespace llvm; #define DEBUG_TYPE "armtti" // Declare the pass initialization routine locally as target-specific passes -// don't havve a target-wide initialization entry point, and so we rely on the +// don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. 
namespace llvm { void initializeARMTTIPass(PassRegistry &); diff --git a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp b/lib/Target/ARM64/ARM64TargetTransformInfo.cpp index ac7142f3febd..cc4cdff62b5b 100644 --- a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp +++ b/lib/Target/ARM64/ARM64TargetTransformInfo.cpp @@ -27,7 +27,7 @@ using namespace llvm; #define DEBUG_TYPE "arm64tti" // Declare the pass initialization routine locally as target-specific passes -// don't havve a target-wide initialization entry point, and so we rely on the +// don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. namespace llvm { void initializeARM64TTIPass(PassRegistry &); diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 2cc1dfc02cc6..007901b23e0c 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -29,7 +29,7 @@ static cl::opt DisablePPCConstHoist("disable-ppc-constant-hoisting", cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden); // Declare the pass initialization routine locally as target-specific passes -// don't havve a target-wide initialization entry point, and so we rely on the +// don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. namespace llvm { void initializePPCTTIPass(PassRegistry &); diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 69f34a16b6f7..91b9d40f8ef7 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -26,7 +26,7 @@ using namespace llvm; #define DEBUG_TYPE "x86tti" // Declare the pass initialization routine locally as target-specific passes -// don't havve a target-wide initialization entry point, and so we rely on the +// don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. namespace llvm { void initializeX86TTIPass(PassRegistry &); diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.cpp b/lib/Target/XCore/XCoreTargetTransformInfo.cpp index 079be1fc0abc..80d193d1c26e 100644 --- a/lib/Target/XCore/XCoreTargetTransformInfo.cpp +++ b/lib/Target/XCore/XCoreTargetTransformInfo.cpp @@ -24,7 +24,7 @@ using namespace llvm; #define DEBUG_TYPE "xcoretti" // Declare the pass initialization routine locally as target-specific passes -// don't havve a target-wide initialization entry point, and so we rely on the +// don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. namespace llvm { void initializeXCoreTTIPass(PassRegistry &); From 42854e57a9006057baefcabd0e29c4bb29a5d212 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 22 May 2014 01:45:57 +0000 Subject: [PATCH 053/906] Rename createGlobalBaseRegPass -> createX86GlobalBaseRegPass to make it obvious that it's a target specific pass. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209380 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.h | 4 ++-- lib/Target/X86/X86InstrInfo.cpp | 2 +- lib/Target/X86/X86TargetMachine.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 18e6845efec0..64e8ea834f47 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -30,9 +30,9 @@ class X86TargetMachine; FunctionPass *createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel); -/// createGlobalBaseRegPass - This pass initializes a global base +/// createX86GlobalBaseRegPass - This pass initializes a global base /// register for PIC on x86-32. -FunctionPass* createGlobalBaseRegPass(); +FunctionPass* createX86GlobalBaseRegPass(); /// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses /// to local-dynamic TLS variables so that the TLS base address for the module diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index efb5c70c8108..8edce9fd4626 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -5451,7 +5451,7 @@ namespace { char CGBR::ID = 0; FunctionPass* -llvm::createGlobalBaseRegPass() { return new CGBR(); } +llvm::createX86GlobalBaseRegPass() { return new CGBR(); } namespace { struct LDTLSCleanup : public MachineFunctionPass { diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index dae6d4b2e15c..1970ffa0bd15 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -180,7 +180,7 @@ bool X86PassConfig::addInstSelector() { // For 32-bit, prepend instructions to set the "global base reg" for PIC. if (!getX86Subtarget().is64Bit()) - addPass(createGlobalBaseRegPass()); + addPass(createX86GlobalBaseRegPass()); return false; } From 1e264de20544d5a5ba5d0b45331730c213456192 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 22 May 2014 01:45:59 +0000 Subject: [PATCH 054/906] Remove extra local variable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209381 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FixupLEAs.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp index 576f39d3c3a3..1e7712c2df86 100644 --- a/lib/Target/X86/X86FixupLEAs.cpp +++ b/lib/Target/X86/X86FixupLEAs.cpp @@ -149,8 +149,7 @@ FunctionPass *llvm::createX86FixupLEAs() { } bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { - MF = &Func; - TM = &MF->getTarget(); + TM = &Func.getTarget(); TII = static_cast(TM->getInstrInfo()); DEBUG(dbgs() << "Start X86FixupLEAs\n";); From 26bbeece29b85cae68657aaa2af2e33d5ca67182 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 22 May 2014 01:46:02 +0000 Subject: [PATCH 055/906] Avoid using subtarget features when adding X86 specific passes to the pass pipeline. 
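The pattern used throughout this change is to add each X86 pass to the pipeline
unconditionally and have the pass gate itself on the subtarget at the top of
runOnMachineFunction. A rough, hypothetical sketch of that shape follows
(ExampleX86Pass is a placeholder, not a pass touched here; the real passes test
features such as padShortFunctions, LEAusesAG/slowLEA, or AVX):

    #include "X86Subtarget.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineFunctionPass.h"
    #include "llvm/Target/TargetMachine.h"
    using namespace llvm;

    namespace {
    // Hypothetical pass illustrating the pattern: it is always added to the
    // pipeline and decides per function whether it has any work to do.
    struct ExampleX86Pass : MachineFunctionPass {
      static char ID;
      ExampleX86Pass() : MachineFunctionPass(ID) {}

      bool runOnMachineFunction(MachineFunction &MF) override {
        // Query the subtarget here rather than when the pipeline is built.
        const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>();
        if (!ST.padShortFunctions())
          return false; // nothing to do on this subtarget
        // ... the actual transformation would go here ...
        return false;
      }
    };
    char ExampleX86Pass::ID = 0;
    }

This keeps the pass pipeline identical across subtargets, which matters once
the subtarget may differ from function to function.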
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209382 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FixupLEAs.cpp | 4 ++++ lib/Target/X86/X86InstrInfo.cpp | 6 ++++-- lib/Target/X86/X86PadShortFunction.cpp | 4 ++++ lib/Target/X86/X86TargetMachine.cpp | 14 +++----------- lib/Target/X86/X86VZeroUpper.cpp | 3 ++- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp index 1e7712c2df86..6c5b86f61969 100644 --- a/lib/Target/X86/X86FixupLEAs.cpp +++ b/lib/Target/X86/X86FixupLEAs.cpp @@ -150,6 +150,10 @@ FunctionPass *llvm::createX86FixupLEAs() { bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { TM = &Func.getTarget(); + const X86Subtarget &ST = TM->getSubtarget(); + if (!ST.LEAusesAG() && !ST.slowLEA()) + return false; + TII = static_cast(TM->getInstrInfo()); DEBUG(dbgs() << "Start X86FixupLEAs\n";); diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 8edce9fd4626..6993577d19c0 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -5395,8 +5395,10 @@ namespace { const X86TargetMachine *TM = static_cast(&MF.getTarget()); - assert(!TM->getSubtarget().is64Bit() && - "X86-64 PIC uses RIP relative addressing"); + // Don't do anything if this is 64-bit as 64-bit PIC + // uses RIP relative addressing. + if (TM->getSubtarget().is64Bit()) + return false; // Only emit a global base reg in PIC mode. if (TM->getRelocationModel() != Reloc::PIC_) diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp index 84521ccee481..6639875d07e3 100644 --- a/lib/Target/X86/X86PadShortFunction.cpp +++ b/lib/Target/X86/X86PadShortFunction.cpp @@ -17,6 +17,7 @@ #include "X86.h" #include "X86InstrInfo.h" +#include "X86Subtarget.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -101,6 +102,9 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { } TM = &MF.getTarget(); + if (!TM->getSubtarget().padShortFunctions()) + return false; + TII = TM->getInstrInfo(); // Search through basic blocks and mark the ones that have early returns diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 1970ffa0bd15..93760efe666d 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -178,9 +178,7 @@ bool X86PassConfig::addInstSelector() { if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None) addPass(createCleanupLocalDynamicTLSPass()); - // For 32-bit, prepend instructions to set the "global base reg" for PIC. 
- if (!getX86Subtarget().is64Bit()) - addPass(createX86GlobalBaseRegPass()); + addPass(createX86GlobalBaseRegPass()); return false; } @@ -206,19 +204,13 @@ bool X86PassConfig::addPreEmitPass() { ShouldPrint = true; } - if (getX86Subtarget().hasAVX() && UseVZeroUpper) { + if (UseVZeroUpper) { addPass(createX86IssueVZeroUpperPass()); ShouldPrint = true; } - if (getOptLevel() != CodeGenOpt::None && - getX86Subtarget().padShortFunctions()) { + if (getOptLevel() != CodeGenOpt::None) { addPass(createX86PadShortFunctions()); - ShouldPrint = true; - } - if (getOptLevel() != CodeGenOpt::None && - (getX86Subtarget().LEAusesAG() || - getX86Subtarget().slowLEA())){ addPass(createX86FixupLEAs()); ShouldPrint = true; } diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index 945ea3e88179..0bb5f990cae7 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -247,7 +247,8 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) { /// runOnMachineFunction - Loop over all of the basic blocks, inserting /// vzero upper instructions before function calls. bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { - if (MF.getTarget().getSubtarget().hasAVX512()) + const X86Subtarget &ST = MF.getTarget().getSubtarget(); + if (!ST.hasAVX() || ST.hasAVX512()) return false; TII = MF.getTarget().getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); From c5f6f15fe50ab2ead490f21671c2c407b1c41dcd Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 22 May 2014 01:53:26 +0000 Subject: [PATCH 056/906] Override runOnMachineFunction for X86ISelDAGToDAG so that we can reset the subtarget on each function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209384 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 03c9620db4a2..74386d33990d 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -162,6 +162,13 @@ namespace { return "X86 DAG->DAG Instruction Selection"; } + bool runOnMachineFunction(MachineFunction &MF) override { + // Reset the subtarget each time through. + Subtarget = &TM.getSubtarget(); + SelectionDAGISel::runOnMachineFunction(MF); + return true; + } + void EmitFunctionEntryCode() override; bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; From 424bbbbbbcadfa402b2e103e8fdec48f99af7fae Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 22 May 2014 02:00:27 +0000 Subject: [PATCH 057/906] Override runOnMachineFunction for ARMISelDAGToDAG so that we can reset the subtarget on each function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209386 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 8577c8af47d5..08d598d7c5a6 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -73,6 +73,13 @@ class ARMDAGToDAGISel : public SelectionDAGISel { Subtarget(&TM.getSubtarget()) { } + bool runOnMachineFunction(MachineFunction &MF) override { + // Reset the subtarget each time through. 
+ Subtarget = &TM.getSubtarget(); + SelectionDAGISel::runOnMachineFunction(MF); + return true; + } + const char *getPassName() const override { return "ARM Instruction Selection"; } From f4f930c7955fa1c58197dd89d4c6cbb7a9ed45ba Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Thu, 22 May 2014 02:18:10 +0000 Subject: [PATCH 058/906] MC: formalise some assertions into proper errors Now that clang can be used as an assembler via the IAS, invalid assembler inputs would cause the assertions to trigger. Although we cannot recover from the errors here, nor provide caret diagnostics, attempt to handle them slightly more gracefully by reporting a fatal error. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209387 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCWinCOFFStreamer.h | 3 +++ lib/MC/WinCOFFStreamer.cpp | 31 +++++++++++++++++++++-------- test/MC/COFF/invalid-def.s | 6 ++++++ test/MC/COFF/invalid-endef.s | 5 +++++ test/MC/COFF/invalid-scl-range.s | 7 +++++++ test/MC/COFF/invalid-scl.s | 5 +++++ test/MC/COFF/invalid-type-range.s | 7 +++++++ test/MC/COFF/invalid-type.s | 5 +++++ 8 files changed, 61 insertions(+), 8 deletions(-) create mode 100644 test/MC/COFF/invalid-def.s create mode 100644 test/MC/COFF/invalid-endef.s create mode 100644 test/MC/COFF/invalid-scl-range.s create mode 100644 test/MC/COFF/invalid-scl.s create mode 100644 test/MC/COFF/invalid-type-range.s create mode 100644 test/MC/COFF/invalid-type.s diff --git a/include/llvm/MC/MCWinCOFFStreamer.h b/include/llvm/MC/MCWinCOFFStreamer.h index b0a27cdbd75b..34e39bb0a636 100644 --- a/include/llvm/MC/MCWinCOFFStreamer.h +++ b/include/llvm/MC/MCWinCOFFStreamer.h @@ -65,6 +65,9 @@ class MCWinCOFFStreamer : public MCObjectStreamer { protected: const MCSymbol *CurSymbol; void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo &STI) override; + +private: + LLVM_ATTRIBUTE_NORETURN void FatalError(const Twine &Msg) const; }; } diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index 40b8dd944bd9..e6df4651a536 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" @@ -125,30 +126,39 @@ void MCWinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) { assert((!Symbol->isInSection() || Symbol->getSection().getVariant() == MCSection::SV_COFF) && "Got non-COFF section in the COFF backend!"); - assert(!CurSymbol && "starting new symbol definition in a symbol definition"); + + if (CurSymbol) + FatalError("starting a new symbol definition without completing the " + "previous one"); CurSymbol = Symbol; } void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { - assert(CurSymbol && "StorageClass specified outside of symbol definition"); - assert((StorageClass & ~0xFF) == 0 && - "StorageClass must only have data in the first byte!"); + if (!CurSymbol) + FatalError("storage class specified outside of symbol definition"); + + if (StorageClass & ~0xff) + FatalError(Twine("storage class value '") + itostr(StorageClass) + + "' out of range"); MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*CurSymbol); SD.modifyFlags(StorageClass << COFF::SF_ClassShift, COFF::SF_ClassMask); } void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) { - assert(CurSymbol && "SymbolType specified outside of a symbol definition"); - assert((Type & 
~0xFFFF) == 0 && - "Type must only have data in the first 2 bytes"); + if (!CurSymbol) + FatalError("symbol type specified outside of a symbol definition"); + + if (Type & ~0xffff) + FatalError(Twine("type value '") + itostr(Type) + "' out of range"); MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*CurSymbol); SD.modifyFlags(Type << COFF::SF_TypeShift, COFF::SF_TypeMask); } void MCWinCOFFStreamer::EndCOFFSymbolDef() { - assert(CurSymbol && "ending symbol definition without beginning one"); + if (!CurSymbol) + FatalError("ending symbol definition without starting one"); CurSymbol = nullptr; } @@ -239,5 +249,10 @@ void MCWinCOFFStreamer::EmitWin64EHHandlerData() { void MCWinCOFFStreamer::FinishImpl() { MCObjectStreamer::FinishImpl(); } + +LLVM_ATTRIBUTE_NORETURN +void MCWinCOFFStreamer::FatalError(const Twine &Msg) const { + getContext().FatalError(SMLoc(), Msg); +} } diff --git a/test/MC/COFF/invalid-def.s b/test/MC/COFF/invalid-def.s new file mode 100644 index 000000000000..bfa1a54cbd75 --- /dev/null +++ b/test/MC/COFF/invalid-def.s @@ -0,0 +1,6 @@ +# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s +# REQUIRES: asserts + + .def first + .def second + diff --git a/test/MC/COFF/invalid-endef.s b/test/MC/COFF/invalid-endef.s new file mode 100644 index 000000000000..543685a66c6b --- /dev/null +++ b/test/MC/COFF/invalid-endef.s @@ -0,0 +1,5 @@ +# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s +# REQUIRES: asserts + + .endef + diff --git a/test/MC/COFF/invalid-scl-range.s b/test/MC/COFF/invalid-scl-range.s new file mode 100644 index 000000000000..ec0c2bb19252 --- /dev/null +++ b/test/MC/COFF/invalid-scl-range.s @@ -0,0 +1,7 @@ +# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s +# REQUIRES: asserts + + .def storage_class_range + .scl 1337 + .endef + diff --git a/test/MC/COFF/invalid-scl.s b/test/MC/COFF/invalid-scl.s new file mode 100644 index 000000000000..0d62497e96d9 --- /dev/null +++ b/test/MC/COFF/invalid-scl.s @@ -0,0 +1,5 @@ +# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s +# REQUIRES: asserts + + .scl 1337 + diff --git a/test/MC/COFF/invalid-type-range.s b/test/MC/COFF/invalid-type-range.s new file mode 100644 index 000000000000..9397cc50c35b --- /dev/null +++ b/test/MC/COFF/invalid-type-range.s @@ -0,0 +1,7 @@ +# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s +# REQUIRES: asserts + + .def invalid_type_range + .type 65536 + .endef + diff --git a/test/MC/COFF/invalid-type.s b/test/MC/COFF/invalid-type.s new file mode 100644 index 000000000000..a5c61f4aba8b --- /dev/null +++ b/test/MC/COFF/invalid-type.s @@ -0,0 +1,5 @@ +# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s +# REQUIRES: asserts + + .type 65536 + From bce7d05ba9bf159da47af70f360c8ff7205c91a6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 22 May 2014 03:20:30 +0000 Subject: [PATCH 059/906] R600/SI: Match fp_to_uint / uint_to_fp for f64 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209388 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 9 +++++++-- test/CodeGen/R600/fp_to_uint.f64.ll | 9 +++++++++ test/CodeGen/R600/uint_to_fp.f64.ll | 9 +++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/R600/fp_to_uint.f64.ll create mode 100644 test/CodeGen/R600/uint_to_fp.f64.ll diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index b216762564f4..76906f86929e 100644 --- 
a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -977,8 +977,13 @@ defm V_CVT_F64_F32 : VOP1_64_32 <0x00000010, "V_CVT_F64_F32", //defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>; //defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>; //defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>; -//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>; -//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>; +defm V_CVT_U32_F64 : VOP1_32_64 <0x00000015, "V_CVT_U32_F64", + [(set i32:$dst, (fp_to_uint f64:$src0))] +>; +defm V_CVT_F64_U32 : VOP1_64_32 <0x00000016, "V_CVT_F64_U32", + [(set f64:$dst, (uint_to_fp i32:$src0))] +>; + defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", [(set f32:$dst, (AMDGPUfract f32:$src0))] >; diff --git a/test/CodeGen/R600/fp_to_uint.f64.ll b/test/CodeGen/R600/fp_to_uint.f64.ll new file mode 100644 index 000000000000..bf607cef0884 --- /dev/null +++ b/test/CodeGen/R600/fp_to_uint.f64.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; SI-LABEL: @fp_to_uint_i32_f64 +; SI: V_CVT_U32_F64_e32 +define void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) { + %cast = fptoui double %in to i32 + store i32 %cast, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/uint_to_fp.f64.ll b/test/CodeGen/R600/uint_to_fp.f64.ll new file mode 100644 index 000000000000..75150c229130 --- /dev/null +++ b/test/CodeGen/R600/uint_to_fp.f64.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; SI-LABEL: @uint_to_fp_f64_i32 +; SI: V_CVT_F64_U32_e32 +define void @uint_to_fp_f64_i32(double addrspace(1)* %out, i32 %in) { + %cast = uitofp i32 %in to double + store double %cast, double addrspace(1)* %out, align 8 + ret void +} From 71ce2118bb80aab2bf4503477b0fdb90401bca98 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Thu, 22 May 2014 04:46:46 +0000 Subject: [PATCH 060/906] ARM: introduce llvm.arm.undefined intrinsic This intrinsic permits the emission of platform specific undefined sequences. ARM has reserved the 0xde opcode which takes a single integer parameter (ignored by the CPU). This permits the operating system to implement custom behaviour on this trap. The llvm.arm.undefined intrinsic is meant to provide a means for generating the target specific behaviour from the frontend. This is particularly useful for Windows on ARM which has made use of a series of these special opcodes. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209390 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsARM.td | 5 +++++ lib/Target/ARM/ARMInstrInfo.td | 2 +- lib/Target/ARM/ARMInstrThumb.td | 4 ++-- lib/Target/ARM/ARMInstrThumb2.td | 4 ++-- test/CodeGen/ARM/undefined.ll | 14 ++++++++++++++ 5 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 test/CodeGen/ARM/undefined.ll diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td index b8ba9291a7af..d19d7b82fafe 100644 --- a/include/llvm/IR/IntrinsicsARM.td +++ b/include/llvm/IR/IntrinsicsARM.td @@ -125,6 +125,11 @@ def int_arm_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], def int_arm_hint : Intrinsic<[], [llvm_i32_ty]>; +//===----------------------------------------------------------------------===// +// UND (reserved undefined sequence) + +def int_arm_undefined : Intrinsic<[], [llvm_i32_ty]>; + //===----------------------------------------------------------------------===// // Advanced SIMD (NEON) diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index f642893161c3..718d5da9d05a 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1969,7 +1969,7 @@ def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", // A8.8.247 UDF - Undefined (Encoding A1) def UDF : AInoP<(outs), (ins imm0_65535:$imm16), MiscFrm, NoItinerary, - "udf", "\t$imm16", []> { + "udf", "\t$imm16", [(int_arm_undefined imm0_65535:$imm16)]> { bits<16> imm16; let Inst{31-28} = 0b1110; // AL let Inst{27-25} = 0b011; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index ff3832d98b5e..e17f73af03ec 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -1194,8 +1194,8 @@ def tTST : // A8.6.230 Sched<[WriteALU]>; // A8.8.247 UDF - Undefined (Encoding T1) -def tUDF : TI<(outs), (ins imm0_255:$imm8), IIC_Br, "udf\t$imm8", []>, - Encoding16 { +def tUDF : TI<(outs), (ins imm0_255:$imm8), IIC_Br, "udf\t$imm8", + [(int_arm_undefined imm0_255:$imm8)]>, Encoding16 { bits<8> imm8; let Inst{15-12} = 0b1101; let Inst{11-8} = 0b1110; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 28f528a510e9..c30d6abbb299 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2408,8 +2408,8 @@ def t2UBFX: T2TwoRegBitFI< } // A8.8.247 UDF - Undefined (Encoding T2) -def t2UDF - : T2XI<(outs), (ins imm0_65535:$imm16), IIC_Br, "udf.w\t$imm16", []> { +def t2UDF : T2XI<(outs), (ins imm0_65535:$imm16), IIC_Br, "udf.w\t$imm16", + [(int_arm_undefined imm0_65535:$imm16)]> { bits<16> imm16; let Inst{31-29} = 0b111; let Inst{28-27} = 0b10; diff --git a/test/CodeGen/ARM/undefined.ll b/test/CodeGen/ARM/undefined.ll new file mode 100644 index 000000000000..86422fb54412 --- /dev/null +++ b/test/CodeGen/ARM/undefined.ll @@ -0,0 +1,14 @@ +; RUN: llc -mtriple armv7-eabi -o - %s | FileCheck %s +; RUN: llc -mtriple thumbv6m-eabi -o - %s | FileCheck %s +; RUN: llc -mtriple thumbv7-eabi -o - %s | FileCheck %s + +declare void @llvm.arm.undefined(i32) nounwind + +define void @undefined_trap() { +entry: + tail call void @llvm.arm.undefined(i32 254) + ret void +} + +; CHECK-LABEL: undefined_trap +; CHECK: udf #254 From 9c553cfe7951be8ed9344d69b80190221dcfb8eb Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 22 May 2014 05:33:03 +0000 Subject: [PATCH 061/906] Remove unused variable. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209391 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TargetLoweringBase.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 978432411ef8..2634d71fce36 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -915,7 +915,6 @@ bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const { MachineBasicBlock* TargetLoweringBase::emitPatchPoint(MachineInstr *MI, MachineBasicBlock *MBB) const { - const TargetMachine &TM = getTargetMachine(); MachineFunction &MF = *MI->getParent()->getParent(); // MI changes inside this loop as we grow operands. From 327edb432a050b9753ffc236cb85ea71966d0d51 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Thu, 22 May 2014 06:02:59 +0000 Subject: [PATCH 062/906] MC: initialise MCAsmParser variable Properly initialise HadError to false during construction. Detected as use-of-uninitialised variable by MSan! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209393 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index ec2094d6715b..168597fff45a 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -498,8 +498,8 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, const MCAsmInfo &_MAI) : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM), PlatformParser(nullptr), CurBuffer(0), MacrosEnabledFlag(true), - CppHashLineNumber(0), AssemblerDialect(~0U), IsDarwin(false), - ParsingInlineAsm(false) { + HadError(false), CppHashLineNumber(0), AssemblerDialect(~0U), + IsDarwin(false), ParsingInlineAsm(false) { // Save the old handler. SavedDiagHandler = SrcMgr.getDiagHandler(); SavedDiagContext = SrcMgr.getDiagContext(); From de9e4c88c89171bcebe04baaff1af99b2b44efc6 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 22 May 2014 07:40:55 +0000 Subject: [PATCH 063/906] AArch64/ARM64: enable more AArch64 tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209408 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AArch64/neon-simd-post-ldst-multi-elem.ll | 1 + test/CodeGen/AArch64/neon-simd-post-ldst-one.ll | 1 + test/CodeGen/AArch64/sibling-call.ll | 13 +++++++------ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll b/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll index 8acf6b792eb1..181c69c89b26 100644 --- a/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll +++ b/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; arm64 has equivalent tests to these in various files. ;Check for a post-increment updating load. define <4 x i16> @test_vld1_fx_update(i16** %ptr) nounwind { diff --git a/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll index e53d6cb339b0..75f57c5d2ea9 100644 --- a/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll +++ b/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; arm64 has equivalents of these tests separately. 
define { [2 x <16 x i8>] } @test_vld2q_dup_fx_update(i8* %a, i8** %ptr) { ; CHECK-LABEL: test_vld2q_dup_fx_update diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll index 20f1062a44dc..a08f8cbd702e 100644 --- a/test/CodeGen/AArch64/sibling-call.ll +++ b/test/CodeGen/AArch64/sibling-call.ll @@ -1,4 +1,5 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -arm64-load-store-opt=0 | FileCheck %s declare void @callee_stack0() declare void @callee_stack8([8 x i32], i64) @@ -73,10 +74,10 @@ define void @caller_to16_from16([8 x i32], i64 %a, i64 %b) { tail call void @callee_stack16([8 x i32] undef, i64 %b, i64 %a) ret void -; CHECK: ldr x0, -; CHECK: ldr x1, -; CHECK: str x1, -; CHECK: str x0, +; CHECK: ldr [[VAL0:x[0-9]+]], +; CHECK: ldr [[VAL1:x[0-9]+]], +; CHECK: str [[VAL1]], +; CHECK: str [[VAL0]], ; CHECK-NOT: add sp, sp, ; CHECK: b callee_stack16 @@ -91,7 +92,7 @@ define void @indirect_tail() { %fptr = load void(i32)** @func tail call void %fptr(i32 42) ret void -; CHECK: ldr [[FPTR:x[1-9]+]], [{{x[0-9]+}}, #:lo12:func] -; CHECK: movz w0, #42 +; CHECK: ldr [[FPTR:x[1-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:func] +; CHECK: movz w0, #{{42|0x2a}} ; CHECK: br [[FPTR]] } From 2447dcc2e850a90ef65d5ca5b6c6a298a4ed3b7e Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 22 May 2014 07:41:37 +0000 Subject: [PATCH 064/906] ARM64: assert if we see i64 -> i64 extend in the DAG. Should be no change in behaviour, but it makes the intended functionality a bit clearer and means we only have to reason about real extend operations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209409 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/ARM64ISelDAGToDAG.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp index 45a837e69f66..ce4203f321cd 100644 --- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp +++ b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp @@ -369,8 +369,7 @@ getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { return ARM64_AM::SXTH; else if (SrcVT == MVT::i32) return ARM64_AM::SXTW; - else if (SrcVT == MVT::i64) - return ARM64_AM::SXTX; + assert(SrcVT != MVT::i64 && "extend from 64-bits?"); return ARM64_AM::InvalidShiftExtend; } else if (N.getOpcode() == ISD::ZERO_EXTEND || @@ -382,8 +381,7 @@ getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { return ARM64_AM::UXTH; else if (SrcVT == MVT::i32) return ARM64_AM::UXTW; - else if (SrcVT == MVT::i64) - return ARM64_AM::UXTX; + assert(SrcVT != MVT::i64 && "extend from 64-bits?"); return ARM64_AM::InvalidShiftExtend; } else if (N.getOpcode() == ISD::AND) { From c96096cc0f080016ebf5308fe408ca4de8c3c19d Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Thu, 22 May 2014 11:23:21 +0000 Subject: [PATCH 065/906] [mips][mips64r6] Add b[on]vc Summary: This required me to implement the disassembler for MIPS64r6 since the encodings are ambiguous with other instructions. This in turn revealed a few assembly/disassembly bugs which I have fixed. * da[ht]i only take two operands according to the spec, not three. * DecodeBranchTarget2[16] correctly handles wider immediates than simm16 * Also made non-functional change to DecodeBranchTarget and DecodeBranchTargetMM to keep implementation style consistent between them. 
* Difficult encodings are handled by a custom decode method on the most general encoding in the group. This method will convert the MCInst to a different opcode if necessary. DecodeBranchTarget is not currently the inverse of getBranchTargetOpValue so disassembling some branch instructions emit incorrect output. This seems to affect branches with delay slots on all MIPS ISA's. I've left this bug for now and temporarily removed the check for the immediate on bc[12]eqz/bc[12]nez in the MIPS32r6/MIPS64r6 tests. jialc and jic crash the disassembler for some reason. I've left these instructions commented out for the moment. Depends on D3760 Reviewers: jkolek, zoran.jovanovic, vmedic Reviewed By: vmedic Differential Revision: http://reviews.llvm.org/D3761 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209415 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Mips/Disassembler/MipsDisassembler.cpp | 253 +++++++++++++++++- lib/Target/Mips/Mips32r6InstrFormats.td | 77 +++++- lib/Target/Mips/Mips32r6InstrInfo.td | 62 +++-- lib/Target/Mips/Mips64r6InstrInfo.td | 11 +- test/MC/Disassembler/Mips/mips32r6.txt | 116 ++++++++ test/MC/Disassembler/Mips/mips64r6.txt | 129 +++++++++ test/MC/Mips/mips32r6/valid-xfail.s | 19 ++ test/MC/Mips/mips32r6/valid.s | 22 ++ test/MC/Mips/mips64r6/valid-xfail.s | 19 ++ test/MC/Mips/mips64r6/valid.s | 28 +- 10 files changed, 683 insertions(+), 53 deletions(-) create mode 100644 test/MC/Disassembler/Mips/mips32r6.txt create mode 100644 test/MC/Disassembler/Mips/mips64r6.txt create mode 100644 test/MC/Mips/mips32r6/valid-xfail.s create mode 100644 test/MC/Mips/mips64r6/valid-xfail.s diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index aeafe92d674a..95670aa4440c 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -63,6 +63,10 @@ class MipsDisassembler : public MipsDisassemblerBase { IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips; } + bool isMips32r6() const { + return STI.getFeatureBits() & Mips::FeatureMips32r6; + } + /// getInstruction - See MCDisassembler. 
DecodeStatus getInstruction(MCInst &instr, uint64_t &size, @@ -286,6 +290,32 @@ static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn, template static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, const void *Decoder); + +template +static DecodeStatus +DecodeAddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, + const void *Decoder); + +template +static DecodeStatus +DecodeDaddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, + const void *Decoder); + +template +static DecodeStatus +DecodeBlezlGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, + const void *Decoder); + +template +static DecodeStatus +DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, + const void *Decoder); + +template +static DecodeStatus +DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, + const void *Decoder); + namespace llvm { extern Target TheMipselTarget, TheMipsTarget, TheMips64Target, TheMips64elTarget; @@ -333,6 +363,12 @@ extern "C" void LLVMInitializeMipsDisassembler() { #include "MipsGenDisassemblerTables.inc" +static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { + const MipsDisassemblerBase *Dis = static_cast(D); + const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo(); + return *(RegInfo->getRegClass(RC).begin() + RegNo); +} + template static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, const void *Decoder) { @@ -379,6 +415,202 @@ static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, return MCDisassembler::Success; } +template +static DecodeStatus DecodeAddiGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const void *Decoder) { + // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled + // (otherwise we would have matched the ADDI instruction from the earlier + // ISA's instead). + // + // We have: + // 0b001000 sssss ttttt iiiiiiiiiiiiiiii + // BOVC if rs >= rt + // BEQZALC if rs == 0 && rt != 0 + // BEQC if rs < rt && rs != 0 + + InsnType Rs = fieldFromInstruction(insn, 21, 5); + InsnType Rt = fieldFromInstruction(insn, 16, 5); + InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; + bool HasRs = false; + + if (Rs >= Rt) { + MI.setOpcode(Mips::BOVC); + HasRs = true; + } else if (Rs != 0 && Rs < Rt) { + MI.setOpcode(Mips::BEQC); + HasRs = true; + } else + MI.setOpcode(Mips::BEQZALC); + + if (HasRs) + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rs))); + + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rt))); + MI.addOperand(MCOperand::CreateImm(Imm)); + + return MCDisassembler::Success; +} + +template +static DecodeStatus DecodeDaddiGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const void *Decoder) { + // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled + // (otherwise we would have matched the ADDI instruction from the earlier + // ISA's instead). 
+ // + // We have: + // 0b011000 sssss ttttt iiiiiiiiiiiiiiii + // BNVC if rs >= rt + // BNEZALC if rs == 0 && rt != 0 + // BNEC if rs < rt && rs != 0 + + InsnType Rs = fieldFromInstruction(insn, 21, 5); + InsnType Rt = fieldFromInstruction(insn, 16, 5); + InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; + bool HasRs = false; + + if (Rs >= Rt) { + MI.setOpcode(Mips::BNVC); + HasRs = true; + } else if (Rs != 0 && Rs < Rt) { + MI.setOpcode(Mips::BNEC); + HasRs = true; + } else + MI.setOpcode(Mips::BNEZALC); + + if (HasRs) + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rs))); + + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rt))); + MI.addOperand(MCOperand::CreateImm(Imm)); + + return MCDisassembler::Success; +} + +template +static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const void *Decoder) { + // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled + // (otherwise we would have matched the BLEZL instruction from the earlier + // ISA's instead). + // + // We have: + // 0b010110 sssss ttttt iiiiiiiiiiiiiiii + // Invalid if rs == 0 + // BLEZC if rs == 0 && rt != 0 + // BGEZC if rs == rt && rt != 0 + // BGEC if rs != rt && rs != 0 && rt != 0 + + InsnType Rs = fieldFromInstruction(insn, 21, 5); + InsnType Rt = fieldFromInstruction(insn, 16, 5); + InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; + + if (Rt == 0) + return MCDisassembler::Fail; + else if (Rs == 0) + MI.setOpcode(Mips::BLEZC); + else if (Rs == Rt) + MI.setOpcode(Mips::BGEZC); + else + return MCDisassembler::Fail; // FIXME: BGEC is not implemented yet. + + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rt))); + + MI.addOperand(MCOperand::CreateImm(Imm)); + + return MCDisassembler::Success; +} + +template +static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const void *Decoder) { + // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled + // (otherwise we would have matched the BGTZL instruction from the earlier + // ISA's instead). + // + // We have: + // 0b010111 sssss ttttt iiiiiiiiiiiiiiii + // Invalid if rs == 0 + // BGTZC if rs == 0 && rt != 0 + // BLTZC if rs == rt && rt != 0 + // BLTC if rs != rt && rs != 0 && rt != 0 + + InsnType Rs = fieldFromInstruction(insn, 21, 5); + InsnType Rt = fieldFromInstruction(insn, 16, 5); + InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; + + if (Rt == 0) + return MCDisassembler::Fail; + else if (Rs == 0) + MI.setOpcode(Mips::BGTZC); + else if (Rs == Rt) + MI.setOpcode(Mips::BLTZC); + else + return MCDisassembler::Fail; // FIXME: BLTC is not implemented yet. + + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rt))); + + MI.addOperand(MCOperand::CreateImm(Imm)); + + return MCDisassembler::Success; +} + +template +static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const void *Decoder) { + // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled + // (otherwise we would have matched the BGTZ instruction from the earlier + // ISA's instead). 
+ // + // We have: + // 0b000111 sssss ttttt iiiiiiiiiiiiiiii + // BGTZ if rt == 0 + // BGTZALC if rs == 0 && rt != 0 + // BLTZALC if rs != 0 && rs == rt + // BLTUC if rs != 0 && rs != rt + + InsnType Rs = fieldFromInstruction(insn, 21, 5); + InsnType Rt = fieldFromInstruction(insn, 16, 5); + InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; + bool HasRs = false; + bool HasRt = false; + + if (Rt == 0) { + MI.setOpcode(Mips::BGTZ); + HasRs = true; + } else if (Rs == 0) { + MI.setOpcode(Mips::BGTZALC); + HasRt = true; + } else if (Rs == Rt) { + MI.setOpcode(Mips::BLTZALC); + HasRs = true; + } else + return MCDisassembler::Fail; // BLTUC not implemented yet + + if (HasRs) + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rs))); + + if (HasRt) + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rt))); + + MI.addOperand(MCOperand::CreateImm(Imm)); + + return MCDisassembler::Success; +} + /// readInstruction - read four bytes from the MemoryObject /// and return 32 bit word sorted according to the given endianess static DecodeStatus readInstruction32(const MemoryObject ®ion, @@ -448,6 +680,15 @@ MipsDisassembler::getInstruction(MCInst &instr, return MCDisassembler::Fail; } + if (isMips32r6()) { + Result = decodeInstruction(DecoderTableMips32r6_64r632, instr, Insn, + Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + } + // Calling the auto-generated decoder function. Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address, this, STI); @@ -491,12 +732,6 @@ Mips64Disassembler::getInstruction(MCInst &instr, return MCDisassembler::Fail; } -static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { - const MipsDisassemblerBase *Dis = static_cast(D); - const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo(); - return *(RegInfo->getRegClass(RC).begin() + RegNo); -} - static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -867,8 +1102,7 @@ static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, const void *Decoder) { - unsigned BranchOffset = Offset & 0xffff; - BranchOffset = SignExtend32<18>(BranchOffset << 2) + 4; + int32_t BranchOffset = (SignExtend32<16>(Offset) << 2) + 4; Inst.addOperand(MCOperand::CreateImm(BranchOffset)); return MCDisassembler::Success; } @@ -907,8 +1141,7 @@ static DecodeStatus DecodeBranchTargetMM(MCInst &Inst, unsigned Offset, uint64_t Address, const void *Decoder) { - unsigned BranchOffset = Offset & 0xffff; - BranchOffset = SignExtend32<18>(BranchOffset << 1); + int32_t BranchOffset = SignExtend32<16>(Offset) << 1; Inst.addOperand(MCOperand::CreateImm(BranchOffset)); return MCDisassembler::Success; } diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td index 926181b9aa4d..a3f9df52edb4 100644 --- a/lib/Target/Mips/Mips32r6InstrFormats.td +++ b/lib/Target/Mips/Mips32r6InstrFormats.td @@ -23,14 +23,23 @@ class MipsR6Inst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, // //===----------------------------------------------------------------------===// -def OPGROUP_COP1 { bits<6> Value = 0b010001; } -def OPGROUP_COP2 { bits<6> Value = 0b010010; } -def OPGROUP_AUI { bits<6> Value = 0b001111; } -def OPGROUP_DAUI { bits<6> Value = 0b011101; } -def OPGROUP_PCREL { bits<6> Value = 0b111011; } -def OPGROUP_REGIMM { bits<6> Value = 0b000001; } -def OPGROUP_SPECIAL { bits<6> Value = 0b000000; } -def 
OPGROUP_SPECIAL3 { bits<6> Value = 0b011111; } +class OPGROUP Val> { + bits<6> Value = Val; +} +def OPGROUP_COP1 : OPGROUP<0b010001>; +def OPGROUP_COP2 : OPGROUP<0b010010>; +def OPGROUP_ADDI : OPGROUP<0b001000>; +def OPGROUP_AUI : OPGROUP<0b001111>; +def OPGROUP_BLEZ : OPGROUP<0b000110>; +def OPGROUP_BGTZ : OPGROUP<0b000111>; +def OPGROUP_BLEZL : OPGROUP<0b010110>; +def OPGROUP_BGTZL : OPGROUP<0b010111>; +def OPGROUP_DADDI : OPGROUP<0b011000>; +def OPGROUP_DAUI : OPGROUP<0b011101>; +def OPGROUP_PCREL : OPGROUP<0b111011>; +def OPGROUP_REGIMM : OPGROUP<0b000001>; +def OPGROUP_SPECIAL : OPGROUP<0b000000>; +def OPGROUP_SPECIAL3 : OPGROUP<0b011111>; class OPCODE2 Val> { bits<2> Value = Val; @@ -91,6 +100,22 @@ class FIELD_CMP_FORMAT Val> { def FIELD_CMP_FORMAT_S : FIELD_CMP_FORMAT<0b10100>; def FIELD_CMP_FORMAT_D : FIELD_CMP_FORMAT<0b10101>; +//===----------------------------------------------------------------------===// +// +// Disambiguators +// +//===----------------------------------------------------------------------===// +// +// Some encodings are ambiguous except by comparing field values. + +class DecodeDisambiguates { + string DecoderMethod = !strconcat("Decode", Name); +} + +class DecodeDisambiguatedBy : DecodeDisambiguates { + string DecoderNamespace = "Mips32r6_64r6_Ambiguous"; +} + //===----------------------------------------------------------------------===// // // Encoding Formats @@ -220,25 +245,53 @@ class SPECIAL_3R_FM mulop, bits<6> funct> : MipsR6Inst { let Inst{5-0} = funct; } -class CMP_BRANCH_OFF16_FM funct> : MipsR6Inst { +// This class is ambiguous with other branches: +// BEQC/BNEC require that rs > rt +class CMP_BRANCH_2R_OFF16_FM : MipsR6Inst { bits<5> rs; bits<5> rt; bits<16> offset; bits<32> Inst; - let Inst{31-26} = funct; + let Inst{31-26} = funct.Value; let Inst{25-21} = rs; let Inst{20-16} = rt; let Inst{15-0} = offset; } -class CMP_BRANCH_RT_OFF16_FM funct> : CMP_BRANCH_OFF16_FM { +// This class is ambiguous with other branches: +// BLEZC/BGEZC/BEQZALC/BNEZALC/BGTZALC require that rs == 0 && rt != 0 +// The '1R_RT' in the name means 1 register in the rt field. +class CMP_BRANCH_1R_RT_OFF16_FM : MipsR6Inst { + bits<5> rt; + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = funct.Value; let Inst{25-21} = 0b00000; + let Inst{20-16} = rt; + let Inst{15-0} = offset; +} + +// This class is ambiguous with other branches: +// BLTZC/BGTZC/BLTZALC/BGEZALC require that rs == rt && rt != 0 +// The '1R_BOTH' in the name means 1 register in both the rs and rt fields. 
+class CMP_BRANCH_1R_BOTH_OFF16_FM : MipsR6Inst { + bits<5> rt; + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = funct.Value; + let Inst{25-21} = rt; + let Inst{20-16} = rt; + let Inst{15-0} = offset; } class CMP_BRANCH_OFF21_FM funct> : MipsR6Inst { - bits<5> rs; + bits<5> rs; // rs != 0 bits<21> offset; bits<32> Inst; diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 7599f07c8a8a..845c851b5ec5 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -90,21 +90,31 @@ class AUIPC_ENC : PCREL16_FM; class BALC_ENC : BRANCH_OFF26_FM<0b111010>; class BC_ENC : BRANCH_OFF26_FM<0b110010>; -class BEQC_ENC : CMP_BRANCH_OFF16_FM<0b001000>; -class BEQZALC_ENC : CMP_BRANCH_RT_OFF16_FM<0b001000>; -class BNEC_ENC : CMP_BRANCH_OFF16_FM<0b011000>; -class BNEZALC_ENC : CMP_BRANCH_RT_OFF16_FM<0b011000>; - -class BLTZC_ENC : CMP_BRANCH_OFF16_FM<0b010111>; -class BGEZC_ENC : CMP_BRANCH_OFF16_FM<0b010110>; -class BGTZALC_ENC : CMP_BRANCH_RT_OFF16_FM<0b000111>; - -class BLEZC_ENC : CMP_BRANCH_RT_OFF16_FM<0b010110>; -class BLTZALC_ENC : CMP_BRANCH_OFF16_FM<0b000111>; -class BGTZC_ENC : CMP_BRANCH_RT_OFF16_FM<0b010111>; +class BEQC_ENC : CMP_BRANCH_2R_OFF16_FM, + DecodeDisambiguates<"AddiGroupBranch">; +class BEQZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM, + DecodeDisambiguatedBy<"DaddiGroupBranch">; +class BNEC_ENC : CMP_BRANCH_2R_OFF16_FM, + DecodeDisambiguates<"DaddiGroupBranch">; +class BNEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM, + DecodeDisambiguatedBy<"DaddiGroupBranch">; + +class BLTZC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM, + DecodeDisambiguates<"BgtzlGroupBranch">; +class BGEZC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM, + DecodeDisambiguates<"BlezlGroupBranch">; +class BGTZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM, + DecodeDisambiguatedBy<"BgtzGroupBranch">; + +class BLEZC_ENC : CMP_BRANCH_1R_RT_OFF16_FM, + DecodeDisambiguatedBy<"BlezlGroupBranch">; +class BLTZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM, + DecodeDisambiguates<"BgtzGroupBranch">; +class BGTZC_ENC : CMP_BRANCH_1R_RT_OFF16_FM, + DecodeDisambiguatedBy<"BgtzlGroupBranch">; class BEQZC_ENC : CMP_BRANCH_OFF21_FM<0b110110>; -class BGEZALC_ENC : CMP_BRANCH_OFF16_FM<0b000110>; +class BGEZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM; class BNEZC_ENC : CMP_BRANCH_OFF21_FM<0b111110>; class BC1EQZ_ENC : COP1_BCCZ_FM; @@ -116,7 +126,11 @@ class JIALC_ENC : JMP_IDX_COMPACT_FM<0b111110>; class JIC_ENC : JMP_IDX_COMPACT_FM<0b110110>; class BITSWAP_ENC : SPECIAL3_2R_FM; -class BLEZALC_ENC : CMP_BRANCH_RT_OFF16_FM<0b000110>; +class BLEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM; +class BNVC_ENC : CMP_BRANCH_2R_OFF16_FM, + DecodeDisambiguatedBy<"DaddiGroupBranch">; +class BOVC_ENC : CMP_BRANCH_2R_OFF16_FM, + DecodeDisambiguatedBy<"AddiGroupBranch">; class DIV_ENC : SPECIAL_3R_FM<0b00010, 0b011010>; class DIVU_ENC : SPECIAL_3R_FM<0b00010, 0b011011>; class MOD_ENC : SPECIAL_3R_FM<0b00011, 0b011010>; @@ -302,7 +316,7 @@ class CMP_CBR_EQNE_Z_DESC_BASE : BRANCH_DESC_BASE { - dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, opnd:$offset); + dag InOperandList = (ins GPROpnd:$rt, opnd:$offset); dag OutOperandList = (outs); string AsmString = !strconcat(instr_asm, "\t$rt, $offset"); list Defs = [AT]; @@ -317,13 +331,8 @@ class BC_DESC : BC_DESC_BASE<"bc", brtarget26>; class BEQC_DESC : CMP_BC_DESC_BASE<"beqc", brtarget, GPR32Opnd>; class BNEC_DESC : CMP_BC_DESC_BASE<"bnec", brtarget, GPR32Opnd>; -class BLTZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzc", brtarget, GPR32Opnd> { - string Constraints = "$rs = $rt"; -} - -class 
BGEZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgezc", brtarget, GPR32Opnd> { - string Constraints = "$rs = $rt"; -} +class BLTZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzc", brtarget, GPR32Opnd>; +class BGEZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgezc", brtarget, GPR32Opnd>; class BLEZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"blezc", brtarget, GPR32Opnd>; class BGTZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgtzc", brtarget, GPR32Opnd>; @@ -351,6 +360,9 @@ class COP2_BCCZ_DESC_BASE : BRANCH_DESC_BASE { class BC2EQZ_DESC : COP2_BCCZ_DESC_BASE<"bc2eqz $ct, $offset">; class BC2NEZ_DESC : COP2_BCCZ_DESC_BASE<"bc2nez $ct, $offset">; +class BOVC_DESC : CMP_BC_DESC_BASE<"bovc", brtarget, GPR32Opnd>; +class BNVC_DESC : CMP_BC_DESC_BASE<"bnvc", brtarget, GPR32Opnd>; + class JMP_IDX_COMPACT_DESC_BASE { dag InOperandList = (ins GPROpnd:$rt, opnd:$offset); @@ -398,7 +410,6 @@ class BEQZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"beqzalc", brtarget, GPR32Opnd> { } class BGEZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgezalc", brtarget, GPR32Opnd> { - string Constraints = "$rs = $rt"; list Defs = [RA]; } @@ -411,7 +422,6 @@ class BLEZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"blezalc", brtarget, GPR32Opnd> { } class BLTZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzalc", brtarget, GPR32Opnd> { - string Constraints = "$rs = $rt"; list Defs = [RA]; } @@ -533,8 +543,8 @@ def BLTZC : BLTZC_ENC, BLTZC_DESC, ISA_MIPS32R6; def BNEC : BNEC_ENC, BNEC_DESC, ISA_MIPS32R6; def BNEZALC : BNEZALC_ENC, BNEZALC_DESC, ISA_MIPS32R6; def BNEZC : BNEZC_ENC, BNEZC_DESC, ISA_MIPS32R6; -def BNVC; -def BOVC; +def BNVC : BNVC_ENC, BNVC_DESC, ISA_MIPS32R6; +def BOVC : BOVC_ENC, BOVC_DESC, ISA_MIPS32R6; def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6; def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6; defm S : CMP_CC_M; diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td index 0f48784898e7..2e87a60a1e8f 100644 --- a/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/lib/Target/Mips/Mips64r6InstrInfo.td @@ -45,9 +45,16 @@ class DMULU_ENC : SPECIAL_3R_FM<0b00010, 0b111001>; // //===----------------------------------------------------------------------===// +class AHI_ATI_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rs); + dag InOperandList = (ins GPROpnd:$rt, simm16:$imm); + string AsmString = !strconcat(instr_asm, "\t$rt, $imm"); + string Constraints = "$rs = $rt"; +} + class DALIGN_DESC : ALIGN_DESC_BASE<"dalign", GPR64Opnd, uimm3>; -class DAHI_DESC : AUI_DESC_BASE<"dahi", GPR64Opnd>; -class DATI_DESC : AUI_DESC_BASE<"dati", GPR64Opnd>; +class DAHI_DESC : AHI_ATI_DESC_BASE<"dahi", GPR64Opnd>; +class DATI_DESC : AHI_ATI_DESC_BASE<"dati", GPR64Opnd>; class DAUI_DESC : AUI_DESC_BASE<"daui", GPR64Opnd>; class DBITSWAP_DESC : BITSWAP_DESC_BASE<"dbitswap", GPR64Opnd>; class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd>; diff --git a/test/MC/Disassembler/Mips/mips32r6.txt b/test/MC/Disassembler/Mips/mips32r6.txt new file mode 100644 index 000000000000..adbcd9943075 --- /dev/null +++ b/test/MC/Disassembler/Mips/mips32r6.txt @@ -0,0 +1,116 @@ +# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r6 | FileCheck %s + +0xec 0x80 0x00 0x19 # CHECK: addiupc $4, 100 +0x7c 0x43 0x22 0xa0 # CHECK: align $4, $2, $3, 2 +0xec 0x7f 0x00 0x38 # CHECK: aluipc $3, 56 +0x3c 0x62 0xff 0xe9 # CHECK: aui $3, $2, -23 +0xec 0x7e 0xff 0xff # CHECK: auipc $3, -1 +0xe8 0x37 0x96 0xb8 # CHECK: balc 14572256 +0xc8 0x37 0x96 0xb8 # CHECK: bc 14572256 + +# FIXME: Don't check the immediate on these for the moment, the encode/decode +# functions are not inverses of eachother. 
+# The immediate should be 4 but the disassembler currently emits 8 +0x45 0x20 0x00 0x01 # CHECK: bc1eqz $f0, +0x45 0x3f 0x00 0x01 # CHECK: bc1eqz $f31, +0x45 0xa0 0x00 0x01 # CHECK: bc1nez $f0, +0x45 0xbf 0x00 0x01 # CHECK: bc1nez $f31, +# FIXME: Don't check the immediate on these for the moment, the encode/decode +# functions are not inverses of eachother. +# The immediate should be 8 but the disassembler currently emits 12 +0x49 0x20 0x00 0x02 # CHECK: bc2eqz $0, +0x49 0x3f 0x00 0x02 # CHECK: bc2eqz $31, +0x49 0xa0 0x00 0x02 # CHECK: bc2nez $0, +0x49 0xbf 0x00 0x02 # CHECK: bc2nez $31, + +0x20 0xa6 0x00 0x40 # CHECK: beqc $5, $6, 256 +# FIXME: Don't check the immediate on the bcczal's for the moment, the +# encode/decode functions are not inverses of eachother. +0x20 0x02 0x01 0x4d # CHECK: beqzalc $2, +0x60 0xa6 0x00 0x40 # CHECK: bnec $5, $6, 256 +0x60 0x02 0x01 0x4d # CHECK: bnezalc $2, +0xd8 0xa0 0x46 0x90 # CHECK: beqzc $5, 72256 +0x18 0x42 0x01 0x4d # CHECK: bgezalc $2, +0xf8 0xa0 0x46 0x90 # CHECK: bnezc $5, 72256 +0x5c 0xa5 0x00 0x40 # CHECK: bltzc $5, 256 +0x58 0xa5 0x00 0x40 # CHECK: bgezc $5, 256 +0x1c 0x02 0x01 0x4d # CHECK: bgtzalc $2, +0x58 0x05 0x00 0x40 # CHECK: blezc $5, 256 +0x1c 0x42 0x01 0x4d # CHECK: bltzalc $2, +0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256 +0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2 +0x18 0x02 0x01 0x4d # CHECK: blezalc $2, +0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4 +0x60 0x40 0x00 0x01 # CHECK: bnvc $2, $zero, 4 +0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4 +0x20 0x00 0x00 0x01 # CHECK: bovc $zero, $zero, 4 +0x20 0x40 0x00 0x01 # CHECK: bovc $2, $zero, 4 +0x20 0x82 0x00 0x01 # CHECK: bovc $4, $2, 4 +0x46 0x84 0x18 0x80 # CHECK: cmp.f.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x80 # CHECK: cmp.f.d $f2, $f3, $f4 +0x46 0x84 0x18 0x81 # CHECK: cmp.un.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x81 # CHECK: cmp.un.d $f2, $f3, $f4 +0x46 0x84 0x18 0x82 # CHECK: cmp.eq.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x82 # CHECK: cmp.eq.d $f2, $f3, $f4 +0x46 0x84 0x18 0x83 # CHECK: cmp.ueq.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x83 # CHECK: cmp.ueq.d $f2, $f3, $f4 +0x46 0x84 0x18 0x84 # CHECK: cmp.olt.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x84 # CHECK: cmp.olt.d $f2, $f3, $f4 +0x46 0x84 0x18 0x85 # CHECK: cmp.ult.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x85 # CHECK: cmp.ult.d $f2, $f3, $f4 +0x46 0x84 0x18 0x86 # CHECK: cmp.ole.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x86 # CHECK: cmp.ole.d $f2, $f3, $f4 +0x46 0x84 0x18 0x87 # CHECK: cmp.ule.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x87 # CHECK: cmp.ule.d $f2, $f3, $f4 +0x46 0x84 0x18 0x88 # CHECK: cmp.sf.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x88 # CHECK: cmp.sf.d $f2, $f3, $f4 +0x46 0x84 0x18 0x89 # CHECK: cmp.ngle.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x89 # CHECK: cmp.ngle.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8a # CHECK: cmp.seq.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8a # CHECK: cmp.seq.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8b # CHECK: cmp.ngl.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8b # CHECK: cmp.ngl.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8c # CHECK: cmp.lt.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8c # CHECK: cmp.lt.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8d # CHECK: cmp.nge.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8d # CHECK: cmp.nge.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8e # CHECK: cmp.le.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8e # CHECK: cmp.le.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8f # CHECK: cmp.ngt.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8f # CHECK: cmp.ngt.d $f2, $f3, $f4 +0x00 0x64 0x10 0x9a # CHECK: div $2, $3, $4 +0x00 0x64 0x10 0x9b # CHECK: divu $2, $3, $4 +# 0xf8 0x05 0x01 0x00 # CHECK-TODO: jialc $5, 256 +# 0xd8 0x05 0x01 
0x00 # CHECK-TODO: jic $5, 256 +0xec 0x48 0x00 0x43 # CHECK: lwpc $2, 268 +0xec 0x50 0x00 0x43 # CHECK: lwupc $2, 268 +0x00 0x64 0x10 0xda # CHECK: mod $2, $3, $4 +0x00 0x64 0x10 0xdb # CHECK: modu $2, $3, $4 +0x00 0x64 0x10 0x98 # CHECK: mul $2, $3, $4 +0x00 0x64 0x10 0xd8 # CHECK: muh $2, $3, $4 +0x00 0x64 0x10 0x99 # CHECK: mulu $2, $3, $4 +0x00 0x64 0x10 0xd9 # CHECK: muhu $2, $3, $4 +0x46 0x04 0x18 0x98 # CHECK: maddf.s $f2, $f3, $f4 +0x46 0x24 0x18 0x98 # CHECK: maddf.d $f2, $f3, $f4 +0x46 0x04 0x18 0x99 # CHECK: msubf.s $f2, $f3, $f4 +0x46 0x24 0x18 0x99 # CHECK: msubf.d $f2, $f3, $f4 +0x46 0x22 0x08 0x10 # CHECK: sel.d $f0, $f1, $f2 +0x46 0x02 0x08 0x10 # CHECK: sel.s $f0, $f1, $f2 +0x00 0x64 0x10 0x35 # CHECK: seleqz $2, $3, $4 +0x00 0x64 0x10 0x37 # CHECK: selnez $2, $3, $4 +0x46 0x04 0x10 0x1d # CHECK: max.s $f0, $f2, $f4 +0x46 0x24 0x10 0x1d # CHECK: max.d $f0, $f2, $f4 +0x46 0x04 0x10 0x1c # CHECK: min.s $f0, $f2, $f4 +0x46 0x24 0x10 0x1c # CHECK: min.d $f0, $f2, $f4 +0x46 0x04 0x10 0x1f # CHECK: maxa.s $f0, $f2, $f4 +0x46 0x24 0x10 0x1f # CHECK: maxa.d $f0, $f2, $f4 +0x46 0x04 0x10 0x1e # CHECK: mina.s $f0, $f2, $f4 +0x46 0x24 0x10 0x1e # CHECK: mina.d $f0, $f2, $f4 +0x46 0x04 0x10 0x14 # CHECK: seleqz.s $f0, $f2, $f4 +0x46 0x24 0x10 0x14 # CHECK: seleqz.d $f0, $f2, $f4 +0x46 0x04 0x10 0x17 # CHECK: selnez.s $f0, $f2, $f4 +0x46 0x24 0x10 0x17 # CHECK: selnez.d $f0, $f2, $f4 +0x46 0x00 0x20 0x9a # CHECK: rint.s $f2, $f4 +0x46 0x20 0x20 0x9a # CHECK: rint.d $f2, $f4 +0x46 0x00 0x20 0x9b # CHECK: class.s $f2, $f4 +0x46 0x20 0x20 0x9b # CHECK: class.d $f2, $f4 diff --git a/test/MC/Disassembler/Mips/mips64r6.txt b/test/MC/Disassembler/Mips/mips64r6.txt new file mode 100644 index 000000000000..f5bb14e9a175 --- /dev/null +++ b/test/MC/Disassembler/Mips/mips64r6.txt @@ -0,0 +1,129 @@ +# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips64r6 | FileCheck %s + +0xec 0x80 0x00 0x19 # CHECK: addiupc $4, 100 +0x7c 0x43 0x22 0xa0 # CHECK: align $4, $2, $3, 2 +0xec 0x7f 0x00 0x38 # CHECK: aluipc $3, 56 +0x3c 0x62 0xff 0xe9 # CHECK: aui $3, $2, -23 +0xec 0x7e 0xff 0xff # CHECK: auipc $3, -1 +0xe8 0x37 0x96 0xb8 # CHECK: balc 14572256 +0xc8 0x37 0x96 0xb8 # CHECK: bc 14572256 + +# FIXME: Don't check the immediate on these for the moment, the encode/decode +# functions are not inverses of eachother. +# The immediate should be 4 but the disassembler currently emits 8 +0x45 0x20 0x00 0x01 # CHECK: bc1eqz $f0, +0x45 0x3f 0x00 0x01 # CHECK: bc1eqz $f31, +0x45 0xa0 0x00 0x01 # CHECK: bc1nez $f0, +0x45 0xbf 0x00 0x01 # CHECK: bc1nez $f31, +# FIXME: Don't check the immediate on these for the moment, the encode/decode +# functions are not inverses of eachother. +# The immediate should be 8 but the disassembler currently emits 12 +0x49 0x20 0x00 0x02 # CHECK: bc2eqz $0, +0x49 0x3f 0x00 0x02 # CHECK: bc2eqz $31, +0x49 0xa0 0x00 0x02 # CHECK: bc2nez $0, +0x49 0xbf 0x00 0x02 # CHECK: bc2nez $31, + +0x20 0xa6 0x00 0x40 # CHECK: beqc $5, $6, 256 +# FIXME: Don't check the immediate on the bcczal's for the moment, the +# encode/decode functions are not inverses of eachother. 
+0x20 0x02 0x01 0x4d # CHECK: beqzalc $2, +0x60 0xa6 0x00 0x40 # CHECK: bnec $5, $6, 256 +0x60 0x02 0x01 0x4d # CHECK: bnezalc $2, +0xd8 0xa0 0x46 0x90 # CHECK: beqzc $5, 72256 +0x18 0x42 0x01 0x4d # CHECK: bgezalc $2, +0xf8 0xa0 0x46 0x90 # CHECK: bnezc $5, 72256 +0x5c 0xa5 0x00 0x40 # CHECK: bltzc $5, 256 +0x58 0xa5 0x00 0x40 # CHECK: bgezc $5, 256 +0x1c 0x02 0x01 0x4d # CHECK: bgtzalc $2, +0x58 0x05 0x00 0x40 # CHECK: blezc $5, 256 +0x1c 0x42 0x01 0x4d # CHECK: bltzalc $2, +0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256 +0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2 +0x18 0x02 0x01 0x4d # CHECK: blezalc $2, +0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4 +0x60 0x40 0x00 0x01 # CHECK: bnvc $2, $zero, 4 +0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4 +0x20 0x00 0x00 0x01 # CHECK: bovc $zero, $zero, 4 +0x20 0x40 0x00 0x01 # CHECK: bovc $2, $zero, 4 +0x20 0x82 0x00 0x01 # CHECK: bovc $4, $2, 4 +0x46 0x84 0x18 0x80 # CHECK: cmp.f.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x80 # CHECK: cmp.f.d $f2, $f3, $f4 +0x46 0x84 0x18 0x81 # CHECK: cmp.un.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x81 # CHECK: cmp.un.d $f2, $f3, $f4 +0x46 0x84 0x18 0x82 # CHECK: cmp.eq.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x82 # CHECK: cmp.eq.d $f2, $f3, $f4 +0x46 0x84 0x18 0x83 # CHECK: cmp.ueq.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x83 # CHECK: cmp.ueq.d $f2, $f3, $f4 +0x46 0x84 0x18 0x84 # CHECK: cmp.olt.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x84 # CHECK: cmp.olt.d $f2, $f3, $f4 +0x46 0x84 0x18 0x85 # CHECK: cmp.ult.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x85 # CHECK: cmp.ult.d $f2, $f3, $f4 +0x46 0x84 0x18 0x86 # CHECK: cmp.ole.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x86 # CHECK: cmp.ole.d $f2, $f3, $f4 +0x46 0x84 0x18 0x87 # CHECK: cmp.ule.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x87 # CHECK: cmp.ule.d $f2, $f3, $f4 +0x46 0x84 0x18 0x88 # CHECK: cmp.sf.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x88 # CHECK: cmp.sf.d $f2, $f3, $f4 +0x46 0x84 0x18 0x89 # CHECK: cmp.ngle.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x89 # CHECK: cmp.ngle.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8a # CHECK: cmp.seq.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8a # CHECK: cmp.seq.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8b # CHECK: cmp.ngl.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8b # CHECK: cmp.ngl.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8c # CHECK: cmp.lt.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8c # CHECK: cmp.lt.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8d # CHECK: cmp.nge.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8d # CHECK: cmp.nge.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8e # CHECK: cmp.le.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8e # CHECK: cmp.le.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8f # CHECK: cmp.ngt.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8f # CHECK: cmp.ngt.d $f2, $f3, $f4 +0x7c 0x43 0x23 0x64 # CHECK: dalign $4, $2, $3, 5 +0x74 0x62 0x12 0x34 # CHECK: daui $3, $2, 4660 +0x04 0x66 0x56 0x78 # CHECK: dahi $3, 22136 +0x04 0x7e 0xab 0xcd # CHECK: dati $3, -21555 +0x7c 0x02 0x20 0x24 # CHECK: dbitswap $4, $2 +0x00 0x64 0x10 0x9a # CHECK: div $2, $3, $4 +0x00 0x64 0x10 0x9b # CHECK: divu $2, $3, $4 +# 0xf8 0x05 0x01 0x00 # CHECK-TODO: jialc $5, 256 +# 0xd8 0x05 0x01 0x00 # CHECK-TODO: jic $5, 256 +0xec 0x48 0x00 0x43 # CHECK: lwpc $2, 268 +0xec 0x50 0x00 0x43 # CHECK: lwupc $2, 268 +0x00 0x64 0x10 0xda # CHECK: mod $2, $3, $4 +0x00 0x64 0x10 0xdb # CHECK: modu $2, $3, $4 +0x00 0x64 0x10 0x9e # CHECK: ddiv $2, $3, $4 +0x00 0x64 0x10 0x9f # CHECK: ddivu $2, $3, $4 +0x00 0x64 0x10 0xde # CHECK: dmod $2, $3, $4 +0x00 0x64 0x10 0xdf # CHECK: dmodu $2, $3, $4 +0x00 0x64 0x10 0x98 # CHECK: mul $2, $3, $4 +0x00 0x64 0x10 0xd8 # CHECK: muh $2, $3, $4 +0x00 0x64 0x10 0x99 # CHECK: mulu $2, $3, $4 +0x00 0x64 
0x10 0xd9 # CHECK: muhu $2, $3, $4 +0x00 0x64 0x10 0xb8 # CHECK: dmul $2, $3, $4 +0x00 0x64 0x10 0xf8 # CHECK: dmuh $2, $3, $4 +0x00 0x64 0x10 0xb9 # CHECK: dmulu $2, $3, $4 +0x00 0x64 0x10 0xf9 # CHECK: dmuhu $2, $3, $4 +0x46 0x04 0x18 0x98 # CHECK: maddf.s $f2, $f3, $f4 +0x46 0x24 0x18 0x98 # CHECK: maddf.d $f2, $f3, $f4 +0x46 0x04 0x18 0x99 # CHECK: msubf.s $f2, $f3, $f4 +0x46 0x24 0x18 0x99 # CHECK: msubf.d $f2, $f3, $f4 +0x46 0x22 0x08 0x10 # CHECK: sel.d $f0, $f1, $f2 +0x46 0x02 0x08 0x10 # CHECK: sel.s $f0, $f1, $f2 +0x00 0x64 0x10 0x35 # CHECK: seleqz $2, $3, $4 +0x00 0x64 0x10 0x37 # CHECK: selnez $2, $3, $4 +0x46 0x04 0x10 0x1d # CHECK: max.s $f0, $f2, $f4 +0x46 0x24 0x10 0x1d # CHECK: max.d $f0, $f2, $f4 +0x46 0x04 0x10 0x1c # CHECK: min.s $f0, $f2, $f4 +0x46 0x24 0x10 0x1c # CHECK: min.d $f0, $f2, $f4 +0x46 0x04 0x10 0x1f # CHECK: maxa.s $f0, $f2, $f4 +0x46 0x24 0x10 0x1f # CHECK: maxa.d $f0, $f2, $f4 +0x46 0x04 0x10 0x1e # CHECK: mina.s $f0, $f2, $f4 +0x46 0x24 0x10 0x1e # CHECK: mina.d $f0, $f2, $f4 +0x46 0x04 0x10 0x14 # CHECK: seleqz.s $f0, $f2, $f4 +0x46 0x24 0x10 0x14 # CHECK: seleqz.d $f0, $f2, $f4 +0x46 0x04 0x10 0x17 # CHECK: selnez.s $f0, $f2, $f4 +0x46 0x24 0x10 0x17 # CHECK: selnez.d $f0, $f2, $f4 +0x46 0x00 0x20 0x9a # CHECK: rint.s $f2, $f4 +0x46 0x20 0x20 0x9a # CHECK: rint.d $f2, $f4 +0x46 0x00 0x20 0x9b # CHECK: class.s $f2, $f4 +0x46 0x20 0x20 0x9b # CHECK: class.d $f2, $f4 diff --git a/test/MC/Mips/mips32r6/valid-xfail.s b/test/MC/Mips/mips32r6/valid-xfail.s new file mode 100644 index 000000000000..0c911d71f47e --- /dev/null +++ b/test/MC/Mips/mips32r6/valid-xfail.s @@ -0,0 +1,19 @@ +# Instructions that should be valid but currently fail for known reasons (e.g. +# they aren't implemented yet). +# This test is set up to XPASS if any instruction generates an encoding. 
+# +# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 | not FileCheck %s +# CHECK-NOT: encoding +# XFAIL: * + + .set noat + bovc $0, $2, 4 # TODO: bovc $0, $2, 4 # encoding: [0x20,0x40,0x00,0x01] + bovc $2, $4, 4 # TODO: bovc $2, $4, 4 # encoding: [0x20,0x82,0x00,0x01] + bnvc $0, $2, 4 # TODO: bnvc $0, $2, 4 # encoding: [0x60,0x40,0x00,0x01] + bnvc $2, $4, 4 # TODO: bnvc $2, $4, 4 # encoding: [0x60,0x82,0x00,0x01] + beqc $0, $6, 256 # TODO: beqc $6, $zero, 256 # encoding: [0x20,0xc0,0x00,0x40] + beqc $5, $0, 256 # TODO: beqc $5, $zero, 256 # encoding: [0x20,0xa0,0x00,0x40] + beqc $6, $5, 256 # TODO: beqc $5, $6, 256 # encoding: [0x20,0xa6,0x00,0x40] + bnec $0, $6, 256 # TODO: bnec $6, $zero, 256 # encoding: [0x60,0xc0,0x00,0x40] + bnec $5, $0, 256 # TODO: bnec $5, $zero, 256 # encoding: [0x60,0xa0,0x00,0x40] + bnec $6, $5, 256 # TODO: bnec $5, $6, 256 # encoding: [0x60,0xa6,0x00,0x40] diff --git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s index 33965c171ab8..5b4b92871fb2 100644 --- a/test/MC/Mips/mips32r6/valid.s +++ b/test/MC/Mips/mips32r6/valid.s @@ -1,5 +1,15 @@ # Instructions that are valid # +# Branches have some unusual encoding rules in MIPS32r6 so we need to test: +# rs == 0 +# rs != 0 +# rt == 0 +# rt != 0 +# rs < rt +# rs == rt +# rs > rt +# appropriately for each branch instruction +# # RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 | FileCheck %s .set noat @@ -19,8 +29,12 @@ bc2eqz $31,8 # CHECK: bc2eqz $31, 8 # encoding: [0x49,0x3f,0x00,0x02] bc2nez $0,8 # CHECK: bc2nez $0, 8 # encoding: [0x49,0xa0,0x00,0x02] bc2nez $31,8 # CHECK: bc2nez $31, 8 # encoding: [0x49,0xbf,0x00,0x02] + # beqc requires rs < rt && rs != 0 but we also accept when this is not true. See also bovc + # FIXME: Testcases are in valid-xfail.s at the moment beqc $5, $6, 256 # CHECK: beqc $5, $6, 256 # encoding: [0x20,0xa6,0x00,0x40] beqzalc $2, 1332 # CHECK: beqzalc $2, 1332 # encoding: [0x20,0x02,0x01,0x4d] + # bnec requires rs < rt && rs != 0 but we accept when this is not true. See also bnvc + # FIXME: Testcases are in valid-xfail.s at the moment bnec $5, $6, 256 # CHECK: bnec $5, $6, 256 # encoding: [0x60,0xa6,0x00,0x40] bnezalc $2, 1332 # CHECK: bnezalc $2, 1332 # encoding: [0x60,0x02,0x01,0x4d] beqzc $5, 72256 # CHECK: beqzc $5, 72256 # encoding: [0xd8,0xa0,0x46,0x90] @@ -34,6 +48,14 @@ bgtzc $5, 256 # CHECK: bgtzc $5, 256 # encoding: [0x5c,0x05,0x00,0x40] bitswap $4, $2 # CHECK: bitswap $4, $2 # encoding: [0x7c,0x02,0x20,0x20] blezalc $2, 1332 # CHECK: blezalc $2, 1332 # encoding: [0x18,0x02,0x01,0x4d] + # bnvc requires that rs >= rt but we accept both. See also bnec + bnvc $0, $0, 4 # CHECK: bnvc $zero, $zero, 4 # encoding: [0x60,0x00,0x00,0x01] + bnvc $2, $0, 4 # CHECK: bnvc $2, $zero, 4 # encoding: [0x60,0x40,0x00,0x01] + bnvc $4, $2, 4 # CHECK: bnvc $4, $2, 4 # encoding: [0x60,0x82,0x00,0x01] + # bovc requires that rs >= rt but we accept both. 
See also beqc + bovc $0, $0, 4 # CHECK: bovc $zero, $zero, 4 # encoding: [0x20,0x00,0x00,0x01] + bovc $2, $0, 4 # CHECK: bovc $2, $zero, 4 # encoding: [0x20,0x40,0x00,0x01] + bovc $4, $2, 4 # CHECK: bovc $4, $2, 4 # encoding: [0x20,0x82,0x00,0x01] cmp.f.s $f2,$f3,$f4 # CHECK: cmp.f.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x80] cmp.f.d $f2,$f3,$f4 # CHECK: cmp.f.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x80] cmp.un.s $f2,$f3,$f4 # CHECK: cmp.un.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x81] diff --git a/test/MC/Mips/mips64r6/valid-xfail.s b/test/MC/Mips/mips64r6/valid-xfail.s new file mode 100644 index 000000000000..a75122571d93 --- /dev/null +++ b/test/MC/Mips/mips64r6/valid-xfail.s @@ -0,0 +1,19 @@ +# Instructions that should be valid but currently fail for known reasons (e.g. +# they aren't implemented yet). +# This test is set up to XPASS if any instruction generates an encoding. +# +# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 | not FileCheck %s +# CHECK-NOT: encoding +# XFAIL: * + + .set noat + bovc $0, $2, 4 # TODO: bovc $0, $2, 4 # encoding: [0x20,0x40,0x00,0x01] + bovc $2, $4, 4 # TODO: bovc $2, $4, 4 # encoding: [0x20,0x82,0x00,0x01] + bnvc $0, $2, 4 # TODO: bnvc $0, $2, 4 # encoding: [0x60,0x40,0x00,0x01] + bnvc $2, $4, 4 # TODO: bnvc $2, $4, 4 # encoding: [0x60,0x82,0x00,0x01] + beqc $0, $6, 256 # TODO: beqc $6, $zero, 256 # encoding: [0x20,0xc0,0x00,0x40] + beqc $5, $0, 256 # TODO: beqc $5, $zero, 256 # encoding: [0x20,0xa0,0x00,0x40] + beqc $6, $5, 256 # TODO: beqc $5, $6, 256 # encoding: [0x20,0xa6,0x00,0x40] + bnec $0, $6, 256 # TODO: bnec $6, $zero, 256 # encoding: [0x60,0xc0,0x00,0x40] + bnec $5, $0, 256 # TODO: bnec $5, $zero, 256 # encoding: [0x60,0xa0,0x00,0x40] + bnec $6, $5, 256 # TODO: bnec $5, $6, 256 # encoding: [0x60,0xa6,0x00,0x40] diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s index 6d7ffbd771f5..efdfc7f56ce6 100644 --- a/test/MC/Mips/mips64r6/valid.s +++ b/test/MC/Mips/mips64r6/valid.s @@ -1,5 +1,15 @@ # Instructions that are valid # +# Branches have some unusual encoding rules in MIPS32r6 so we need to test: +# rs == 0 +# rs != 0 +# rt == 0 +# rt != 0 +# rs < rt +# rs == rt +# rs > rt +# appropriately for each branch instruction +# # RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r6 | FileCheck %s .set noat @@ -19,8 +29,12 @@ bc2eqz $31,8 # CHECK: bc2eqz $31, 8 # encoding: [0x49,0x3f,0x00,0x02] bc2nez $0,8 # CHECK: bc2nez $0, 8 # encoding: [0x49,0xa0,0x00,0x02] bc2nez $31,8 # CHECK: bc2nez $31, 8 # encoding: [0x49,0xbf,0x00,0x02] + # beqc requires rs < rt && rs != 0 but we also accept when this is not true. See also bovc + # FIXME: Testcases are in valid-xfail.s at the moment beqc $5, $6, 256 # CHECK: beqc $5, $6, 256 # encoding: [0x20,0xa6,0x00,0x40] beqzalc $2, 1332 # CHECK: beqzalc $2, 1332 # encoding: [0x20,0x02,0x01,0x4d] + # bnec requires rs < rt && rs != 0 but we accept when this is not true. 
See also bnvc + # FIXME: Testcases are in valid-xfail.s at the moment bnec $5, $6, 256 # CHECK: bnec $5, $6, 256 # encoding: [0x60,0xa6,0x00,0x40] bnezalc $2, 1332 # CHECK: bnezalc $2, 1332 # encoding: [0x60,0x02,0x01,0x4d] beqzc $5, 72256 # CHECK: beqzc $5, 72256 # encoding: [0xd8,0xa0,0x46,0x90] @@ -34,6 +48,14 @@ bgtzc $5, 256 # CHECK: bgtzc $5, 256 # encoding: [0x5c,0x05,0x00,0x40] bitswap $4, $2 # CHECK: bitswap $4, $2 # encoding: [0x7c,0x02,0x20,0x20] blezalc $2, 1332 # CHECK: blezalc $2, 1332 # encoding: [0x18,0x02,0x01,0x4d] + # bnvc requires that rs >= rt but we accept both. See also bnec + bnvc $0, $0, 4 # CHECK: bnvc $zero, $zero, 4 # encoding: [0x60,0x00,0x00,0x01] + bnvc $2, $0, 4 # CHECK: bnvc $2, $zero, 4 # encoding: [0x60,0x40,0x00,0x01] + bnvc $4, $2, 4 # CHECK: bnvc $4, $2, 4 # encoding: [0x60,0x82,0x00,0x01] + # bovc requires that rs >= rt but we accept both. See also beqc + bovc $0, $0, 4 # CHECK: bovc $zero, $zero, 4 # encoding: [0x20,0x00,0x00,0x01] + bovc $2, $0, 4 # CHECK: bovc $2, $zero, 4 # encoding: [0x20,0x40,0x00,0x01] + bovc $4, $2, 4 # CHECK: bovc $4, $2, 4 # encoding: [0x20,0x82,0x00,0x01] cmp.f.s $f2,$f3,$f4 # CHECK: cmp.f.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x80] cmp.f.d $f2,$f3,$f4 # CHECK: cmp.f.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x80] cmp.un.s $f2,$f3,$f4 # CHECK: cmp.un.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x81] @@ -68,9 +90,9 @@ cmp.ngt.d $f2,$f3,$f4 # CHECK: cmp.ngt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8f] dalign $4,$2,$3,5 # CHECK: dalign $4, $2, $3, 5 # encoding: [0x7c,0x43,0x23,0x64] daui $3,$2,0x1234 # CHECK: daui $3, $2, 4660 # encoding: [0x74,0x62,0x12,0x34] - dahi $3,$3,0x5678 # CHECK: dahi $3, $3, 22136 # encoding: [0x04,0x66,0x56,0x78] - dati $3,$3,0xabcd # CHECK: dati $3, $3, 43981 # encoding: [0x04,0x7e,0xab,0xcd] - dbitswap $4, $2 # CHECK: bitswap $4, $2 # encoding: [0x7c,0x02,0x20,0x24] + dahi $3,0x5678 # CHECK: dahi $3, 22136 # encoding: [0x04,0x66,0x56,0x78] + dati $3,0xabcd # CHECK: dati $3, 43981 # encoding: [0x04,0x7e,0xab,0xcd] + dbitswap $4, $2 # CHECK: dbitswap $4, $2 # encoding: [0x7c,0x02,0x20,0x24] div $2,$3,$4 # CHECK: div $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9a] divu $2,$3,$4 # CHECK: divu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9b] jialc $5, 256 # CHECK: jialc $5, 256 # encoding: [0xf8,0x05,0x01,0x00] From 5f45ba140eed1b9746b9c389b365661790797dcc Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Thu, 22 May 2014 11:37:38 +0000 Subject: [PATCH 066/906] [mips][mips64r6] Test that paired single instructions are invalid Summary: These emit the 'unknown instruction' instead of the correct error because they have not been implemented in LLVM for any MIPS ISA. 
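For reference, every case in the new test follows the same one-line pattern; the first entry from the test added below illustrates it:

    abs.ps $f22,$f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction

The CHECK pattern matches the diagnostic currently produced, so the test pins down today's (wrong) error rather than the intended one.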
Reviewers: jkolek, zoran.jovanovic, vmedic Reviewed By: vmedic Differential Revision: http://reviews.llvm.org/D3841 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209418 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips32r6InstrInfo.td | 1 - .../Mips/mips64r6/invalid-mips5-wrong-error.s | 44 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 845c851b5ec5..33060e1f1c55 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -25,7 +25,6 @@ include "Mips32r6InstrFormats.td" // Reencoded: sdbbp // Reencoded: sdc2 // Reencoded: swc2 -// Removed: /.ps$/, cvt.ps.s, cvt.ps.pw // Removed: addi // Removed: bc1any2, bc1any4 // Removed: bc2[ft] diff --git a/test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s new file mode 100644 index 000000000000..6b980e6ed9a7 --- /dev/null +++ b/test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s @@ -0,0 +1,44 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + abs.ps $f22,$f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + add.ps $f25,$f27,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + alnv.ps $f12,$f18,$f30,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.eq.ps $fcc5,$f0,$f9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.f.ps $fcc6,$f11,$f11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.le.ps $fcc1,$f7,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.lt.ps $f19,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.nge.ps $f1,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.ngl.ps $f21,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.ngle.ps $fcc7,$f12,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.ngt.ps $fcc5,$f30,$f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.ole.ps $fcc7,$f21,$f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.olt.ps $fcc3,$f7,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.seq.ps $fcc6,$f31,$f14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.sf.ps $fcc6,$f4,$f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.ueq.ps $fcc1,$f5,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.ule.ps $fcc6,$f17,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.ult.ps $fcc7,$f14,$f0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + c.un.ps $fcc4,$f2,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + cvt.ps.s $f3,$f18,$f19 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + cvt.ps.pw $f3,$f18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + madd.ps $f22,$f3,$f14,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + mov.ps $f22,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + movf.ps $f10,$f28,$fcc6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + movn.ps $f31,$f31,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + movt.ps $f20,$f25,$fcc2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + movz.ps $f18,$f17,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown 
instruction + msub.ps $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + mul.ps $f14,$f0,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + neg.ps $f19,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + nmadd.ps $f27,$f4,$f9,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + nmsub.ps $f6,$f12,$f14,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + pll.ps $f25,$f9,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + plu.ps $f1,$f26,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + pul.ps $f9,$f30,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + puu.ps $f24,$f9,$f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + sub.ps $f5,$f14,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction From e3ece90f24497f63cd058f497dae61c4daa27452 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Thu, 22 May 2014 11:42:31 +0000 Subject: [PATCH 067/906] [mips][mips64r6] addi is not available on MIPS32r6/MIPS64r6 Summary: Depends on D3787. Tablegen will raise an assertion without it. Reviewers: zoran.jovanovic, jkolek, vmedic Reviewed By: vmedic Differential Revision: http://reviews.llvm.org/D3842 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209419 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips32r6InstrInfo.td | 1 - lib/Target/Mips/MipsInstrInfo.td | 15 ++++++++++++++- test/MC/Mips/mips32r6/invalid-mips1.s | 8 ++++++++ test/MC/Mips/mips64r6/invalid-mips1.s | 8 ++++++++ 4 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 test/MC/Mips/mips32r6/invalid-mips1.s create mode 100644 test/MC/Mips/mips64r6/invalid-mips1.s diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 33060e1f1c55..a1a3f6bc8cf3 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -25,7 +25,6 @@ include "Mips32r6InstrFormats.td" // Reencoded: sdbbp // Reencoded: sdc2 // Reencoded: swc2 -// Removed: addi // Removed: bc1any2, bc1any4 // Removed: bc2[ft] // Removed: bc2f, bc2t diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 78cba614d6cf..dbcd67436901 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -166,6 +166,8 @@ def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">, AssemblerPredicate<"FeatureMips32r2">; def HasMips32r6 : Predicate<"Subtarget.hasMips32r6()">, AssemblerPredicate<"FeatureMips32r6">; +def NotMips32r6 : Predicate<"!Subtarget.hasMips32r6()">, + AssemblerPredicate<"!FeatureMips32r6">; def IsGP64bit : Predicate<"Subtarget.isGP64bit()">, AssemblerPredicate<"FeatureGP64Bit">; def IsGP32bit : Predicate<"!Subtarget.isGP64bit()">, @@ -176,6 +178,8 @@ def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">, AssemblerPredicate<"FeatureMips64r2">; def HasMips64r6 : Predicate<"Subtarget.hasMips64r6()">, AssemblerPredicate<"FeatureMips64r6">; +def NotMips64r6 : Predicate<"!Subtarget.hasMips64r6()">, + AssemblerPredicate<"!FeatureMips64r6">; def IsN64 : Predicate<"Subtarget.isABI_N64()">, AssemblerPredicate<"FeatureN64">; def InMips16Mode : Predicate<"Subtarget.inMips16Mode()">, @@ -211,6 +215,14 @@ class GPR_64 { list GPRPredicates = [IsGP64bit]; } // They are mutually exclusive. 
//===----------------------------------------------------------------------===// +// FIXME: I'd prefer to use additive predicates to build the instruction sets +// but we are short on assembler feature bits at the moment. Using a +// subtractive predicate will hopefully keep us under the 32 predicate +// limit long enough to develop an alternative way to handle P1||P2 +// predicates. +class ISA_MIPS1_NOT_32R6_64R6 { + list InsnPredicates = [NotMips32r6, NotMips64r6]; +} class ISA_MIPS2 { list InsnPredicates = [HasMips2]; } class ISA_MIPS3 { list InsnPredicates = [HasMips3]; } class ISA_MIPS32 { list InsnPredicates = [HasMips32]; } @@ -1000,7 +1012,8 @@ def LONG_BRANCH_ADDiu : PseudoSE<(outs GPR32Opnd:$dst), def ADDiu : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd, II_ADDIU, immSExt16, add>, ADDI_FM<0x9>, IsAsCheapAsAMove; -def ADDi : MMRel, ArithLogicI<"addi", simm16, GPR32Opnd>, ADDI_FM<0x8>; +def ADDi : MMRel, ArithLogicI<"addi", simm16, GPR32Opnd>, ADDI_FM<0x8>, + ISA_MIPS1_NOT_32R6_64R6; def SLTi : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, GPR32Opnd>, SLTI_FM<0xa>; def SLTiu : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, GPR32Opnd>, diff --git a/test/MC/Mips/mips32r6/invalid-mips1.s b/test/MC/Mips/mips32r6/invalid-mips1.s new file mode 100644 index 000000000000..aa7d407cf185 --- /dev/null +++ b/test/MC/Mips/mips32r6/invalid-mips1.s @@ -0,0 +1,8 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + addi $13,$9,26322 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips64r6/invalid-mips1.s b/test/MC/Mips/mips64r6/invalid-mips1.s new file mode 100644 index 000000000000..1225005ec84e --- /dev/null +++ b/test/MC/Mips/mips64r6/invalid-mips1.s @@ -0,0 +1,8 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + addi $13,$9,26322 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled From 00011c71f92328253102032abdb3acbd97fc9d4b Mon Sep 17 00:00:00 2001 From: Bradley Smith Date: Thu, 22 May 2014 11:44:34 +0000 Subject: [PATCH 068/906] Extend sys::getHostCPUFeatures to work on AArch64 platforms git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209420 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Host.cpp | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index e0e85630be6b..7e86e2ffcf28 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -686,7 +686,7 @@ StringRef sys::getHostCPUName() { } #endif -#if defined(__linux__) && defined(__arm__) +#if defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) bool sys::getHostCPUFeatures(StringMap &Features) { std::string Err; DataStreamer *DS = getDataFileStreamer("/proc/cpuinfo", &Err); @@ -715,8 +715,24 @@ bool sys::getHostCPUFeatures(StringMap &Features) { break; } +#if defined(__aarch64__) + // Keep track of which crypto features we have seen + enum { + HWCAP_AES = 0x1, + HWCAP_PMULL = 0x2, + HWCAP_SHA1 = 0x4, + HWCAP_SHA2 = 0x8 + }; + uint32_t crypto = 0; +#endif + for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { StringRef LLVMFeatureStr = StringSwitch(CPUFeatures[I]) +#if defined(__aarch64__) + .Case("asimd", "neon") + .Case("fp", 
"fp-armv8") + .Case("crc32", "crc") +#else .Case("half", "fp16") .Case("neon", "neon") .Case("vfpv3", "vfp3") @@ -724,12 +740,32 @@ bool sys::getHostCPUFeatures(StringMap &Features) { .Case("vfpv4", "vfp4") .Case("idiva", "hwdiv-arm") .Case("idivt", "hwdiv") +#endif .Default(""); +#if defined(__aarch64__) + // We need to check crypto seperately since we need all of the crypto + // extensions to enable the subtarget feature + if (CPUFeatures[I] == "aes") + crypto |= HWCAP_AES; + else if (CPUFeatures[I] == "pmull") + crypto |= HWCAP_PMULL; + else if (CPUFeatures[I] == "sha1") + crypto |= HWCAP_SHA1; + else if (CPUFeatures[I] == "sha2") + crypto |= HWCAP_SHA2; +#endif + if (LLVMFeatureStr != "") Features.GetOrCreateValue(LLVMFeatureStr).setValue(true); } +#if defined(__aarch64__) + // If we have all crypto bits we can add the feature + if (crypto == (HWCAP_AES | HWCAP_PMULL | HWCAP_SHA1 | HWCAP_SHA2)) + Features.GetOrCreateValue("crypto").setValue(true); +#endif + return true; } #else From 8afb08e5b5536a76bf359c0a75ffc1738d2ad1dc Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Thu, 22 May 2014 11:46:58 +0000 Subject: [PATCH 069/906] [mips] Use addiu in inline assembly tests since addi is not available in all ISA's Summary: This patch is necessary so that they do not fail on MIPS32r6/MIPS64r6 when -integrated-as is enabled by default and we correctly detect the host CPU. No functional change since these tests are testing the behaviour of the constraint used for the third operand rather than the mnemonic. Depends on D3842 Reviewers: zoran.jovanovic, jkolek, vmedic Reviewed By: vmedic Differential Revision: http://reviews.llvm.org/D3843 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209421 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll | 2 +- test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll | 2 +- test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll | 2 +- test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll | 2 +- test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll | 2 +- test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll | 2 +- test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll | 16 +++++------ test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll | 4 +-- test/CodeGen/Mips/inlineasm-operand-code.ll | 28 +++++++++---------- test/CodeGen/Mips/inlineasm_constraint.ll | 24 ++++++++-------- 10 files changed, 42 insertions(+), 42 deletions(-) diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll index f9e53cbb07a4..c09108dc0744 100644 --- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll +++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll @@ -9,7 +9,7 @@ define i32 @main() nounwind { entry: ;CHECK-ERRORS: error: invalid operand for inline asm constraint 'I' - tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i32 7, i32 1048576) nounwind + tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,I"(i32 7, i32 1048576) nounwind ret i32 0 } diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll index 1fdf672fe197..2b24b0f82c57 100644 --- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll +++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll @@ -10,7 +10,7 @@ entry: ;CHECK-ERRORS: error: invalid operand for inline asm constraint 'J' - tail call i32 asm "addi $0,$1,$2", "=r,r,J"(i32 1024, i32 3) nounwind + tail call i32 asm "addiu $0,$1,$2", "=r,r,J"(i32 1024, i32 3) nounwind ret i32 0 } diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll index 
49dcc8745857..5edb3e24674e 100644 --- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll +++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll @@ -10,7 +10,7 @@ entry: ;CHECK-ERRORS: error: invalid operand for inline asm constraint 'L' - tail call i32 asm "addi $0,$1,$2", "=r,r,L"(i32 7, i32 1048579) nounwind + tail call i32 asm "addiu $0,$1,$2", "=r,r,L"(i32 7, i32 1048579) nounwind ret i32 0 } diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll index 770669d913e8..eaa540acdafa 100644 --- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll +++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll @@ -11,7 +11,7 @@ entry: ;CHECK-ERRORS: error: invalid operand for inline asm constraint 'N' - tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,N"(i32 7, i32 3) nounwind + tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,N"(i32 7, i32 3) nounwind ret i32 0 } diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll index cd4431ac5265..56afbaaa9cd6 100644 --- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll +++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll @@ -11,6 +11,6 @@ entry: ;CHECK-ERRORS: error: invalid operand for inline asm constraint 'O' - tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,O"(i32 undef, i32 16384) nounwind + tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,O"(i32 undef, i32 16384) nounwind ret i32 0 } diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll index 0a4739ebb96b..0a55cb55e5f2 100644 --- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll +++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll @@ -11,6 +11,6 @@ entry: ;CHECK-ERRORS: error: invalid operand for inline asm constraint 'P' - tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,P"(i32 undef, i32 655536) nounwind + tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,P"(i32 undef, i32 655536) nounwind ret i32 0 } diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll index 94ded307fda9..9464918063f8 100644 --- a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll +++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll @@ -7,27 +7,27 @@ entry: ; r with char ;CHECK: #APP -;CHECK: addi ${{[0-9]+}},${{[0-9]+}},23 +;CHECK: addiu ${{[0-9]+}},${{[0-9]+}},23 ;CHECK: #NO_APP - tail call i8 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i8 27, i8 23) nounwind + tail call i8 asm sideeffect "addiu $0,$1,$2", "=r,r,n"(i8 27, i8 23) nounwind ; r with short ;CHECK: #APP -;CHECK: addi ${{[0-9]+}},${{[0-9]+}},13 +;CHECK: addiu ${{[0-9]+}},${{[0-9]+}},13 ;CHECK: #NO_APP - tail call i16 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i16 17, i16 13) nounwind + tail call i16 asm sideeffect "addiu $0,$1,$2", "=r,r,n"(i16 17, i16 13) nounwind ; r with int ;CHECK: #APP -;CHECK: addi ${{[0-9]+}},${{[0-9]+}},3 +;CHECK: addiu ${{[0-9]+}},${{[0-9]+}},3 ;CHECK: #NO_APP - tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i32 7, i32 3) nounwind + tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,n"(i32 7, i32 3) nounwind ; Now c with 1024: make sure register $25 is picked ; CHECK: #APP -; CHECK: addi $25,${{[0-9]+}},1024 +; CHECK: addiu $25,${{[0-9]+}},1024 ; CHECK: #NO_APP - tail call i32 asm sideeffect "addi $0,$1,$2", "=c,c,I"(i32 4194304, i32 1024) nounwind + tail call i32 asm sideeffect "addiu $0,$1,$2", "=c,c,I"(i32 4194304, i32 1024) nounwind ; Now l with 1024: make sure register lo is picked. 
We do this by checking the instruction ; after the inline expression for a mflo to pull the value out of lo. diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll index 787066602575..a7ba762b1064 100644 --- a/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll +++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll @@ -12,9 +12,9 @@ entry: ; r with long long ;CHECK: #APP -;CHECK: addi ${{[0-9]+}},${{[0-9]+}},3 +;CHECK: addiu ${{[0-9]+}},${{[0-9]+}},3 ;CHECK: #NO_APP - tail call i64 asm sideeffect "addi $0,$1,$2", "=r,r,i"(i64 7, i64 3) nounwind + tail call i64 asm sideeffect "addiu $0,$1,$2", "=r,r,i"(i64 7, i64 3) nounwind ret i32 0 } diff --git a/test/CodeGen/Mips/inlineasm-operand-code.ll b/test/CodeGen/Mips/inlineasm-operand-code.ll index 7bb4adc31bd8..6512851a11be 100644 --- a/test/CodeGen/Mips/inlineasm-operand-code.ll +++ b/test/CodeGen/Mips/inlineasm-operand-code.ll @@ -12,9 +12,9 @@ define i32 @constraint_X() nounwind { entry: ;CHECK_LITTLE_32-LABEL: constraint_X: ;CHECK_LITTLE_32: #APP -;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},0xfffffffffffffffd +;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},0xfffffffffffffffd ;CHECK_LITTLE_32: #NO_APP - tail call i32 asm sideeffect "addi $0,$1,${2:X}", "=r,r,I"(i32 7, i32 -3) ; + tail call i32 asm sideeffect "addiu $0,$1,${2:X}", "=r,r,I"(i32 7, i32 -3) ; ret i32 0 } @@ -23,9 +23,9 @@ define i32 @constraint_x() nounwind { entry: ;CHECK_LITTLE_32-LABEL: constraint_x: ;CHECK_LITTLE_32: #APP -;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},0xfffd +;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},0xfffd ;CHECK_LITTLE_32: #NO_APP - tail call i32 asm sideeffect "addi $0,$1,${2:x}", "=r,r,I"(i32 7, i32 -3) ; + tail call i32 asm sideeffect "addiu $0,$1,${2:x}", "=r,r,I"(i32 7, i32 -3) ; ret i32 0 } @@ -34,9 +34,9 @@ define i32 @constraint_d() nounwind { entry: ;CHECK_LITTLE_32-LABEL: constraint_d: ;CHECK_LITTLE_32: #APP -;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},-3 +;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},-3 ;CHECK_LITTLE_32: #NO_APP - tail call i32 asm sideeffect "addi $0,$1,${2:d}", "=r,r,I"(i32 7, i32 -3) ; + tail call i32 asm sideeffect "addiu $0,$1,${2:d}", "=r,r,I"(i32 7, i32 -3) ; ret i32 0 } @@ -45,9 +45,9 @@ define i32 @constraint_m() nounwind { entry: ;CHECK_LITTLE_32-LABEL: constraint_m: ;CHECK_LITTLE_32: #APP -;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},-4 +;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},-4 ;CHECK_LITTLE_32: #NO_APP - tail call i32 asm sideeffect "addi $0,$1,${2:m}", "=r,r,I"(i32 7, i32 -3) ; + tail call i32 asm sideeffect "addiu $0,$1,${2:m}", "=r,r,I"(i32 7, i32 -3) ; ret i32 0 } @@ -56,15 +56,15 @@ define i32 @constraint_z() nounwind { entry: ;CHECK_LITTLE_32-LABEL: constraint_z: ;CHECK_LITTLE_32: #APP -;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},-3 +;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},-3 ;CHECK_LITTLE_32: #NO_APP - tail call i32 asm sideeffect "addi $0,$1,${2:z}", "=r,r,I"(i32 7, i32 -3) ; + tail call i32 asm sideeffect "addiu $0,$1,${2:z}", "=r,r,I"(i32 7, i32 -3) ; ; z with 0 ;CHECK_LITTLE_32: #APP -;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},$0 +;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},$0 ;CHECK_LITTLE_32: #NO_APP - tail call i32 asm sideeffect "addi $0,$1,${2:z}", "=r,r,I"(i32 7, i32 0) nounwind + tail call i32 asm sideeffect "addiu $0,$1,${2:z}", "=r,r,I"(i32 7, i32 0) nounwind ret i32 0 } @@ -73,9 +73,9 @@ define i32 @constraint_longlong() nounwind { entry: ;CHECK_LITTLE_32-LABEL: constraint_longlong: ;CHECK_LITTLE_32: #APP 
-;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},3 +;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},3 ;CHECK_LITTLE_32: #NO_APP - tail call i64 asm sideeffect "addi $0,$1,$2 \0A\09", "=r,r,X"(i64 1229801703532086340, i64 3) nounwind + tail call i64 asm sideeffect "addiu $0,$1,$2 \0A\09", "=r,r,X"(i64 1229801703532086340, i64 3) nounwind ret i32 0 } diff --git a/test/CodeGen/Mips/inlineasm_constraint.ll b/test/CodeGen/Mips/inlineasm_constraint.ll index 8d30f45d84e3..8701bf43fdc0 100644 --- a/test/CodeGen/Mips/inlineasm_constraint.ll +++ b/test/CodeGen/Mips/inlineasm_constraint.ll @@ -5,21 +5,21 @@ entry: ; First I with short ; CHECK: #APP -; CHECK: addi ${{[0-9]+}},${{[0-9]+}},4096 +; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},4096 ; CHECK: #NO_APP - tail call i16 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i16 7, i16 4096) nounwind + tail call i16 asm sideeffect "addiu $0,$1,$2", "=r,r,I"(i16 7, i16 4096) nounwind ; Then I with int ; CHECK: #APP -; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3 +; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},-3 ; CHECK: #NO_APP - tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i32 7, i32 -3) nounwind + tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,I"(i32 7, i32 -3) nounwind ; Now J with 0 ; CHECK: #APP -; CHECK: addi ${{[0-9]+}},${{[0-9]+}},0 +; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},0 ; CHECK: #NO_APP - tail call i32 asm sideeffect "addi $0,$1,$2\0A\09 ", "=r,r,J"(i32 7, i16 0) nounwind + tail call i32 asm sideeffect "addiu $0,$1,$2\0A\09 ", "=r,r,J"(i32 7, i16 0) nounwind ; Now K with 64 ; CHECK: #APP @@ -35,21 +35,21 @@ entry: ; Now N with -3 ; CHECK: #APP -; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3 +; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},-3 ; CHECK: #NO_APP - tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,N"(i32 7, i32 -3) nounwind + tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,N"(i32 7, i32 -3) nounwind ; Now O with -3 ; CHECK: #APP -; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3 +; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},-3 ; CHECK: #NO_APP - tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,O"(i32 7, i16 -3) nounwind + tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,O"(i32 7, i16 -3) nounwind ; Now P with 65535 ; CHECK: #APP -; CHECK: addi ${{[0-9]+}},${{[0-9]+}},65535 +; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},65535 ; CHECK: #NO_APP - tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,P"(i32 7, i32 65535) nounwind + tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,P"(i32 7, i32 65535) nounwind ; Now R Which takes the address of c %c = alloca i32, align 4 From 98eba923346615a9b2896b5a1f2377def1b73c35 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Thu, 22 May 2014 11:51:06 +0000 Subject: [PATCH 070/906] [mips] Change lwl and lwr in inlineasm_constraint.ll to lw Summary: lwl and lwr are not available in MIPS32r6/MIPS64r6. The purpose of the test is to check that the '$1' expands to '0($x)' rather than to test something related to the lwl or lwr instructions so we can simply switch to lw. 
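To make that concrete, the updated CHECK lines (shown in full in the diff below) only verify the expansion of the memory operand, for example:

    ; CHECK: lw ${{[0-9]+}}, 1 + 0(${{[0-9]+}})

i.e. the '*R'-constrained '$1' must print as an offset from a register, '0($x)'; which load instruction carries it is irrelevant to the test.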
Depends on D3842 Reviewers: jkolek, zoran.jovanovic, vmedic Reviewed By: vmedic Differential Revision: http://reviews.llvm.org/D3844 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209423 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/Mips/inlineasm_constraint.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/CodeGen/Mips/inlineasm_constraint.ll b/test/CodeGen/Mips/inlineasm_constraint.ll index 8701bf43fdc0..76b73dc276ae 100644 --- a/test/CodeGen/Mips/inlineasm_constraint.ll +++ b/test/CodeGen/Mips/inlineasm_constraint.ll @@ -54,10 +54,10 @@ entry: ; Now R Which takes the address of c %c = alloca i32, align 4 store i32 -4469539, i32* %c, align 4 - %8 = call i32 asm sideeffect "lwl $0, 1 + $1\0A\09lwr $0, 2 + $1\0A\09", "=r,*R"(i32* %c) #1 + %8 = call i32 asm sideeffect "lw $0, 1 + $1\0A\09lw $0, 2 + $1\0A\09", "=r,*R"(i32* %c) #1 ; CHECK: #APP -; CHECK: lwl ${{[0-9]+}}, 1 + 0(${{[0-9]+}}) -; CHECK: lwr ${{[0-9]+}}, 2 + 0(${{[0-9]+}}) +; CHECK: lw ${{[0-9]+}}, 1 + 0(${{[0-9]+}}) +; CHECK: lw ${{[0-9]+}}, 2 + 0(${{[0-9]+}}) ; CHECK: #NO_APP ret i32 0 From b08e03806f019366da41c75173b9358106a22edc Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Thu, 22 May 2014 11:55:04 +0000 Subject: [PATCH 071/906] [mips] Make unalignedload.ll test stricter and easier to modify for MIPS32r6/MIPS64r6 Summary: * Split into two functions, one to test each struct. * R0 and R2 must be defined by an lw with a %got reference to the correct symbol. * Test for $4 (first argument) where appropriate instead of accepting any register. * Test that the two lbu's are correctly combined into $4 Depends on D3844 Reviewers: jkolek, zoran.jovanovic, vmedic Reviewed By: vmedic Differential Revision: http://reviews.llvm.org/D3845 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209424 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/Mips/unalignedload.ll | 41 +++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/test/CodeGen/Mips/unalignedload.ll b/test/CodeGen/Mips/unalignedload.ll index 19f3af7f344a..e86b1bae113d 100644 --- a/test/CodeGen/Mips/unalignedload.ll +++ b/test/CodeGen/Mips/unalignedload.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL -; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB +; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=ALL -check-prefix=CHECK-EL +; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=ALL -check-prefix=CHECK-EB %struct.S2 = type { %struct.S1, %struct.S1 } %struct.S1 = type { i8, i8 } %struct.S4 = type { [7 x i8] } @@ -7,21 +7,38 @@ @s2 = common global %struct.S2 zeroinitializer, align 1 @s4 = common global %struct.S4 zeroinitializer, align 1 -define void @foo1() nounwind { +define void @bar1() nounwind { entry: -; CHECK-EL-DAG: lbu ${{[0-9]+}}, 2($[[R0:[0-9]+]]) -; CHECK-EL-DAG: lbu ${{[0-9]+}}, 3($[[R0]]) -; CHECK-EL: jalr -; CHECK-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[R2:[0-9]+]]) +; ALL-LABEL: bar1: + +; ALL-DAG: lw $[[R0:[0-9]+]], %got(s2)( + +; ALL-DAG: lbu $[[PART1:[0-9]+]], 2($[[R0]]) +; ALL-DAG: lbu $[[PART2:[0-9]+]], 3($[[R0]]) + +; CHECK-EL-DAG: sll $[[T0:[0-9]+]], $[[PART2]], 8 +; CHECK-EL-DAG: or $4, $[[T0]], $[[PART1]] + +; CHECK-EB-DAG: sll $[[T0:[0-9]+]], $[[PART1]], 8 +; CHECK-EB-DAG: or $[[T1:[0-9]+]], $[[T0]], $[[PART2]] +; CHECK-EB-DAG: sll $4, $[[T1]], 16 + + tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind + ret void +} + +define void @bar2() 
nounwind { +entry: +; ALL-LABEL: bar2: + +; ALL-DAG: lw $[[R2:[0-9]+]], %got(s4)( + +; CHECK-EL-DAG: lwl $[[R1:4]], 3($[[R2]]) ; CHECK-EL-DAG: lwr $[[R1]], 0($[[R2]]) -; CHECK-EB-DAG: lbu ${{[0-9]+}}, 3($[[R0:[0-9]+]]) -; CHECK-EB-DAG: lbu ${{[0-9]+}}, 2($[[R0]]) -; CHECK-EB: jalr -; CHECK-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[R2:[0-9]+]]) +; CHECK-EB-DAG: lwl $[[R1:4]], 0($[[R2]]) ; CHECK-EB-DAG: lwr $[[R1]], 3($[[R2]]) - tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind tail call void @foo4(%struct.S4* byval @s4) nounwind ret void } From e072ed71c87ba1be56f1bca1a1a8057760badea0 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 22 May 2014 11:56:09 +0000 Subject: [PATCH 072/906] ARM64: separate load/store operands to simplify assembler This changes ARM64 to use separate operands for each component of an address, and look for separate '[', '$Rn, ..., ']' tokens when parsing. This allows us to do away with quite a bit of special C++ code to handle monolithic "addressing modes" in the MC components. The more incremental matching of the assembler operands also allows for better diagnostics when LLVM is presented with invalid input. Most of the complexity here is with the register-offset instructions, which were extremely dodgy beforehand: even when the instruction used wM, LLVM's model had xM as an operand. We papered over this discrepancy before, but that approach doesn't work now so I split them into separate X and W variants. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209425 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/ARM64ISelDAGToDAG.cpp | 177 +- lib/Target/ARM64/ARM64InstrAtomics.td | 296 +-- lib/Target/ARM64/ARM64InstrFormats.td | 1618 ++++++++-------- lib/Target/ARM64/ARM64InstrInfo.cpp | 70 +- lib/Target/ARM64/ARM64InstrInfo.td | 1707 +++++++++-------- lib/Target/ARM64/ARM64RegisterInfo.td | 10 + lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp | 915 +++------ .../ARM64/Disassembler/ARM64Disassembler.cpp | 88 +- .../ARM64/InstPrinter/ARM64InstPrinter.cpp | 71 +- .../ARM64/InstPrinter/ARM64InstPrinter.h | 35 +- .../ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp | 39 +- test/MC/AArch64/basic-a64-diagnostics.s | 165 +- test/MC/AArch64/neon-diagnostics.s | 12 +- test/MC/ARM64/diags.s | 30 +- utils/TableGen/CodeGenDAGPatterns.cpp | 6 +- 15 files changed, 2537 insertions(+), 2702 deletions(-) diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp index ce4203f321cd..8fec6f02b768 100644 --- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp +++ b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp @@ -111,27 +111,18 @@ class ARM64DAGToDAGISel : public SelectionDAGISel { return SelectAddrModeUnscaled(N, 16, Base, OffImm); } - bool SelectAddrModeRO8(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 1, Base, Offset, Imm); + template + bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, + SDValue &SignExtend, SDValue &DoShift) { + return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); } - bool SelectAddrModeRO16(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 2, Base, Offset, Imm); - } - bool SelectAddrModeRO32(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 4, Base, Offset, Imm); - } - bool SelectAddrModeRO64(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 8, Base, Offset, Imm); - } - bool 
SelectAddrModeRO128(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 16, Base, Offset, Imm); + + template + bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, + SDValue &SignExtend, SDValue &DoShift) { + return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); } - bool SelectAddrModeNoIndex(SDValue N, SDValue &Val); + /// Form sequences of consecutive 64/128-bit registers for use in NEON /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have @@ -179,11 +170,15 @@ class ARM64DAGToDAGISel : public SelectionDAGISel { SDValue &OffImm); bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm); - bool SelectAddrModeRO(SDValue N, unsigned Size, SDValue &Base, - SDValue &Offset, SDValue &Imm); + bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, + SDValue &Offset, SDValue &SignExtend, + SDValue &DoShift); + bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, + SDValue &Offset, SDValue &SignExtend, + SDValue &DoShift); bool isWorthFolding(SDValue V) const; - bool SelectExtendedSHL(SDValue N, unsigned Size, SDValue &Offset, - SDValue &Imm); + bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, + SDValue &Offset, SDValue &SignExtend); template bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { @@ -219,14 +214,6 @@ static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, isIntImmediate(N->getOperand(1).getNode(), Imm); } -bool ARM64DAGToDAGISel::SelectAddrModeNoIndex(SDValue N, SDValue &Val) { - EVT ValTy = N.getValueType(); - if (ValTy != MVT::i64) - return false; - Val = N; - return true; -} - bool ARM64DAGToDAGISel::SelectInlineAsmMemoryOperand( const SDValue &Op, char ConstraintCode, std::vector &OutOps) { assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); @@ -563,8 +550,8 @@ bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, // if we're folding a (sext i8), we need the RHS to be a GPR32, even though // there might not be an actual 32-bit value in the program. We can // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. - if (Reg.getValueType() == MVT::i64 && Ext != ARM64_AM::UXTX && - Ext != ARM64_AM::SXTX) { + assert(Ext != ARM64_AM::UXTX && Ext != ARM64_AM::SXTX); + if (Reg.getValueType() == MVT::i64) { SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); MachineSDNode *Node = CurDAG->getMachineNode( TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, Reg, SubReg); @@ -675,47 +662,44 @@ static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { return SDValue(Node, 0); } -static SDValue WidenIfNeeded(SelectionDAG *CurDAG, SDValue N) { - if (N.getValueType() == MVT::i32) { - return Widen(CurDAG, N); - } - - return N; -} - /// \brief Check if the given SHL node (\p N), can be used to form an /// extended register for an addressing mode. 
bool ARM64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, - SDValue &Offset, SDValue &Imm) { + bool WantExtend, SDValue &Offset, + SDValue &SignExtend) { assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); - if (CSD && (CSD->getZExtValue() & 0x7) == CSD->getZExtValue()) { + if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) + return false; + if (WantExtend) { ARM64_AM::ShiftExtendType Ext = getExtendTypeForNode(N.getOperand(0), true); - if (Ext == ARM64_AM::InvalidShiftExtend) { - Ext = ARM64_AM::UXTX; - Offset = WidenIfNeeded(CurDAG, N.getOperand(0)); - } else { - Offset = WidenIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); - } - - unsigned LegalShiftVal = Log2_32(Size); - unsigned ShiftVal = CSD->getZExtValue(); - - if (ShiftVal != 0 && ShiftVal != LegalShiftVal) + if (Ext == ARM64_AM::InvalidShiftExtend) return false; - Imm = CurDAG->getTargetConstant( - ARM64_AM::getMemExtendImm(Ext, ShiftVal != 0), MVT::i32); - if (isWorthFolding(N)) - return true; + Offset = N.getOperand(0).getOperand(0); + SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32); + } else { + Offset = N.getOperand(0); + SignExtend = CurDAG->getTargetConstant(0, MVT::i32); } + + unsigned LegalShiftVal = Log2_32(Size); + unsigned ShiftVal = CSD->getZExtValue(); + + if (ShiftVal != 0 && ShiftVal != LegalShiftVal) + return false; + + if (isWorthFolding(N)) + return true; + return false; } -bool ARM64DAGToDAGISel::SelectAddrModeRO(SDValue N, unsigned Size, +bool ARM64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, SDValue &Offset, - SDValue &Imm) { + SDValue &SignExtend, + SDValue &DoShift) { if (N.getOpcode() != ISD::ADD) return false; SDValue LHS = N.getOperand(0); @@ -740,26 +724,30 @@ bool ARM64DAGToDAGISel::SelectAddrModeRO(SDValue N, unsigned Size, // Try to match a shifted extend on the RHS. if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && - SelectExtendedSHL(RHS, Size, Offset, Imm)) { + SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { Base = LHS; + DoShift = CurDAG->getTargetConstant(true, MVT::i32); return true; } // Try to match a shifted extend on the LHS. if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && - SelectExtendedSHL(LHS, Size, Offset, Imm)) { + SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { Base = RHS; + DoShift = CurDAG->getTargetConstant(true, MVT::i32); return true; } - ARM64_AM::ShiftExtendType Ext = ARM64_AM::UXTX; + // There was no shift, whatever else we find. + DoShift = CurDAG->getTargetConstant(false, MVT::i32); + + ARM64_AM::ShiftExtendType Ext = ARM64_AM::InvalidShiftExtend; // Try to match an unshifted extend on the LHS. 
if (IsExtendedRegisterWorthFolding && (Ext = getExtendTypeForNode(LHS, true)) != ARM64_AM::InvalidShiftExtend) { Base = RHS; - Offset = WidenIfNeeded(CurDAG, LHS.getOperand(0)); - Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), - MVT::i32); + Offset = LHS.getOperand(0); + SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32); if (isWorthFolding(LHS)) return true; } @@ -768,19 +756,62 @@ bool ARM64DAGToDAGISel::SelectAddrModeRO(SDValue N, unsigned Size, if (IsExtendedRegisterWorthFolding && (Ext = getExtendTypeForNode(RHS, true)) != ARM64_AM::InvalidShiftExtend) { Base = LHS; - Offset = WidenIfNeeded(CurDAG, RHS.getOperand(0)); - Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), - MVT::i32); + Offset = RHS.getOperand(0); + SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32); if (isWorthFolding(RHS)) return true; } + return false; +} + +bool ARM64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, + SDValue &Base, SDValue &Offset, + SDValue &SignExtend, + SDValue &DoShift) { + if (N.getOpcode() != ISD::ADD) + return false; + SDValue LHS = N.getOperand(0); + SDValue RHS = N.getOperand(1); + + // We don't want to match immediate adds here, because they are better lowered + // to the register-immediate addressing modes. + if (isa(LHS) || isa(RHS)) + return false; + + // Check if this particular node is reused in any non-memory related + // operation. If yes, do not try to fold this node into the address + // computation, since the computation will be kept. + const SDNode *Node = N.getNode(); + for (SDNode *UI : Node->uses()) { + if (!isa(*UI)) + return false; + } + + // Remember if it is worth folding N when it produces extended register. + bool IsExtendedRegisterWorthFolding = isWorthFolding(N); + + // Try to match a shifted extend on the RHS. + if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && + SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { + Base = LHS; + DoShift = CurDAG->getTargetConstant(true, MVT::i32); + return true; + } + + // Try to match a shifted extend on the LHS. + if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && + SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { + Base = RHS; + DoShift = CurDAG->getTargetConstant(true, MVT::i32); + return true; + } + // Match any non-shifted, non-extend, non-immediate add expression. Base = LHS; - Offset = WidenIfNeeded(CurDAG, RHS); - Ext = ARM64_AM::UXTX; - Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), - MVT::i32); + Offset = RHS; + SignExtend = CurDAG->getTargetConstant(false, MVT::i32); + DoShift = CurDAG->getTargetConstant(false, MVT::i32); // Reg1 + Reg2 is free: no check needed. 
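  // Editor's note: unlike the WRO mode above, XRO uses the full 64-bit index
  // register, so there is no word extend to fold; the only remaining choice is
  // whether an LSL by log2 of the access size was matched (DoShift). This
  // fallback is the plain [Xn, Xm] form.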
return true; } diff --git a/lib/Target/ARM64/ARM64InstrAtomics.td b/lib/Target/ARM64/ARM64InstrAtomics.td index 989e7a2e74ad..1d1483ac126c 100644 --- a/lib/Target/ARM64/ARM64InstrAtomics.td +++ b/lib/Target/ARM64/ARM64InstrAtomics.td @@ -43,39 +43,63 @@ class relaxed_load // 8-bit loads def : Pat<(acquiring_load GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; -def : Pat<(relaxed_load ro_indexed8:$addr), - (LDRBBro ro_indexed8:$addr)>; -def : Pat<(relaxed_load am_indexed8:$addr), - (LDRBBui am_indexed8:$addr)>; -def : Pat<(relaxed_load am_unscaled8:$addr), - (LDURBBi am_unscaled8:$addr)>; +def : Pat<(relaxed_load (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend8:$offset)), + (LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>; +def : Pat<(relaxed_load (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend8:$offset)), + (LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>; +def : Pat<(relaxed_load (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(relaxed_load + (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), + (LDURBBi GPR64sp:$Rn, simm9:$offset)>; // 16-bit loads def : Pat<(acquiring_load GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; -def : Pat<(relaxed_load ro_indexed16:$addr), - (LDRHHro ro_indexed16:$addr)>; -def : Pat<(relaxed_load am_indexed16:$addr), - (LDRHHui am_indexed16:$addr)>; -def : Pat<(relaxed_load am_unscaled16:$addr), - (LDURHHi am_unscaled16:$addr)>; +def : Pat<(relaxed_load (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)), + (LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>; +def : Pat<(relaxed_load (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)), + (LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>; +def : Pat<(relaxed_load (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)), + (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat<(relaxed_load + (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), + (LDURHHi GPR64sp:$Rn, simm9:$offset)>; // 32-bit loads def : Pat<(acquiring_load GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>; -def : Pat<(relaxed_load ro_indexed32:$addr), - (LDRWro ro_indexed32:$addr)>; -def : Pat<(relaxed_load am_indexed32:$addr), - (LDRWui am_indexed32:$addr)>; -def : Pat<(relaxed_load am_unscaled32:$addr), - (LDURWi am_unscaled32:$addr)>; +def : Pat<(relaxed_load (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend32:$extend)), + (LDRWroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>; +def : Pat<(relaxed_load (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend32:$extend)), + (LDRWroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>; +def : Pat<(relaxed_load (am_indexed32 GPR64sp:$Rn, + uimm12s4:$offset)), + (LDRWui GPR64sp:$Rn, uimm12s4:$offset)>; +def : Pat<(relaxed_load + (am_unscaled32 GPR64sp:$Rn, simm9:$offset)), + (LDURWi GPR64sp:$Rn, simm9:$offset)>; // 64-bit loads def : Pat<(acquiring_load GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>; -def : Pat<(relaxed_load ro_indexed64:$addr), - (LDRXro ro_indexed64:$addr)>; -def : Pat<(relaxed_load am_indexed64:$addr), - (LDRXui am_indexed64:$addr)>; -def : Pat<(relaxed_load am_unscaled64:$addr), - (LDURXi am_unscaled64:$addr)>; +def : Pat<(relaxed_load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend64:$extend)), + (LDRXroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; +def : Pat<(relaxed_load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend64:$extend)), + (LDRXroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; +def : Pat<(relaxed_load (am_indexed64 GPR64sp:$Rn, + uimm12s8:$offset)), + (LDRXui GPR64sp:$Rn, uimm12s8:$offset)>; +def : 
Pat<(relaxed_load + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (LDURXi GPR64sp:$Rn, simm9:$offset)>; //===---------------------------------- // Atomic stores @@ -103,42 +127,74 @@ class relaxed_store // 8-bit stores def : Pat<(releasing_store GPR64sp:$ptr, GPR32:$val), (STLRB GPR32:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store ro_indexed8:$ptr, GPR32:$val), - (STRBBro GPR32:$val, ro_indexed8:$ptr)>; -def : Pat<(relaxed_store am_indexed8:$ptr, GPR32:$val), - (STRBBui GPR32:$val, am_indexed8:$ptr)>; -def : Pat<(relaxed_store am_unscaled8:$ptr, GPR32:$val), - (STURBBi GPR32:$val, am_unscaled8:$ptr)>; +def : Pat<(relaxed_store + (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend), + GPR32:$val), + (STRBBroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend)>; +def : Pat<(relaxed_store + (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend), + GPR32:$val), + (STRBBroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend)>; +def : Pat<(relaxed_store + (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset), GPR32:$val), + (STRBBui GPR32:$val, GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(relaxed_store + (am_unscaled8 GPR64sp:$Rn, simm9:$offset), GPR32:$val), + (STURBBi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; // 16-bit stores def : Pat<(releasing_store GPR64sp:$ptr, GPR32:$val), (STLRH GPR32:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store ro_indexed16:$ptr, GPR32:$val), - (STRHHro GPR32:$val, ro_indexed16:$ptr)>; -def : Pat<(relaxed_store am_indexed16:$ptr, GPR32:$val), - (STRHHui GPR32:$val, am_indexed16:$ptr)>; -def : Pat<(relaxed_store am_unscaled16:$ptr, GPR32:$val), - (STURHHi GPR32:$val, am_unscaled16:$ptr)>; +def : Pat<(relaxed_store (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend), + GPR32:$val), + (STRHHroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>; +def : Pat<(relaxed_store (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend), + GPR32:$val), + (STRHHroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>; +def : Pat<(relaxed_store + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), GPR32:$val), + (STRHHui GPR32:$val, GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat<(relaxed_store + (am_unscaled16 GPR64sp:$Rn, simm9:$offset), GPR32:$val), + (STURHHi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; // 32-bit stores def : Pat<(releasing_store GPR64sp:$ptr, GPR32:$val), (STLRW GPR32:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store ro_indexed32:$ptr, GPR32:$val), - (STRWro GPR32:$val, ro_indexed32:$ptr)>; -def : Pat<(relaxed_store am_indexed32:$ptr, GPR32:$val), - (STRWui GPR32:$val, am_indexed32:$ptr)>; -def : Pat<(relaxed_store am_unscaled32:$ptr, GPR32:$val), - (STURWi GPR32:$val, am_unscaled32:$ptr)>; +def : Pat<(relaxed_store (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend32:$extend), + GPR32:$val), + (STRWroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>; +def : Pat<(relaxed_store (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend32:$extend), + GPR32:$val), + (STRWroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>; +def : Pat<(relaxed_store + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), GPR32:$val), + (STRWui GPR32:$val, GPR64sp:$Rn, uimm12s4:$offset)>; +def : Pat<(relaxed_store + (am_unscaled32 GPR64sp:$Rn, simm9:$offset), GPR32:$val), + (STURWi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; // 64-bit stores def : Pat<(releasing_store GPR64sp:$ptr, GPR64:$val), (STLRX GPR64:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store ro_indexed64:$ptr, GPR64:$val), - (STRXro GPR64:$val, ro_indexed64:$ptr)>; -def : 
Pat<(relaxed_store am_indexed64:$ptr, GPR64:$val), - (STRXui GPR64:$val, am_indexed64:$ptr)>; -def : Pat<(relaxed_store am_unscaled64:$ptr, GPR64:$val), - (STURXi GPR64:$val, am_unscaled64:$ptr)>; +def : Pat<(relaxed_store (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend), + GPR64:$val), + (STRXroW GPR64:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; +def : Pat<(relaxed_store (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend), + GPR64:$val), + (STRXroX GPR64:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; +def : Pat<(relaxed_store + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset), GPR64:$val), + (STRXui GPR64:$val, GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat<(relaxed_store + (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val), + (STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>; //===---------------------------------- // Low-level exclusive operations @@ -162,20 +218,20 @@ def ldxr_8 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; }]>; -def : Pat<(ldxr_1 am_noindex:$addr), - (SUBREG_TO_REG (i64 0), (LDXRB am_noindex:$addr), sub_32)>; -def : Pat<(ldxr_2 am_noindex:$addr), - (SUBREG_TO_REG (i64 0), (LDXRH am_noindex:$addr), sub_32)>; -def : Pat<(ldxr_4 am_noindex:$addr), - (SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>; -def : Pat<(ldxr_8 am_noindex:$addr), (LDXRX am_noindex:$addr)>; - -def : Pat<(and (ldxr_1 am_noindex:$addr), 0xff), - (SUBREG_TO_REG (i64 0), (LDXRB am_noindex:$addr), sub_32)>; -def : Pat<(and (ldxr_2 am_noindex:$addr), 0xffff), - (SUBREG_TO_REG (i64 0), (LDXRH am_noindex:$addr), sub_32)>; -def : Pat<(and (ldxr_4 am_noindex:$addr), 0xffffffff), - (SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>; +def : Pat<(ldxr_1 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>; +def : Pat<(ldxr_2 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>; +def : Pat<(ldxr_4 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>; +def : Pat<(ldxr_8 GPR64sp:$addr), (LDXRX GPR64sp:$addr)>; + +def : Pat<(and (ldxr_1 GPR64sp:$addr), 0xff), + (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>; +def : Pat<(and (ldxr_2 GPR64sp:$addr), 0xffff), + (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>; +def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff), + (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>; // Load-exclusives. 
@@ -195,20 +251,20 @@ def ldaxr_8 : PatFrag<(ops node:$ptr), (int_arm64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; }]>; -def : Pat<(ldaxr_1 am_noindex:$addr), - (SUBREG_TO_REG (i64 0), (LDAXRB am_noindex:$addr), sub_32)>; -def : Pat<(ldaxr_2 am_noindex:$addr), - (SUBREG_TO_REG (i64 0), (LDAXRH am_noindex:$addr), sub_32)>; -def : Pat<(ldaxr_4 am_noindex:$addr), - (SUBREG_TO_REG (i64 0), (LDAXRW am_noindex:$addr), sub_32)>; -def : Pat<(ldaxr_8 am_noindex:$addr), (LDAXRX am_noindex:$addr)>; - -def : Pat<(and (ldaxr_1 am_noindex:$addr), 0xff), - (SUBREG_TO_REG (i64 0), (LDAXRB am_noindex:$addr), sub_32)>; -def : Pat<(and (ldaxr_2 am_noindex:$addr), 0xffff), - (SUBREG_TO_REG (i64 0), (LDAXRH am_noindex:$addr), sub_32)>; -def : Pat<(and (ldaxr_4 am_noindex:$addr), 0xffffffff), - (SUBREG_TO_REG (i64 0), (LDAXRW am_noindex:$addr), sub_32)>; +def : Pat<(ldaxr_1 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>; +def : Pat<(ldaxr_2 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>; +def : Pat<(ldaxr_4 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>; +def : Pat<(ldaxr_8 GPR64sp:$addr), (LDAXRX GPR64sp:$addr)>; + +def : Pat<(and (ldaxr_1 GPR64sp:$addr), 0xff), + (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>; +def : Pat<(and (ldaxr_2 GPR64sp:$addr), 0xffff), + (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>; +def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff), + (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>; // Store-exclusives. @@ -233,28 +289,28 @@ def stxr_8 : PatFrag<(ops node:$val, node:$ptr), }]>; -def : Pat<(stxr_1 GPR64:$val, am_noindex:$addr), - (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_2 GPR64:$val, am_noindex:$addr), - (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_4 GPR64:$val, am_noindex:$addr), - (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_8 GPR64:$val, am_noindex:$addr), - (STXRX GPR64:$val, am_noindex:$addr)>; - -def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), am_noindex:$addr), - (STXRB GPR32:$val, am_noindex:$addr)>; -def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), am_noindex:$addr), - (STXRH GPR32:$val, am_noindex:$addr)>; -def : Pat<(stxr_4 (zext GPR32:$val), am_noindex:$addr), - (STXRW GPR32:$val, am_noindex:$addr)>; - -def : Pat<(stxr_1 (and GPR64:$val, 0xff), am_noindex:$addr), - (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_2 (and GPR64:$val, 0xffff), am_noindex:$addr), - (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), am_noindex:$addr), - (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; +def : Pat<(stxr_1 GPR64:$val, GPR64sp:$addr), + (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_2 GPR64:$val, GPR64sp:$addr), + (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_4 GPR64:$val, GPR64sp:$addr), + (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_8 GPR64:$val, GPR64sp:$addr), + (STXRX GPR64:$val, GPR64sp:$addr)>; + +def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr), + (STXRB GPR32:$val, GPR64sp:$addr)>; +def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr), + (STXRH GPR32:$val, GPR64sp:$addr)>; +def : Pat<(stxr_4 (zext GPR32:$val), GPR64sp:$addr), + (STXRW GPR32:$val, GPR64sp:$addr)>; + +def : Pat<(stxr_1 (and 
GPR64:$val, 0xff), GPR64sp:$addr), + (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr), + (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr), + (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; // Store-release-exclusives. @@ -279,28 +335,28 @@ def stlxr_8 : PatFrag<(ops node:$val, node:$ptr), }]>; -def : Pat<(stlxr_1 GPR64:$val, am_noindex:$addr), - (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stlxr_2 GPR64:$val, am_noindex:$addr), - (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stlxr_4 GPR64:$val, am_noindex:$addr), - (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stlxr_8 GPR64:$val, am_noindex:$addr), - (STLXRX GPR64:$val, am_noindex:$addr)>; - -def : Pat<(stlxr_1 (zext (and GPR32:$val, 0xff)), am_noindex:$addr), - (STLXRB GPR32:$val, am_noindex:$addr)>; -def : Pat<(stlxr_2 (zext (and GPR32:$val, 0xffff)), am_noindex:$addr), - (STLXRH GPR32:$val, am_noindex:$addr)>; -def : Pat<(stlxr_4 (zext GPR32:$val), am_noindex:$addr), - (STLXRW GPR32:$val, am_noindex:$addr)>; - -def : Pat<(stlxr_1 (and GPR64:$val, 0xff), am_noindex:$addr), - (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stlxr_2 (and GPR64:$val, 0xffff), am_noindex:$addr), - (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stlxr_4 (and GPR64:$val, 0xffffffff), am_noindex:$addr), - (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; +def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr), + (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_2 GPR64:$val, GPR64sp:$addr), + (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_4 GPR64:$val, GPR64sp:$addr), + (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_8 GPR64:$val, GPR64sp:$addr), + (STLXRX GPR64:$val, GPR64sp:$addr)>; + +def : Pat<(stlxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr), + (STLXRB GPR32:$val, GPR64sp:$addr)>; +def : Pat<(stlxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr), + (STLXRH GPR32:$val, GPR64sp:$addr)>; +def : Pat<(stlxr_4 (zext GPR32:$val), GPR64sp:$addr), + (STLXRW GPR32:$val, GPR64sp:$addr)>; + +def : Pat<(stlxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr), + (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr), + (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr), + (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; // And clear exclusive. diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/ARM64/ARM64InstrFormats.td index 0ac27e09358c..bf9fa2992b71 100644 --- a/lib/Target/ARM64/ARM64InstrFormats.td +++ b/lib/Target/ARM64/ARM64InstrFormats.td @@ -222,34 +222,27 @@ def simm9 : Operand, ImmLeaf= -256 && Imm < 256; }]> { let ParserMatchClass = SImm9Operand; } -// simm7s4 predicate - True if the immediate is a multiple of 4 in the range -// [-256, 252]. -def SImm7s4Operand : AsmOperandClass { - let Name = "SImm7s4"; - let DiagnosticType = "InvalidMemoryIndexed32SImm7"; +// simm7sN predicate - True if the immediate is a multiple of N in the range +// [-64 * N, 63 * N]. 
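// Editor's worked example (not in the original patch): the field is a signed
// 7-bit value scaled by N bytes, so simm7s4 accepts -256, -252, ..., 248, 252
// and simm7s16 accepts -1024, -1008, ..., 992, 1008. The uimm12sN operands
// introduced further below follow the same scheme with an unsigned 12-bit
// field: multiples of N in [0, 4095 * N].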
+class SImm7Scaled : AsmOperandClass { + let Name = "SImm7s" # Scale; + let DiagnosticType = "InvalidMemoryIndexed" # Scale # "SImm7"; } + +def SImm7s4Operand : SImm7Scaled<4>; +def SImm7s8Operand : SImm7Scaled<8>; +def SImm7s16Operand : SImm7Scaled<16>; + def simm7s4 : Operand { let ParserMatchClass = SImm7s4Operand; let PrintMethod = "printImmScale<4>"; } -// simm7s8 predicate - True if the immediate is a multiple of 8 in the range -// [-512, 504]. -def SImm7s8Operand : AsmOperandClass { - let Name = "SImm7s8"; - let DiagnosticType = "InvalidMemoryIndexed64SImm7"; -} def simm7s8 : Operand { let ParserMatchClass = SImm7s8Operand; let PrintMethod = "printImmScale<8>"; } -// simm7s16 predicate - True if the immediate is a multiple of 16 in the range -// [-1024, 1008]. -def SImm7s16Operand : AsmOperandClass { - let Name = "SImm7s16"; - let DiagnosticType = "InvalidMemoryIndexed64SImm7"; -} def simm7s16 : Operand { let ParserMatchClass = SImm7s16Operand; let PrintMethod = "printImmScale<16>"; @@ -639,17 +632,17 @@ def neg_addsub_shifted_imm64 : neg_addsub_shifted_imm; // {5-3} - extend type // {2-0} - imm3 def arith_extend : Operand { - let PrintMethod = "printExtend"; + let PrintMethod = "printArithExtend"; let ParserMatchClass = ExtendOperand; } def arith_extend64 : Operand { - let PrintMethod = "printExtend"; + let PrintMethod = "printArithExtend"; let ParserMatchClass = ExtendOperand64; } // 'extend' that's a lsl of a 64-bit register. def arith_extendlsl64 : Operand { - let PrintMethod = "printExtend"; + let PrintMethod = "printArithExtend"; let ParserMatchClass = ExtendOperandLSL64; } @@ -2178,96 +2171,46 @@ def maski16_or_more : Operand, // (unsigned immediate) // Indexed for 8-bit registers. offset is in range [0,4095]. -def MemoryIndexed8Operand : AsmOperandClass { - let Name = "MemoryIndexed8"; - let DiagnosticType = "InvalidMemoryIndexed8"; -} -def am_indexed8 : Operand, - ComplexPattern { - let PrintMethod = "printAMIndexed<8>"; - let EncoderMethod - = "getAMIndexed8OpValue"; - let ParserMatchClass = MemoryIndexed8Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 16-bit registers. offset is multiple of 2 in range [0,8190], -// stored as immval/2 (the 12-bit literal that encodes directly into the insn). -def MemoryIndexed16Operand : AsmOperandClass { - let Name = "MemoryIndexed16"; - let DiagnosticType = "InvalidMemoryIndexed16"; -} -def am_indexed16 : Operand, - ComplexPattern { - let PrintMethod = "printAMIndexed<16>"; - let EncoderMethod - = "getAMIndexed8OpValue"; - let ParserMatchClass = MemoryIndexed16Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 32-bit registers. offset is multiple of 4 in range [0,16380], -// stored as immval/4 (the 12-bit literal that encodes directly into the insn). -def MemoryIndexed32Operand : AsmOperandClass { - let Name = "MemoryIndexed32"; - let DiagnosticType = "InvalidMemoryIndexed32"; -} -def am_indexed32 : Operand, - ComplexPattern { - let PrintMethod = "printAMIndexed<32>"; - let EncoderMethod - = "getAMIndexed8OpValue"; - let ParserMatchClass = MemoryIndexed32Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 64-bit registers. offset is multiple of 8 in range [0,32760], -// stored as immval/8 (the 12-bit literal that encodes directly into the insn). 
-def MemoryIndexed64Operand : AsmOperandClass { - let Name = "MemoryIndexed64"; - let DiagnosticType = "InvalidMemoryIndexed64"; -} -def am_indexed64 : Operand, - ComplexPattern { - let PrintMethod = "printAMIndexed<64>"; +def am_indexed8 : ComplexPattern; +def am_indexed16 : ComplexPattern; +def am_indexed32 : ComplexPattern; +def am_indexed64 : ComplexPattern; +def am_indexed128 : ComplexPattern; + +class UImm12OffsetOperand : AsmOperandClass { + let Name = "UImm12Offset" # Scale; + let RenderMethod = "addUImm12OffsetOperands<" # Scale # ">"; + let PredicateMethod = "isUImm12Offset<" # Scale # ">"; + let DiagnosticType = "InvalidMemoryIndexed" # Scale; +} + +def UImm12OffsetScale1Operand : UImm12OffsetOperand<1>; +def UImm12OffsetScale2Operand : UImm12OffsetOperand<2>; +def UImm12OffsetScale4Operand : UImm12OffsetOperand<4>; +def UImm12OffsetScale8Operand : UImm12OffsetOperand<8>; +def UImm12OffsetScale16Operand : UImm12OffsetOperand<16>; + +class uimm12_scaled : Operand { + let ParserMatchClass + = !cast("UImm12OffsetScale" # Scale # "Operand"); let EncoderMethod - = "getAMIndexed8OpValue"; - let ParserMatchClass = MemoryIndexed64Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); + = "getLdStUImm12OpValue"; + let PrintMethod = "printUImm12Offset<" # Scale # ">"; } -// Indexed for 128-bit registers. offset is multiple of 16 in range [0,65520], -// stored as immval/16 (the 12-bit literal that encodes directly into the insn). -def MemoryIndexed128Operand : AsmOperandClass { - let Name = "MemoryIndexed128"; - let DiagnosticType = "InvalidMemoryIndexed128"; -} -def am_indexed128 : Operand, - ComplexPattern { - let PrintMethod = "printAMIndexed<128>"; - let EncoderMethod - = "getAMIndexed8OpValue"; - let ParserMatchClass = MemoryIndexed128Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// No offset. 
-def MemoryNoIndexOperand : AsmOperandClass { let Name = "MemoryNoIndex"; } -def am_noindex : Operand, - ComplexPattern { - let PrintMethod = "printAMNoIndex"; - let ParserMatchClass = MemoryNoIndexOperand; - let MIOperandInfo = (ops GPR64sp:$base); -} +def uimm12s1 : uimm12_scaled<1>; +def uimm12s2 : uimm12_scaled<2>; +def uimm12s4 : uimm12_scaled<4>; +def uimm12s8 : uimm12_scaled<8>; +def uimm12s16 : uimm12_scaled<16>; class BaseLoadStoreUI sz, bit V, bits<2> opc, dag oops, dag iops, string asm, list pattern> - : I { - bits<5> dst; + : I { + bits<5> Rt; - bits<17> addr; - bits<5> base = addr{4-0}; - bits<12> offset = addr{16-5}; + bits<5> Rn; + bits<12> offset; let Inst{31-30} = sz; let Inst{29-27} = 0b111; @@ -2275,25 +2218,35 @@ class BaseLoadStoreUI sz, bit V, bits<2> opc, dag oops, dag iops, let Inst{25-24} = 0b01; let Inst{23-22} = opc; let Inst{21-10} = offset; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; let DecoderMethod = "DecodeUnsignedLdStInstruction"; } -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class LoadUI sz, bit V, bits<2> opc, RegisterClass regtype, - Operand indextype, string asm, list pattern> - : BaseLoadStoreUI, - Sched<[WriteLD]>; +multiclass LoadUI sz, bit V, bits<2> opc, RegisterClass regtype, + Operand indextype, string asm, list pattern> { + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def ui : BaseLoadStoreUI, + Sched<[WriteLD]>; -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -class StoreUI sz, bit V, bits<2> opc, RegisterClass regtype, - Operand indextype, string asm, list pattern> - : BaseLoadStoreUI, - Sched<[WriteST]>; + def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass StoreUI sz, bit V, bits<2> opc, RegisterClass regtype, + Operand indextype, string asm, list pattern> { + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def ui : BaseLoadStoreUI, + Sched<[WriteST]>; + + def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; +} def PrefetchOperand : AsmOperandClass { let Name = "Prefetch"; @@ -2307,7 +2260,8 @@ def prfop : Operand { let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in class PrefetchUI sz, bit V, bits<2> opc, string asm, list pat> : BaseLoadStoreUI, + (outs), (ins prfop:$Rt, GPR64sp:$Rn, uimm12s8:$offset), + asm, pat>, Sched<[WriteLD]>; //--- @@ -2357,317 +2311,511 @@ class PrefetchLiteral opc, bit V, string asm, list pat> // Load/store register offset //--- -class MemROAsmOperand : AsmOperandClass { - let Name = "MemoryRegisterOffset"#sz; - let DiagnosticType = "InvalidMemoryIndexed"; -} - -def MemROAsmOperand8 : MemROAsmOperand<8>; -def MemROAsmOperand16 : MemROAsmOperand<16>; -def MemROAsmOperand32 : MemROAsmOperand<32>; -def MemROAsmOperand64 : MemROAsmOperand<64>; -def MemROAsmOperand128 : MemROAsmOperand<128>; - -class ro_indexed : Operand { // ComplexPattern<...> - let PrintMethod = "printMemoryRegOffset<" # sz # ">"; - let MIOperandInfo = (ops GPR64sp:$base, GPR64:$offset, i32imm:$extend); -} - -def ro_indexed8 : ro_indexed<8>, ComplexPattern { - let ParserMatchClass = MemROAsmOperand8; -} - -def ro_indexed16 : ro_indexed<16>, ComplexPattern { - let ParserMatchClass = MemROAsmOperand16; -} - -def ro_indexed32 : ro_indexed<32>, ComplexPattern { - let ParserMatchClass = MemROAsmOperand32; -} - -def ro_indexed64 : ro_indexed<64>, ComplexPattern { - let ParserMatchClass = MemROAsmOperand64; -} - -def ro_indexed128 : ro_indexed<128>, ComplexPattern { - let ParserMatchClass = 
MemROAsmOperand128; -} +def ro_Xindexed8 : ComplexPattern", []>; +def ro_Xindexed16 : ComplexPattern", []>; +def ro_Xindexed32 : ComplexPattern", []>; +def ro_Xindexed64 : ComplexPattern", []>; +def ro_Xindexed128 : ComplexPattern", []>; + +def ro_Windexed8 : ComplexPattern", []>; +def ro_Windexed16 : ComplexPattern", []>; +def ro_Windexed32 : ComplexPattern", []>; +def ro_Windexed64 : ComplexPattern", []>; +def ro_Windexed128 : ComplexPattern", []>; + +class MemExtendOperand : AsmOperandClass { + let Name = "Mem" # Reg # "Extend" # Width; + let PredicateMethod = "isMem" # Reg # "Extend<" # Width # ">"; + let RenderMethod = "addMemExtendOperands"; + let DiagnosticType = "InvalidMemory" # Reg # "Extend" # Width; +} + +def MemWExtend8Operand : MemExtendOperand<"W", 8> { + // The address "[x0, x1, lsl #0]" actually maps to the variant which performs + // the trivial shift. + let RenderMethod = "addMemExtend8Operands"; +} +def MemWExtend16Operand : MemExtendOperand<"W", 16>; +def MemWExtend32Operand : MemExtendOperand<"W", 32>; +def MemWExtend64Operand : MemExtendOperand<"W", 64>; +def MemWExtend128Operand : MemExtendOperand<"W", 128>; + +def MemXExtend8Operand : MemExtendOperand<"X", 8> { + // The address "[x0, x1, lsl #0]" actually maps to the variant which performs + // the trivial shift. + let RenderMethod = "addMemExtend8Operands"; +} +def MemXExtend16Operand : MemExtendOperand<"X", 16>; +def MemXExtend32Operand : MemExtendOperand<"X", 32>; +def MemXExtend64Operand : MemExtendOperand<"X", 64>; +def MemXExtend128Operand : MemExtendOperand<"X", 128>; + +class ro_extend + : Operand { + let ParserMatchClass = ParserClass; + let PrintMethod = "printMemExtend<'" # Reg # "', " # Width # ">"; + let DecoderMethod = "DecodeMemExtend"; + let EncoderMethod = "getMemExtendOpValue"; + let MIOperandInfo = (ops i32imm:$signed, i32imm:$doshift); +} + +def ro_Wextend8 : ro_extend; +def ro_Wextend16 : ro_extend; +def ro_Wextend32 : ro_extend; +def ro_Wextend64 : ro_extend; +def ro_Wextend128 : ro_extend; + +def ro_Xextend8 : ro_extend; +def ro_Xextend16 : ro_extend; +def ro_Xextend32 : ro_extend; +def ro_Xextend64 : ro_extend; +def ro_Xextend128 : ro_extend; + +class ROAddrMode { + // CodeGen-level pattern covering the entire addressing mode. + ComplexPattern Wpat = windex; + ComplexPattern Xpat = xindex; + + // Asm-level Operand covering the valid "uxtw #3" style syntax. + Operand Wext = wextend; + Operand Xext = xextend; +} + +def ro8 : ROAddrMode; +def ro16 : ROAddrMode; +def ro32 : ROAddrMode; +def ro64 : ROAddrMode; +def ro128 : ROAddrMode; class LoadStore8RO sz, bit V, bits<2> opc, RegisterClass regtype, string asm, dag ins, dag outs, list pat> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; let Inst{31-30} = sz; let Inst{29-27} = 0b111; let Inst{26} = V; let Inst{25-24} = 0b00; let Inst{23-22} = opc; let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
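  // Editor's note: Inst{13} is deliberately left unset in this base class; the
  // roW/roX instantiations in the multiclasses below set it to 0 or 1,
  // selecting the W-register (UXTW/SXTW) or X-register (LSL/SXTX) flavour of
  // the option field.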
let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +class ROInstAlias + : InstAlias; + +multiclass Load8RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10 in + def roW : LoadStore8RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore8RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; + def : ROInstAlias(NAME # "roX")>; } -class Load8RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore8RO, - Sched<[WriteLDIdx, ReadAdrBase]>; +multiclass Store8RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10 in + def roW : LoadStore8RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } -class Store8RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore8RO, - Sched<[WriteSTIdx, ReadAdrBase]>; + let AddedComplexity = 10 in + def roX : LoadStore8RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} class LoadStore16RO sz, bit V, bits<2> opc, RegisterClass regtype, string asm, dag ins, dag outs, list pat> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; let Inst{31-30} = sz; let Inst{29-27} = 0b111; let Inst{26} = V; let Inst{25-24} = 0b00; let Inst{23-22} = opc; let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; +multiclass Load16RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10 in + def roW : LoadStore16RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore16RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; } -class Load16RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore16RO, - Sched<[WriteLDIdx, ReadAdrBase]>; +multiclass Store16RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10 in + def roW : LoadStore16RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } -class Store16RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore16RO, - Sched<[WriteSTIdx, ReadAdrBase]>; + let AddedComplexity = 10 in + def roX : LoadStore16RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} class LoadStore32RO sz, bit V, bits<2> opc, RegisterClass regtype, string asm, dag ins, dag outs, list pat> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; let Inst{31-30} = sz; let Inst{29-27} = 0b111; let Inst{26} = V; let Inst{25-24} = 0b00; let Inst{23-22} = opc; let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; +multiclass Load32RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10 in + def roW : LoadStore32RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore32RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; } -class Load32RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore32RO, - Sched<[WriteLDIdx, ReadAdrBase]>; +multiclass Store32RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10 in + def roW : LoadStore32RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } -class Store32RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore32RO, - Sched<[WriteSTIdx, ReadAdrBase]>; + let AddedComplexity = 10 in + def roX : LoadStore32RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} class LoadStore64RO sz, bit V, bits<2> opc, RegisterClass regtype, string asm, dag ins, dag outs, list pat> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; let Inst{31-30} = sz; let Inst{29-27} = 0b111; let Inst{26} = V; let Inst{25-24} = 0b00; let Inst{23-22} = opc; let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Load64RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roW : LoadStore64RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roX : LoadStore64RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; } -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class Load64RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore64RO, - Sched<[WriteLDIdx, ReadAdrBase]>; +multiclass Store64RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roW : LoadStore64RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -class Store64RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore64RO, - Sched<[WriteSTIdx, ReadAdrBase]>; + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roX : LoadStore64RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + def : ROInstAlias(NAME # "roX")>; +} class LoadStore128RO sz, bit V, bits<2> opc, RegisterClass regtype, string asm, dag ins, dag outs, list pat> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; let Inst{31-30} = sz; let Inst{29-27} = 0b111; let Inst{26} = V; let Inst{25-24} = 0b00; let Inst{23-22} = opc; let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; +multiclass Load128RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roW : LoadStore128RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roX : LoadStore128RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; } -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class Load128RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore128RO, - Sched<[WriteLDIdx, ReadAdrBase]>; +multiclass Store128RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roW : LoadStore128RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -class Store128RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore128RO, - Sched<[WriteSTIdx, ReadAdrBase]>; + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roX : LoadStore128RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchRO sz, bit V, bits<2> opc, string asm, list pat> - : I<(outs), (ins prfop:$Rt, ro_indexed64:$addr), asm, - "\t$Rt, $addr", "", pat>, +class BasePrefetchRO sz, bit V, bits<2> opc, dag outs, dag ins, + string asm, list pat> + : I, Sched<[WriteLD]> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; let Inst{31-30} = sz; let Inst{29-27} = 0b111; let Inst{26} = V; let Inst{25-24} = 0b00; let Inst{23-22} = opc; let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass PrefetchRO sz, bit V, bits<2> opc, string asm> { + def roW : BasePrefetchRO { + let Inst{13} = 0b0; + } - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; + def roX : BasePrefetchRO { + let Inst{13} = 0b1; + } + + def : InstAlias<"prfm $Rt, [$Rn, $Rm]", + (!cast(NAME # "roX") prfop:$Rt, + GPR64sp:$Rn, GPR64:$Rm, 0, 0)>; } //--- // Load/store unscaled immediate //--- -def MemoryUnscaledOperand : AsmOperandClass { - let Name = "MemoryUnscaled"; - let DiagnosticType = "InvalidMemoryIndexedSImm9"; -} -class am_unscaled_operand : Operand { - let PrintMethod = "printAMIndexed<8>"; - let ParserMatchClass = MemoryUnscaledOperand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -class am_unscaled_wb_operand : Operand { - let PrintMethod = "printAMIndexedWB<8>"; - let ParserMatchClass = MemoryUnscaledOperand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -def am_unscaled : am_unscaled_operand; -def am_unscaled_wb: am_unscaled_wb_operand; -def am_unscaled8 : am_unscaled_operand, - ComplexPattern; -def am_unscaled16 : am_unscaled_operand, - ComplexPattern; -def am_unscaled32 : am_unscaled_operand, - ComplexPattern; -def am_unscaled64 : am_unscaled_operand, - ComplexPattern; -def am_unscaled128 : am_unscaled_operand, - ComplexPattern; +def am_unscaled8 : ComplexPattern; +def am_unscaled16 : ComplexPattern; +def am_unscaled32 : ComplexPattern; +def am_unscaled64 : ComplexPattern; +def am_unscaled128 :ComplexPattern; class BaseLoadStoreUnscale sz, bit V, bits<2> opc, dag oops, dag iops, string asm, list pattern> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> base; + : I { + bits<5> Rt; + bits<5> Rn; bits<9> offset; let Inst{31-30} = sz; let Inst{29-27} = 0b111; @@ -2677,31 +2825,46 @@ class BaseLoadStoreUnscale sz, bit V, bits<2> opc, dag oops, dag iops, let Inst{21} = 0; let Inst{20-12} = offset; let Inst{11-10} = 0b00; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; let DecoderMethod = "DecodeSignedLdStInstruction"; } -let AddedComplexity = 1 in // try this before LoadUI -class LoadUnscaled sz, bit V, bits<2> opc, RegisterClass regtype, - Operand amtype, string asm, list pattern> - : BaseLoadStoreUnscale, - Sched<[WriteLD]>; +multiclass LoadUnscaled sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, list pattern> { + let AddedComplexity = 1 in // try this before LoadUI + def i : BaseLoadStoreUnscale, + Sched<[WriteLD]>; -let AddedComplexity = 1 in // try this before StoreUI -class StoreUnscaled sz, bit V, bits<2> opc, RegisterClass regtype, - Operand amtype, string asm, list pattern> - : BaseLoadStoreUnscale, - Sched<[WriteST]>; + def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; +} -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchUnscaled sz, bit V, bits<2> opc, string asm, list pat> - : BaseLoadStoreUnscale, - Sched<[WriteLD]>; +multiclass StoreUnscaled sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, list pattern> { + let AddedComplexity = 1 in // try this before StoreUI + def i : BaseLoadStoreUnscale, + Sched<[WriteST]>; + + def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass PrefetchUnscaled sz, bit V, bits<2> opc, string asm, + list pat> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in + def i : BaseLoadStoreUnscale, + Sched<[WriteLD]>; + + def : InstAlias(NAME # "i") prfop:$Rt, GPR64sp:$Rn, 0)>; +} //--- // Load/store unscaled immediate, unprivileged @@ -2709,13 +2872,9 @@ class PrefetchUnscaled sz, bit V, bits<2> opc, string asm, list pat class BaseLoadStoreUnprivileged sz, bit V, bits<2> opc, dag oops, dag iops, string asm> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> base; + : I { + bits<5> Rt; + bits<5> Rn; bits<9> offset; let Inst{31-30} = sz; let Inst{29-27} = 0b111; @@ -2725,26 +2884,33 @@ class BaseLoadStoreUnprivileged sz, bit V, bits<2> opc, let Inst{21} = 0; let Inst{20-12} = offset; let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; let DecoderMethod = "DecodeSignedLdStInstruction"; } -let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in { -class LoadUnprivileged sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStoreUnprivileged, - Sched<[WriteLD]>; +multiclass LoadUnprivileged sz, bit V, bits<2> opc, + RegisterClass regtype, string asm> { + let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in + def i : BaseLoadStoreUnprivileged, + Sched<[WriteLD]>; + + def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; } -let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { -class StoreUnprivileged sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStoreUnprivileged, - Sched<[WriteST]>; +multiclass StoreUnprivileged sz, bit V, bits<2> opc, + RegisterClass regtype, string asm> { + let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in + def i : BaseLoadStoreUnprivileged, + Sched<[WriteST]>; + + def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; } //--- @@ -2753,12 +2919,9 @@ class StoreUnprivileged sz, bit V, bits<2> opc, RegisterClass regtype, class BaseLoadStorePreIdx sz, bit V, bits<2> opc, dag oops, dag iops, string asm, string cstr> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. - bits<5> dst; - bits<5> base; + : I { + bits<5> Rt; + bits<5> Rn; bits<9> offset; let Inst{31-30} = sz; let Inst{29-27} = 0b111; @@ -2768,24 +2931,26 @@ class BaseLoadStorePreIdx sz, bit V, bits<2> opc, dag oops, dag iops, let Inst{21} = 0; let Inst{20-12} = offset; let Inst{11-10} = 0b11; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; let DecoderMethod = "DecodeSignedLdStInstruction"; } let hasSideEffects = 0 in { let mayStore = 0, mayLoad = 1 in -// FIXME: Modeling the write-back of these instructions for isel is tricky. -// we need the complex addressing mode for the memory reference, but -// we also need the write-back specified as a tied operand to the -// base register. That combination does not play nicely with -// the asm matcher and friends. +// FIXME: Modeling the write-back of these instructions for isel used +// to be tricky. we need the complex addressing mode for the memory +// reference, but we also need the write-back specified as a tied +// operand to the base register. It should work now, but needs to be +// done as a separate patch. This would allow us to be rid of the +// codegenonly pseudoinstructions below too. 
class LoadPreIdx sz, bit V, bits<2> opc, RegisterClass regtype, string asm> : BaseLoadStorePreIdx, + (ins GPR64sp:$Rn, simm9:$offset), asm, + ""/*"$Rn = $wback"*/>, Sched<[WriteLD, WriteAdr]>; let mayStore = 1, mayLoad = 0 in @@ -2793,8 +2958,8 @@ class StorePreIdx sz, bit V, bits<2> opc, RegisterClass regtype, string asm> : BaseLoadStorePreIdx, + (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset), + asm, ""/*"$Rn = $wback"*/>, Sched<[WriteAdr, WriteST]>; } // hasSideEffects = 0 @@ -2812,25 +2977,25 @@ class StorePreIdx sz, bit V, bits<2> opc, RegisterClass regtype, let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in { class LoadPreIdxPseudo : Pseudo<(outs regtype:$Rt, GPR64sp:$wback), - (ins am_noindex:$addr, simm9:$offset), [], - "$addr.base = $wback,@earlyclobber $wback">, + (ins GPR64sp:$addr, simm9:$offset), [], + "$addr = $wback,@earlyclobber $wback">, Sched<[WriteLD, WriteAdr]>; class LoadPostIdxPseudo : Pseudo<(outs regtype:$Rt, GPR64sp:$wback), - (ins am_noindex:$addr, simm9:$offset), [], - "$addr.base = $wback,@earlyclobber $wback">, + (ins GPR64sp:$addr, simm9:$offset), [], + "$addr = $wback,@earlyclobber $wback">, Sched<[WriteLD, WriteI]>; } multiclass StorePreIdxPseudo { let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in def _isel: Pseudo<(outs GPR64sp:$wback), - (ins regtype:$Rt, am_noindex:$addr, simm9:$offset), [], - "$addr.base = $wback,@earlyclobber $wback">, + (ins regtype:$Rt, GPR64sp:$addr, simm9:$offset), [], + "$addr = $wback,@earlyclobber $wback">, Sched<[WriteAdr, WriteST]>; - def : Pat<(OpNode (Ty regtype:$Rt), am_noindex:$addr, simm9:$offset), - (!cast(NAME#_isel) regtype:$Rt, am_noindex:$addr, + def : Pat<(OpNode (Ty regtype:$Rt), GPR64sp:$addr, simm9:$offset), + (!cast(NAME#_isel) regtype:$Rt, GPR64sp:$addr, simm9:$offset)>; } @@ -2841,12 +3006,9 @@ multiclass StorePreIdxPseudo sz, bit V, bits<2> opc, dag oops, dag iops, string asm, string cstr> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. - bits<5> dst; - bits<5> base; + : I { + bits<5> Rt; + bits<5> Rn; bits<9> offset; let Inst{31-30} = sz; let Inst{29-27} = 0b111; @@ -2856,24 +3018,25 @@ class BaseLoadStorePostIdx sz, bit V, bits<2> opc, dag oops, dag iops, let Inst{21} = 0b0; let Inst{20-12} = offset; let Inst{11-10} = 0b01; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; let DecoderMethod = "DecodeSignedLdStInstruction"; } let hasSideEffects = 0 in { let mayStore = 0, mayLoad = 1 in -// FIXME: Modeling the write-back of these instructions for isel is tricky. -// we need the complex addressing mode for the memory reference, but -// we also need the write-back specified as a tied operand to the -// base register. That combination does not play nicely with -// the asm matcher and friends. +// FIXME: Modeling the write-back of these instructions for isel used +// to be tricky. we need the complex addressing mode for the memory +// reference, but we also need the write-back specified as a tied +// operand to the base register. It should work now, but needs to be +// done as a separate patch. This would allow us to be rid of the +// codegenonly pseudoinstructions below too. 
class LoadPostIdx sz, bit V, bits<2> opc, RegisterClass regtype, string asm> : BaseLoadStorePostIdx, Sched<[WriteLD, WriteI]>; @@ -2882,7 +3045,7 @@ class StorePostIdx sz, bit V, bits<2> opc, RegisterClass regtype, string asm> : BaseLoadStorePostIdx, Sched<[WriteAdr, WriteST, ReadAdrBase]>; } // hasSideEffects = 0 @@ -2899,13 +3062,13 @@ multiclass StorePostIdxPseudo { let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in def _isel: Pseudo<(outs GPR64sp:$wback), - (ins regtype:$Rt, am_noindex:$addr, simm9:$idx), [], - "$addr.base = $wback,@earlyclobber $wback">, - PseudoInstExpansion<(Insn regtype:$Rt, am_noindex:$addr, simm9:$idx)>, + (ins regtype:$Rt, GPR64sp:$Rn, simm9:$idx), [], + "$Rn = $wback,@earlyclobber $wback">, + PseudoInstExpansion<(Insn regtype:$Rt, GPR64sp:$Rn, simm9:$idx)>, Sched<[WriteAdr, WriteST, ReadAdrBase]>; - def : Pat<(OpNode (Ty regtype:$Rt), am_noindex:$addr, simm9:$idx), - (!cast(NAME#_isel) regtype:$Rt, am_noindex:$addr, + def : Pat<(OpNode (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$idx), + (!cast(NAME#_isel) regtype:$Rt, GPR64sp:$Rn, simm9:$idx)>; } @@ -2917,14 +3080,10 @@ multiclass StorePostIdxPseudo opc, bit V, bit L, dag oops, dag iops, string asm> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> dst2; - bits<5> base; + : I { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; bits<7> offset; let Inst{31-30} = opc; let Inst{29-27} = 0b101; @@ -2932,88 +3091,48 @@ class BaseLoadStorePairOffset opc, bit V, bit L, dag oops, dag iops, let Inst{25-23} = 0b010; let Inst{22} = L; let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; let DecoderMethod = "DecodePairLdStInstruction"; } -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairOffset opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairOffset, - Sched<[WriteLD, WriteLDHi]>; +multiclass LoadPairOffset opc, bit V, RegisterClass regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in + def i : BaseLoadStorePairOffset, + Sched<[WriteLD, WriteLDHi]>; -let mayLoad = 0, mayStore = 1 in -class StorePairOffset opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairOffset, - Sched<[WriteSTP]>; -} // hasSideEffects = 0 - -// (pre-indexed) - -def MemoryIndexed32SImm7 : AsmOperandClass { - let Name = "MemoryIndexed32SImm7"; - let DiagnosticType = "InvalidMemoryIndexed32SImm7"; -} -def am_indexed32simm7 : Operand { // ComplexPattern<...> - let PrintMethod = "printAMIndexed<32>"; - let ParserMatchClass = MemoryIndexed32SImm7; - let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset); -} -def am_indexed32simm7_wb : Operand { // ComplexPattern<...> - let PrintMethod = "printAMIndexedWB<32>"; - let ParserMatchClass = MemoryIndexed32SImm7; - let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset); + def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; } -def MemoryIndexed64SImm7 : AsmOperandClass { - let Name = "MemoryIndexed64SImm7"; - let DiagnosticType = "InvalidMemoryIndexed64SImm7"; -} -def am_indexed64simm7 : Operand { // ComplexPattern<...> - let PrintMethod = "printAMIndexed<64>"; - let ParserMatchClass = MemoryIndexed64SImm7; - let MIOperandInfo 
= (ops GPR64sp:$base, i32imm:$offset); -} -def am_indexed64simm7_wb : Operand { // ComplexPattern<...> - let PrintMethod = "printAMIndexedWB<64>"; - let ParserMatchClass = MemoryIndexed64SImm7; - let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset); -} -def MemoryIndexed128SImm7 : AsmOperandClass { - let Name = "MemoryIndexed128SImm7"; - let DiagnosticType = "InvalidMemoryIndexed128SImm7"; -} -def am_indexed128simm7 : Operand { // ComplexPattern<...> - let PrintMethod = "printAMIndexed<128>"; - let ParserMatchClass = MemoryIndexed128SImm7; - let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset); -} -def am_indexed128simm7_wb : Operand { // ComplexPattern<...> - let PrintMethod = "printAMIndexedWB<128>"; - let ParserMatchClass = MemoryIndexed128SImm7; - let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset); +multiclass StorePairOffset opc, bit V, RegisterClass regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in + def i : BaseLoadStorePairOffset, + Sched<[WriteSTP]>; + + def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; } +// (pre-indexed) class BaseLoadStorePairPreIdx opc, bit V, bit L, dag oops, dag iops, string asm> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> dst2; - bits<5> base; + : I { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; bits<7> offset; let Inst{31-30} = opc; let Inst{29-27} = 0b101; @@ -3021,9 +3140,9 @@ class BaseLoadStorePairPreIdx opc, bit V, bit L, dag oops, dag iops, let Inst{25-23} = 0b011; let Inst{22} = L; let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; let DecoderMethod = "DecodePairLdStInstruction"; } @@ -3031,17 +3150,18 @@ class BaseLoadStorePairPreIdx opc, bit V, bit L, dag oops, dag iops, let hasSideEffects = 0 in { let mayStore = 0, mayLoad = 1 in class LoadPairPreIdx opc, bit V, RegisterClass regtype, - Operand addrmode, string asm> + Operand indextype, string asm> : BaseLoadStorePairPreIdx, + (ins GPR64sp:$Rn, indextype:$offset), asm>, Sched<[WriteLD, WriteLDHi, WriteAdr]>; let mayStore = 1, mayLoad = 0 in class StorePairPreIdx opc, bit V, RegisterClass regtype, - Operand addrmode, string asm> + Operand indextype, string asm> : BaseLoadStorePairPreIdx, Sched<[WriteAdr, WriteSTP]>; } // hasSideEffects = 0 @@ -3050,14 +3170,10 @@ class StorePairPreIdx opc, bit V, RegisterClass regtype, class BaseLoadStorePairPostIdx opc, bit V, bit L, dag oops, dag iops, string asm> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> dst2; - bits<5> base; + : I { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; bits<7> offset; let Inst{31-30} = opc; let Inst{29-27} = 0b101; @@ -3065,9 +3181,9 @@ class BaseLoadStorePairPostIdx opc, bit V, bit L, dag oops, dag iops, let Inst{25-23} = 0b001; let Inst{22} = L; let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; let DecoderMethod = "DecodePairLdStInstruction"; } @@ -3078,7 +3194,7 @@ class LoadPairPostIdx opc, bit V, RegisterClass regtype, Operand idxtype, string asm> : BaseLoadStorePairPostIdx, + (ins GPR64sp:$Rn, idxtype:$offset), asm>, Sched<[WriteLD, WriteLDHi, WriteAdr]>; let mayStore = 1, mayLoad = 0 in @@ -3086,7 +3202,7 @@ class StorePairPostIdx opc, bit V, RegisterClass regtype, Operand idxtype, string asm> : BaseLoadStorePairPostIdx, Sched<[WriteAdr, WriteSTP]>; } // hasSideEffects = 0 @@ -3095,14 +3211,10 @@ class StorePairPostIdx opc, bit V, RegisterClass regtype, class BaseLoadStorePairNoAlloc opc, bit V, bit L, dag oops, dag iops, string asm> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> dst2; - bits<5> base; + : I { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; bits<7> offset; let Inst{31-30} = opc; let Inst{29-27} = 0b101; @@ -3110,30 +3222,40 @@ class BaseLoadStorePairNoAlloc opc, bit V, bit L, dag oops, dag iops, let Inst{25-23} = 0b000; let Inst{22} = L; let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; let DecoderMethod = "DecodePairLdStInstruction"; } -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairNoAlloc opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairNoAlloc, - Sched<[WriteLD, WriteLDHi]>; +multiclass LoadPairNoAlloc opc, bit V, RegisterClass regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in + def i : BaseLoadStorePairNoAlloc, + Sched<[WriteLD, WriteLDHi]>; -let mayStore = 1, mayLoad = 0 in -class StorePairNoAlloc opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairNoAlloc, - Sched<[WriteSTP]>; -} // hasSideEffects = 0 + + def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; +} + +multiclass StorePairNoAlloc opc, bit V, RegisterClass regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in + def i : BaseLoadStorePairNoAlloc, + Sched<[WriteSTP]>; + + def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; +} //--- // Load/store exclusive @@ -3172,10 +3294,10 @@ class BaseLoadStoreExclusive sz, bit o2, bit L, bit o1, bit o0, class LoadStoreExclusiveSimple sz, bit o2, bit L, bit o1, bit o0, dag oops, dag iops, string asm, string operands> : BaseLoadStoreExclusive { - bits<5> reg; - bits<5> base; - let Inst{9-5} = base; - let Inst{4-0} = reg; + bits<5> Rt; + bits<5> Rn; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; let PostEncoderMethod = "fixLoadStoreExclusive<0,0>"; } @@ -3185,28 +3307,28 @@ let mayLoad = 1, mayStore = 0 in class LoadAcquire sz, bit o2, bit L, bit o1, bit o0, RegisterClass regtype, string asm> : LoadStoreExclusiveSimple, + (ins 
GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]">, Sched<[WriteLD]>; class LoadExclusive sz, bit o2, bit L, bit o1, bit o0, RegisterClass regtype, string asm> : LoadStoreExclusiveSimple, + (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]">, Sched<[WriteLD]>; class LoadExclusivePair sz, bit o2, bit L, bit o1, bit o0, RegisterClass regtype, string asm> : BaseLoadStoreExclusive, + (ins GPR64sp0:$Rn), asm, + "\t$Rt, $Rt2, [$Rn]">, Sched<[WriteLD, WriteLDHi]> { - bits<5> dst1; - bits<5> dst2; - bits<5> base; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst1; + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; let PostEncoderMethod = "fixLoadStoreExclusive<0,1>"; } @@ -3216,23 +3338,23 @@ let mayLoad = 0, mayStore = 1 in class StoreRelease sz, bit o2, bit L, bit o1, bit o0, RegisterClass regtype, string asm> : LoadStoreExclusiveSimple, + (ins regtype:$Rt, GPR64sp0:$Rn), + asm, "\t$Rt, [$Rn]">, Sched<[WriteST]>; let mayLoad = 1, mayStore = 1 in class StoreExclusive sz, bit o2, bit L, bit o1, bit o0, RegisterClass regtype, string asm> : BaseLoadStoreExclusive, + (ins regtype:$Rt, GPR64sp0:$Rn), + asm, "\t$Ws, $Rt, [$Rn]">, Sched<[WriteSTX]> { - bits<5> status; - bits<5> reg; - bits<5> base; - let Inst{20-16} = status; - let Inst{9-5} = base; - let Inst{4-0} = reg; + bits<5> Ws; + bits<5> Rt; + bits<5> Rn; + let Inst{20-16} = Ws; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; let Constraints = "@earlyclobber $Ws"; let PostEncoderMethod = "fixLoadStoreExclusive<1,0>"; @@ -3242,17 +3364,17 @@ class StoreExclusivePair sz, bit o2, bit L, bit o1, bit o0, RegisterClass regtype, string asm> : BaseLoadStoreExclusive, + (ins regtype:$Rt, regtype:$Rt2, GPR64sp0:$Rn), + asm, "\t$Ws, $Rt, $Rt2, [$Rn]">, Sched<[WriteSTX]> { - bits<5> status; - bits<5> dst1; - bits<5> dst2; - bits<5> base; - let Inst{20-16} = status; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst1; + bits<5> Ws; + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + let Inst{20-16} = Ws; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; let Constraints = "@earlyclobber $Ws"; } @@ -3916,18 +4038,6 @@ multiclass FPMoveImmediate { // AdvSIMD //---------------------------------------------------------------------------- -def MemorySIMDNoIndexOperand : AsmOperandClass { - let Name = "MemorySIMDNoIndex"; - let ParserMethod = "tryParseNoIndexMemory"; -} -def am_simdnoindex : Operand, - ComplexPattern { - let PrintMethod = "printAMNoIndex"; - let ParserMatchClass = MemorySIMDNoIndexOperand; - let MIOperandInfo = (ops GPR64sp:$base); - let DecoderMethod = "DecodeGPR64spRegisterClass"; -} - let Predicates = [HasNEON] in { //---------------------------------------------------------------------------- @@ -7573,13 +7683,13 @@ multiclass SIMDVectorLShiftLongBHSD opc, string asm, // SIMD ldX/stX no-index memory references don't allow the optional // ", #0" constant and handle post-indexing explicitly, so we use // a more specialized parse method for them. Otherwise, it's the same as -// the general am_noindex handling. +// the general GPR64sp handling. 
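// As an illustration of the asymmetry described above (these assembly lines
// are not part of the patch, just a sketch of what the parser must accept
// or reject):
//   ldr  x0, [x1]             // scalar load, bare base register
//   ldr  x0, [x1, #0]         // optional ", #0" is accepted for scalar forms
//   ld1  { v0.8b }, [x1]      // vector load, bare base register only
//   ld1  { v0.8b }, [x1, #0]  // rejected: ld1/st1 take no immediate offset
//   ld1  { v0.8b }, [x1], #8  // post-indexed by immediate, handled explicitly
//   ld1  { v0.8b }, [x1], x2  // post-indexed by register, handled explicitly
// Either way the base operand is now plain GPR64sp:$Rn; only the parse
// method differs.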
class BaseSIMDLdSt opcode, bits<2> size, string asm, dag oops, dag iops, list pattern> - : I { + : I { bits<5> Vt; - bits<5> vaddr; + bits<5> Rn; let Inst{31} = 0; let Inst{30} = Q; let Inst{29-23} = 0b0011000; @@ -7587,15 +7697,15 @@ class BaseSIMDLdSt opcode, bits<2> size, let Inst{21-16} = 0b000000; let Inst{15-12} = opcode; let Inst{11-10} = size; - let Inst{9-5} = vaddr; + let Inst{9-5} = Rn; let Inst{4-0} = Vt; } class BaseSIMDLdStPost opcode, bits<2> size, string asm, dag oops, dag iops> - : I { + : I { bits<5> Vt; - bits<5> vaddr; + bits<5> Rn; bits<5> Xm; let Inst{31} = 0; let Inst{30} = Q; @@ -7605,7 +7715,7 @@ class BaseSIMDLdStPost opcode, bits<2> size, let Inst{20-16} = Xm; let Inst{15-12} = opcode; let Inst{11-10} = size; - let Inst{9-5} = vaddr; + let Inst{9-5} = Rn; let Inst{4-0} = Vt; } @@ -7614,41 +7724,41 @@ class BaseSIMDLdStPost opcode, bits<2> size, multiclass SIMDLdStAliases { // E.g. "ld1 { v0.8b, v1.8b }, [x1], #16" - // "ld1\t$Vt, $vaddr, #16" + // "ld1\t$Vt, [$Rn], #16" // may get mapped to - // (LD1Twov8b_POST VecListTwo8b:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias(NAME # Count # "v" # layout # "_POST") - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("VecList" # Count # layout):$Vt, XZR), 1>; // E.g. "ld1.8b { v0, v1 }, [x1], #16" - // "ld1.8b\t$Vt, $vaddr, #16" + // "ld1.8b\t$Vt, [$Rn], #16" // may get mapped to - // (LD1Twov8b_POST VecListTwo64:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias(NAME # Count # "v" # layout # "_POST") - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("VecList" # Count # Size):$Vt, XZR), 0>; // E.g. "ld1.8b { v0, v1 }, [x1]" - // "ld1\t$Vt, $vaddr" + // "ld1\t$Vt, [$Rn]" // may get mapped to - // (LD1Twov8b VecListTwo64:$Vt, am_simdnoindex:$vaddr) - def : InstAlias(NAME # Count # "v" # layout) !cast("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr), 0>; + GPR64sp:$Rn), 0>; // E.g. 
"ld1.8b { v0, v1 }, [x1], x2" - // "ld1\t$Vt, $vaddr, $Xm" + // "ld1\t$Vt, [$Rn], $Xm" // may get mapped to - // (LD1Twov8b_POST VecListTwo64:$Vt, am_simdnoindex:$vaddr, GPR64pi8:$Xm) - def : InstAlias(NAME # Count # "v" # layout # "_POST") - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("VecList" # Count # Size):$Vt, !cast("GPR64pi" # Offset):$Xm), 0>; } @@ -7658,61 +7768,61 @@ multiclass BaseSIMDLdN(veclist # "16b"):$Vt), - (ins am_simdnoindex:$vaddr), []>; + (ins GPR64sp:$Rn), []>; def v8h : BaseSIMDLdSt<1, 1, opcode, 0b01, asm, (outs !cast(veclist # "8h"):$Vt), - (ins am_simdnoindex:$vaddr), []>; + (ins GPR64sp:$Rn), []>; def v4s : BaseSIMDLdSt<1, 1, opcode, 0b10, asm, (outs !cast(veclist # "4s"):$Vt), - (ins am_simdnoindex:$vaddr), []>; + (ins GPR64sp:$Rn), []>; def v2d : BaseSIMDLdSt<1, 1, opcode, 0b11, asm, (outs !cast(veclist # "2d"):$Vt), - (ins am_simdnoindex:$vaddr), []>; + (ins GPR64sp:$Rn), []>; def v8b : BaseSIMDLdSt<0, 1, opcode, 0b00, asm, (outs !cast(veclist # "8b"):$Vt), - (ins am_simdnoindex:$vaddr), []>; + (ins GPR64sp:$Rn), []>; def v4h : BaseSIMDLdSt<0, 1, opcode, 0b01, asm, (outs !cast(veclist # "4h"):$Vt), - (ins am_simdnoindex:$vaddr), []>; + (ins GPR64sp:$Rn), []>; def v2s : BaseSIMDLdSt<0, 1, opcode, 0b10, asm, (outs !cast(veclist # "2s"):$Vt), - (ins am_simdnoindex:$vaddr), []>; + (ins GPR64sp:$Rn), []>; def v16b_POST: BaseSIMDLdStPost<1, 1, opcode, 0b00, asm, - (outs am_simdnoindex:$wback, + (outs GPR64sp:$wback, !cast(veclist # "16b"):$Vt), - (ins am_simdnoindex:$vaddr, + (ins GPR64sp:$Rn, !cast("GPR64pi" # Offset128):$Xm)>; def v8h_POST : BaseSIMDLdStPost<1, 1, opcode, 0b01, asm, - (outs am_simdnoindex:$wback, + (outs GPR64sp:$wback, !cast(veclist # "8h"):$Vt), - (ins am_simdnoindex:$vaddr, + (ins GPR64sp:$Rn, !cast("GPR64pi" # Offset128):$Xm)>; def v4s_POST : BaseSIMDLdStPost<1, 1, opcode, 0b10, asm, - (outs am_simdnoindex:$wback, + (outs GPR64sp:$wback, !cast(veclist # "4s"):$Vt), - (ins am_simdnoindex:$vaddr, + (ins GPR64sp:$Rn, !cast("GPR64pi" # Offset128):$Xm)>; def v2d_POST : BaseSIMDLdStPost<1, 1, opcode, 0b11, asm, - (outs am_simdnoindex:$wback, + (outs GPR64sp:$wback, !cast(veclist # "2d"):$Vt), - (ins am_simdnoindex:$vaddr, + (ins GPR64sp:$Rn, !cast("GPR64pi" # Offset128):$Xm)>; def v8b_POST : BaseSIMDLdStPost<0, 1, opcode, 0b00, asm, - (outs am_simdnoindex:$wback, + (outs GPR64sp:$wback, !cast(veclist # "8b"):$Vt), - (ins am_simdnoindex:$vaddr, + (ins GPR64sp:$Rn, !cast("GPR64pi" # Offset64):$Xm)>; def v4h_POST : BaseSIMDLdStPost<0, 1, opcode, 0b01, asm, - (outs am_simdnoindex:$wback, + (outs GPR64sp:$wback, !cast(veclist # "4h"):$Vt), - (ins am_simdnoindex:$vaddr, + (ins GPR64sp:$Rn, !cast("GPR64pi" # Offset64):$Xm)>; def v2s_POST : BaseSIMDLdStPost<0, 1, opcode, 0b10, asm, - (outs am_simdnoindex:$wback, + (outs GPR64sp:$wback, !cast(veclist # "2s"):$Vt), - (ins am_simdnoindex:$vaddr, + (ins GPR64sp:$Rn, !cast("GPR64pi" # Offset64):$Xm)>; } @@ -7731,60 +7841,60 @@ multiclass BaseSIMDStN(veclist # "16b"):$Vt, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def v8h : BaseSIMDLdSt<1, 0, opcode, 0b01, asm, (outs), (ins !cast(veclist # "8h"):$Vt, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def v4s : BaseSIMDLdSt<1, 0, opcode, 0b10, asm, (outs), (ins !cast(veclist # "4s"):$Vt, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def v2d : BaseSIMDLdSt<1, 0, opcode, 0b11, asm, (outs), (ins !cast(veclist # "2d"):$Vt, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def v8b : BaseSIMDLdSt<0, 0, opcode, 0b00, asm, (outs), (ins !cast(veclist # 
"8b"):$Vt, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def v4h : BaseSIMDLdSt<0, 0, opcode, 0b01, asm, (outs), (ins !cast(veclist # "4h"):$Vt, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def v2s : BaseSIMDLdSt<0, 0, opcode, 0b10, asm, (outs), (ins !cast(veclist # "2s"):$Vt, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def v16b_POST : BaseSIMDLdStPost<1, 0, opcode, 0b00, asm, - (outs am_simdnoindex:$wback), + (outs GPR64sp:$wback), (ins !cast(veclist # "16b"):$Vt, - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("GPR64pi" # Offset128):$Xm)>; def v8h_POST : BaseSIMDLdStPost<1, 0, opcode, 0b01, asm, - (outs am_simdnoindex:$wback), + (outs GPR64sp:$wback), (ins !cast(veclist # "8h"):$Vt, - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("GPR64pi" # Offset128):$Xm)>; def v4s_POST : BaseSIMDLdStPost<1, 0, opcode, 0b10, asm, - (outs am_simdnoindex:$wback), + (outs GPR64sp:$wback), (ins !cast(veclist # "4s"):$Vt, - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("GPR64pi" # Offset128):$Xm)>; def v2d_POST : BaseSIMDLdStPost<1, 0, opcode, 0b11, asm, - (outs am_simdnoindex:$wback), + (outs GPR64sp:$wback), (ins !cast(veclist # "2d"):$Vt, - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("GPR64pi" # Offset128):$Xm)>; def v8b_POST : BaseSIMDLdStPost<0, 0, opcode, 0b00, asm, - (outs am_simdnoindex:$wback), + (outs GPR64sp:$wback), (ins !cast(veclist # "8b"):$Vt, - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("GPR64pi" # Offset64):$Xm)>; def v4h_POST : BaseSIMDLdStPost<0, 0, opcode, 0b01, asm, - (outs am_simdnoindex:$wback), + (outs GPR64sp:$wback), (ins !cast(veclist # "4h"):$Vt, - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("GPR64pi" # Offset64):$Xm)>; def v2s_POST : BaseSIMDLdStPost<0, 0, opcode, 0b10, asm, - (outs am_simdnoindex:$wback), + (outs GPR64sp:$wback), (ins !cast(veclist # "2s"):$Vt, - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("GPR64pi" # Offset64):$Xm)>; } @@ -7805,12 +7915,12 @@ multiclass BaseSIMDLd1(veclist # "1d"):$Vt), - (ins am_simdnoindex:$vaddr), []>; + (ins GPR64sp:$Rn), []>; def v1d_POST : BaseSIMDLdStPost<0, 1, opcode, 0b11, asm, - (outs am_simdnoindex:$wback, + (outs GPR64sp:$wback, !cast(veclist # "1d"):$Vt), - (ins am_simdnoindex:$vaddr, + (ins GPR64sp:$Rn, !cast("GPR64pi" # Offset64):$Xm)>; } @@ -7825,12 +7935,12 @@ multiclass BaseSIMDSt1(veclist # "1d"):$Vt, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def v1d_POST : BaseSIMDLdStPost<0, 0, opcode, 0b11, asm, - (outs am_simdnoindex:$wback), + (outs GPR64sp:$wback), (ins !cast(veclist # "1d"):$Vt, - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("GPR64pi" # Offset64):$Xm)>; } @@ -7884,13 +7994,13 @@ class BaseSIMDLdStSingle opcode, dag oops, dag iops, list pattern> : I { bits<5> Vt; - bits<5> vaddr; + bits<5> Rn; let Inst{31} = 0; let Inst{29-24} = 0b001101; let Inst{22} = L; let Inst{21} = R; let Inst{15-13} = opcode; - let Inst{9-5} = vaddr; + let Inst{9-5} = Rn; let Inst{4-0} = Vt; } @@ -7899,13 +8009,13 @@ class BaseSIMDLdStSingleTied opcode, dag oops, dag iops, list pattern> : I { bits<5> Vt; - bits<5> vaddr; + bits<5> Rn; let Inst{31} = 0; let Inst{29-24} = 0b001101; let Inst{22} = L; let Inst{21} = R; let Inst{15-13} = opcode; - let Inst{9-5} = vaddr; + let Inst{9-5} = Rn; let Inst{4-0} = Vt; } @@ -7913,8 +8023,8 @@ class BaseSIMDLdStSingleTied opcode, let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in class BaseSIMDLdR opcode, bit S, bits<2> size, string asm, Operand listtype> - : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, $vaddr", "", - (outs listtype:$Vt), (ins am_simdnoindex:$vaddr), + : 
BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, [$Rn]", "", + (outs listtype:$Vt), (ins GPR64sp:$Rn), []> { let Inst{30} = Q; let Inst{23} = 0; @@ -7925,10 +8035,10 @@ class BaseSIMDLdR opcode, bit S, bits<2> size, string asm, let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in class BaseSIMDLdRPost opcode, bit S, bits<2> size, string asm, Operand listtype, Operand GPR64pi> - : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, $vaddr, $Xm", - "$vaddr = $wback", - (outs am_simdnoindex:$wback, listtype:$Vt), - (ins am_simdnoindex:$vaddr, GPR64pi:$Xm), []> { + : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, [$Rn], $Xm", + "$Rn = $wback", + (outs GPR64sp:$wback, listtype:$Vt), + (ins GPR64sp:$Rn, GPR64pi:$Xm), []> { bits<5> Xm; let Inst{30} = Q; let Inst{23} = 1; @@ -7940,41 +8050,41 @@ class BaseSIMDLdRPost opcode, bit S, bits<2> size, multiclass SIMDLdrAliases { // E.g. "ld1r { v0.8b }, [x1], #1" - // "ld1r.8b\t$Vt, $vaddr, #1" + // "ld1r.8b\t$Vt, [$Rn], #1" // may get mapped to - // (LD1Rv8b_POST VecListOne8b:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias(NAME # "v" # layout # "_POST") - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("VecList" # Count # layout):$Vt, XZR), 1>; // E.g. "ld1r.8b { v0 }, [x1], #1" - // "ld1r.8b\t$Vt, $vaddr, #1" + // "ld1r.8b\t$Vt, [$Rn], #1" // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias(NAME # "v" # layout # "_POST") - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("VecList" # Count # Size):$Vt, XZR), 0>; // E.g. "ld1r.8b { v0 }, [x1]" - // "ld1r.8b\t$Vt, $vaddr" + // "ld1r.8b\t$Vt, [$Rn]" // may get mapped to - // (LD1Rv8b VecListOne64:$Vt, am_simdnoindex:$vaddr) - def : InstAlias(NAME # "v" # layout) !cast("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr), 0>; + GPR64sp:$Rn), 0>; // E.g. "ld1r.8b { v0 }, [x1], x2" - // "ld1r.8b\t$Vt, $vaddr, $Xm" + // "ld1r.8b\t$Vt, [$Rn], $Xm" // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, GPR64pi1:$Xm) - def : InstAlias(NAME # "v" # layout # "_POST") - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("VecList" # Count # Size):$Vt, !cast("GPR64pi" # Offset):$Xm), 0>; } @@ -8035,7 +8145,7 @@ multiclass SIMDLdR opcode, bit S, string asm, string Count, class SIMDLdStSingleB opcode, string asm, dag oops, dag iops, list pattern> - : BaseSIMDLdStSingle { // idx encoded in Q:S:size fields. bits<4> idx; @@ -8047,7 +8157,7 @@ class SIMDLdStSingleB opcode, string asm, } class SIMDLdStSingleBTied opcode, string asm, dag oops, dag iops, list pattern> - : BaseSIMDLdStSingleTied { // idx encoded in Q:S:size fields. bits<4> idx; @@ -8059,8 +8169,8 @@ class SIMDLdStSingleBTied opcode, string asm, } class SIMDLdStSingleBPost opcode, string asm, dag oops, dag iops> - : BaseSIMDLdStSingle { + : BaseSIMDLdStSingle { // idx encoded in Q:S:size fields. bits<4> idx; bits<5> Xm; @@ -8072,8 +8182,8 @@ class SIMDLdStSingleBPost opcode, string asm, } class SIMDLdStSingleBTiedPost opcode, string asm, dag oops, dag iops> - : BaseSIMDLdStSingleTied { + : BaseSIMDLdStSingleTied { // idx encoded in Q:S:size fields. bits<4> idx; bits<5> Xm; @@ -8086,7 +8196,7 @@ class SIMDLdStSingleBTiedPost opcode, string asm, class SIMDLdStSingleH opcode, bit size, string asm, dag oops, dag iops, list pattern> - : BaseSIMDLdStSingle { // idx encoded in Q:S:size<1> fields. 
bits<3> idx; @@ -8099,7 +8209,7 @@ class SIMDLdStSingleH opcode, bit size, string asm, } class SIMDLdStSingleHTied opcode, bit size, string asm, dag oops, dag iops, list pattern> - : BaseSIMDLdStSingleTied { // idx encoded in Q:S:size<1> fields. bits<3> idx; @@ -8113,8 +8223,8 @@ class SIMDLdStSingleHTied opcode, bit size, string asm, class SIMDLdStSingleHPost opcode, bit size, string asm, dag oops, dag iops> - : BaseSIMDLdStSingle { + : BaseSIMDLdStSingle { // idx encoded in Q:S:size<1> fields. bits<3> idx; bits<5> Xm; @@ -8127,8 +8237,8 @@ class SIMDLdStSingleHPost opcode, bit size, string asm, } class SIMDLdStSingleHTiedPost opcode, bit size, string asm, dag oops, dag iops> - : BaseSIMDLdStSingleTied { + : BaseSIMDLdStSingleTied { // idx encoded in Q:S:size<1> fields. bits<3> idx; bits<5> Xm; @@ -8141,7 +8251,7 @@ class SIMDLdStSingleHTiedPost opcode, bit size, string asm } class SIMDLdStSingleS opcode, bits<2> size, string asm, dag oops, dag iops, list pattern> - : BaseSIMDLdStSingle { // idx encoded in Q:S fields. bits<2> idx; @@ -8153,7 +8263,7 @@ class SIMDLdStSingleS opcode, bits<2> size, string asm, } class SIMDLdStSingleSTied opcode, bits<2> size, string asm, dag oops, dag iops, list pattern> - : BaseSIMDLdStSingleTied { // idx encoded in Q:S fields. bits<2> idx; @@ -8165,8 +8275,8 @@ class SIMDLdStSingleSTied opcode, bits<2> size, string asm } class SIMDLdStSingleSPost opcode, bits<2> size, string asm, dag oops, dag iops> - : BaseSIMDLdStSingle { + : BaseSIMDLdStSingle { // idx encoded in Q:S fields. bits<2> idx; bits<5> Xm; @@ -8178,8 +8288,8 @@ class SIMDLdStSingleSPost opcode, bits<2> size, } class SIMDLdStSingleSTiedPost opcode, bits<2> size, string asm, dag oops, dag iops> - : BaseSIMDLdStSingleTied { + : BaseSIMDLdStSingleTied { // idx encoded in Q:S fields. bits<2> idx; bits<5> Xm; @@ -8191,7 +8301,7 @@ class SIMDLdStSingleSTiedPost opcode, bits<2> size, } class SIMDLdStSingleD opcode, bits<2> size, string asm, dag oops, dag iops, list pattern> - : BaseSIMDLdStSingle { // idx encoded in Q field. bits<1> idx; @@ -8203,7 +8313,7 @@ class SIMDLdStSingleD opcode, bits<2> size, string asm, } class SIMDLdStSingleDTied opcode, bits<2> size, string asm, dag oops, dag iops, list pattern> - : BaseSIMDLdStSingleTied { // idx encoded in Q field. bits<1> idx; @@ -8215,8 +8325,8 @@ class SIMDLdStSingleDTied opcode, bits<2> size, string asm } class SIMDLdStSingleDPost opcode, bits<2> size, string asm, dag oops, dag iops> - : BaseSIMDLdStSingle { + : BaseSIMDLdStSingle { // idx encoded in Q field. bits<1> idx; bits<5> Xm; @@ -8228,8 +8338,8 @@ class SIMDLdStSingleDPost opcode, bits<2> size, } class SIMDLdStSingleDTiedPost opcode, bits<2> size, string asm, dag oops, dag iops> - : BaseSIMDLdStSingleTied { + : BaseSIMDLdStSingleTied { // idx encoded in Q field. 
bits<1> idx; bits<5> Xm; @@ -8247,12 +8357,12 @@ multiclass SIMDLdSingleBTied opcode, string asm, def i8 : SIMDLdStSingleBTied<1, R, opcode, asm, (outs listtype:$dst), (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def i8_POST : SIMDLdStSingleBTiedPost<1, R, opcode, asm, - (outs am_simdnoindex:$wback, listtype:$dst), + (outs GPR64sp:$wback, listtype:$dst), (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; + GPR64sp:$Rn, GPR64pi:$Xm)>; } let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in multiclass SIMDLdSingleHTied opcode, bit size, string asm, @@ -8261,12 +8371,12 @@ multiclass SIMDLdSingleHTied opcode, bit size, string asm, def i16 : SIMDLdStSingleHTied<1, R, opcode, size, asm, (outs listtype:$dst), (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def i16_POST : SIMDLdStSingleHTiedPost<1, R, opcode, size, asm, - (outs am_simdnoindex:$wback, listtype:$dst), + (outs GPR64sp:$wback, listtype:$dst), (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; + GPR64sp:$Rn, GPR64pi:$Xm)>; } let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in multiclass SIMDLdSingleSTied opcode, bits<2> size,string asm, @@ -8275,12 +8385,12 @@ multiclass SIMDLdSingleSTied opcode, bits<2> size,string asm, def i32 : SIMDLdStSingleSTied<1, R, opcode, size, asm, (outs listtype:$dst), (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def i32_POST : SIMDLdStSingleSTiedPost<1, R, opcode, size, asm, - (outs am_simdnoindex:$wback, listtype:$dst), + (outs GPR64sp:$wback, listtype:$dst), (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; + GPR64sp:$Rn, GPR64pi:$Xm)>; } let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in multiclass SIMDLdSingleDTied opcode, bits<2> size, string asm, @@ -8288,100 +8398,100 @@ multiclass SIMDLdSingleDTied opcode, bits<2> size, string asm, def i64 : SIMDLdStSingleDTied<1, R, opcode, size, asm, (outs listtype:$dst), (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def i64_POST : SIMDLdStSingleDTiedPost<1, R, opcode, size, asm, - (outs am_simdnoindex:$wback, listtype:$dst), + (outs GPR64sp:$wback, listtype:$dst), (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; + GPR64sp:$Rn, GPR64pi:$Xm)>; } let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in multiclass SIMDStSingleB opcode, string asm, RegisterOperand listtype, RegisterOperand GPR64pi> { def i8 : SIMDLdStSingleB<0, R, opcode, asm, (outs), (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def i8_POST : SIMDLdStSingleBPost<0, R, opcode, asm, - (outs am_simdnoindex:$wback), + (outs GPR64sp:$wback), (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; + GPR64sp:$Rn, GPR64pi:$Xm)>; } let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in multiclass SIMDStSingleH opcode, bit size, string asm, RegisterOperand listtype, RegisterOperand GPR64pi> { def i16 : SIMDLdStSingleH<0, R, opcode, size, asm, (outs), (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def i16_POST : SIMDLdStSingleHPost<0, R, opcode, size, asm, - (outs am_simdnoindex:$wback), + (outs GPR64sp:$wback), (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; + GPR64sp:$Rn, GPR64pi:$Xm)>; } let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in multiclass SIMDStSingleS opcode, bits<2> size,string 
asm, RegisterOperand listtype, RegisterOperand GPR64pi> { def i32 : SIMDLdStSingleS<0, R, opcode, size, asm, (outs), (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def i32_POST : SIMDLdStSingleSPost<0, R, opcode, size, asm, - (outs am_simdnoindex:$wback), + (outs GPR64sp:$wback), (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; + GPR64sp:$Rn, GPR64pi:$Xm)>; } let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in multiclass SIMDStSingleD opcode, bits<2> size, string asm, RegisterOperand listtype, RegisterOperand GPR64pi> { def i64 : SIMDLdStSingleD<0, R, opcode, size, asm, (outs), (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr), []>; + GPR64sp:$Rn), []>; def i64_POST : SIMDLdStSingleDPost<0, R, opcode, size, asm, - (outs am_simdnoindex:$wback), + (outs GPR64sp:$wback), (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; + GPR64sp:$Rn, GPR64pi:$Xm)>; } multiclass SIMDLdStSingleAliases { // E.g. "ld1 { v0.8b }[0], [x1], #1" - // "ld1\t$Vt, $vaddr, #1" + // "ld1\t$Vt, [$Rn], #1" // may get mapped to - // (LD1Rv8b_POST VecListOne8b:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias(NAME # Type # "_POST") - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("VecList" # Count # layout):$Vt, idxtype:$idx, XZR), 1>; // E.g. "ld1.8b { v0 }[0], [x1], #1" - // "ld1.8b\t$Vt, $vaddr, #1" + // "ld1.8b\t$Vt, [$Rn], #1" // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias(NAME # Type # "_POST") - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("VecList" # Count # "128"):$Vt, idxtype:$idx, XZR), 0>; // E.g. "ld1.8b { v0 }[0], [x1]" - // "ld1.8b\t$Vt, $vaddr" + // "ld1.8b\t$Vt, [$Rn]" // may get mapped to - // (LD1Rv8b VecListOne64:$Vt, am_simdnoindex:$vaddr) - def : InstAlias(NAME # Type) !cast("VecList" # Count # "128"):$Vt, - idxtype:$idx, am_simdnoindex:$vaddr), 0>; + idxtype:$idx, GPR64sp:$Rn), 0>; // E.g. 
"ld1.8b { v0 }[0], [x1], x2" - // "ld1.8b\t$Vt, $vaddr, $Xm" + // "ld1.8b\t$Vt, [$Rn], $Xm" // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, GPR64pi1:$Xm) - def : InstAlias(NAME # Type # "_POST") - am_simdnoindex:$vaddr, + GPR64sp:$Rn, !cast("VecList" # Count # "128"):$Vt, idxtype:$idx, !cast("GPR64pi" # Offset):$Xm), 0>; diff --git a/lib/Target/ARM64/ARM64InstrInfo.cpp b/lib/Target/ARM64/ARM64InstrInfo.cpp index 5643fb0ce2c8..e1f9667841c8 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.cpp +++ b/lib/Target/ARM64/ARM64InstrInfo.cpp @@ -1039,29 +1039,53 @@ bool ARM64InstrInfo::isScaledAddr(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; - case ARM64::LDRBBro: - case ARM64::LDRBro: - case ARM64::LDRDro: - case ARM64::LDRHHro: - case ARM64::LDRHro: - case ARM64::LDRQro: - case ARM64::LDRSBWro: - case ARM64::LDRSBXro: - case ARM64::LDRSHWro: - case ARM64::LDRSHXro: - case ARM64::LDRSWro: - case ARM64::LDRSro: - case ARM64::LDRWro: - case ARM64::LDRXro: - case ARM64::STRBBro: - case ARM64::STRBro: - case ARM64::STRDro: - case ARM64::STRHHro: - case ARM64::STRHro: - case ARM64::STRQro: - case ARM64::STRSro: - case ARM64::STRWro: - case ARM64::STRXro: + case ARM64::LDRBBroW: + case ARM64::LDRBroW: + case ARM64::LDRDroW: + case ARM64::LDRHHroW: + case ARM64::LDRHroW: + case ARM64::LDRQroW: + case ARM64::LDRSBWroW: + case ARM64::LDRSBXroW: + case ARM64::LDRSHWroW: + case ARM64::LDRSHXroW: + case ARM64::LDRSWroW: + case ARM64::LDRSroW: + case ARM64::LDRWroW: + case ARM64::LDRXroW: + case ARM64::STRBBroW: + case ARM64::STRBroW: + case ARM64::STRDroW: + case ARM64::STRHHroW: + case ARM64::STRHroW: + case ARM64::STRQroW: + case ARM64::STRSroW: + case ARM64::STRWroW: + case ARM64::STRXroW: + case ARM64::LDRBBroX: + case ARM64::LDRBroX: + case ARM64::LDRDroX: + case ARM64::LDRHHroX: + case ARM64::LDRHroX: + case ARM64::LDRQroX: + case ARM64::LDRSBWroX: + case ARM64::LDRSBXroX: + case ARM64::LDRSHWroX: + case ARM64::LDRSHXroX: + case ARM64::LDRSWroX: + case ARM64::LDRSroX: + case ARM64::LDRWroX: + case ARM64::LDRXroX: + case ARM64::STRBBroX: + case ARM64::STRBroX: + case ARM64::STRDroX: + case ARM64::STRHHroX: + case ARM64::STRHroX: + case ARM64::STRQroX: + case ARM64::STRSroX: + case ARM64::STRWroX: + case ARM64::STRXroX: + unsigned Val = MI->getOperand(3).getImm(); ARM64_AM::ShiftExtendType ExtType = ARM64_AM::getMemExtendType(Val); return (ExtType != ARM64_AM::UXTX) || ARM64_AM::getMemDoShift(Val); diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td index 4c735c057a27..94a39c111224 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ b/lib/Target/ARM64/ARM64InstrInfo.td @@ -1064,22 +1064,22 @@ def : InstAlias<"dcps3", (DCPS3 0)>; //===----------------------------------------------------------------------===// // Pair (indexed, offset) -def LDPWi : LoadPairOffset<0b00, 0, GPR32, am_indexed32simm7, "ldp">; -def LDPXi : LoadPairOffset<0b10, 0, GPR64, am_indexed64simm7, "ldp">; -def LDPSi : LoadPairOffset<0b00, 1, FPR32, am_indexed32simm7, "ldp">; -def LDPDi : LoadPairOffset<0b01, 1, FPR64, am_indexed64simm7, "ldp">; -def LDPQi : LoadPairOffset<0b10, 1, FPR128, am_indexed128simm7, "ldp">; +defm LDPW : LoadPairOffset<0b00, 0, GPR32, simm7s4, "ldp">; +defm LDPX : LoadPairOffset<0b10, 0, GPR64, simm7s8, "ldp">; +defm LDPS : LoadPairOffset<0b00, 1, FPR32, simm7s4, "ldp">; +defm LDPD : LoadPairOffset<0b01, 1, FPR64, simm7s8, "ldp">; +defm LDPQ : LoadPairOffset<0b10, 1, FPR128, simm7s16, "ldp">; -def LDPSWi : 
LoadPairOffset<0b01, 0, GPR64, am_indexed32simm7, "ldpsw">; +defm LDPSW : LoadPairOffset<0b01, 0, GPR64, simm7s4, "ldpsw">; // Pair (pre-indexed) -def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32, am_indexed32simm7_wb, "ldp">; -def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64, am_indexed64simm7_wb, "ldp">; -def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32, am_indexed32simm7_wb, "ldp">; -def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64, am_indexed64simm7_wb, "ldp">; -def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128, am_indexed128simm7_wb, "ldp">; +def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32, simm7s4, "ldp">; +def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64, simm7s8, "ldp">; +def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32, simm7s4, "ldp">; +def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64, simm7s8, "ldp">; +def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128, simm7s16, "ldp">; -def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64, am_indexed32simm7_wb, "ldpsw">; +def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64, simm7s4, "ldpsw">; // Pair (post-indexed) def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32, simm7s4, "ldp">; @@ -1092,259 +1092,351 @@ def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64, simm7s4, "ldpsw">; // Pair (no allocate) -def LDNPWi : LoadPairNoAlloc<0b00, 0, GPR32, am_indexed32simm7, "ldnp">; -def LDNPXi : LoadPairNoAlloc<0b10, 0, GPR64, am_indexed64simm7, "ldnp">; -def LDNPSi : LoadPairNoAlloc<0b00, 1, FPR32, am_indexed32simm7, "ldnp">; -def LDNPDi : LoadPairNoAlloc<0b01, 1, FPR64, am_indexed64simm7, "ldnp">; -def LDNPQi : LoadPairNoAlloc<0b10, 1, FPR128, am_indexed128simm7, "ldnp">; +defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32, simm7s4, "ldnp">; +defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64, simm7s8, "ldnp">; +defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32, simm7s4, "ldnp">; +defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64, simm7s8, "ldnp">; +defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128, simm7s16, "ldnp">; //--- // (register offset) //--- -let AddedComplexity = 10 in { // Integer -def LDRBBro : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", - [(set GPR32:$Rt, (zextloadi8 ro_indexed8:$addr))]>; -def LDRHHro : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", - [(set GPR32:$Rt, (zextloadi16 ro_indexed16:$addr))]>; -def LDRWro : Load32RO<0b10, 0, 0b01, GPR32, "ldr", - [(set GPR32:$Rt, (load ro_indexed32:$addr))]>; -def LDRXro : Load64RO<0b11, 0, 0b01, GPR64, "ldr", - [(set GPR64:$Rt, (load ro_indexed64:$addr))]>; +defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>; +defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>; +defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>; +defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>; // Floating-point -def LDRBro : Load8RO<0b00, 1, 0b01, FPR8, "ldr", - [(set FPR8:$Rt, (load ro_indexed8:$addr))]>; -def LDRHro : Load16RO<0b01, 1, 0b01, FPR16, "ldr", - [(set (f16 FPR16:$Rt), (load ro_indexed16:$addr))]>; -def LDRSro : Load32RO<0b10, 1, 0b01, FPR32, "ldr", - [(set (f32 FPR32:$Rt), (load ro_indexed32:$addr))]>; -def LDRDro : Load64RO<0b11, 1, 0b01, FPR64, "ldr", - [(set (f64 FPR64:$Rt), (load ro_indexed64:$addr))]>; -def LDRQro : Load128RO<0b00, 1, 0b11, FPR128, "ldr", []> { - let mayLoad = 1; -} +defm LDRB : Load8RO<0b00, 1, 0b01, FPR8, "ldr", untyped, load>; +defm LDRH : Load16RO<0b01, 1, 0b01, FPR16, "ldr", f16, load>; +defm LDRS : Load32RO<0b10, 1, 0b01, FPR32, "ldr", f32, load>; +defm LDRD : Load64RO<0b11, 1, 0b01, FPR64, "ldr", f64, load>; +defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128, "ldr", f128, load>; + +// Load sign-extended half-word +defm LDRSHW : Load16RO<0b01, 0, 
0b11, GPR32, "ldrsh", i32, sextloadi16>; +defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>; + +// Load sign-extended byte +defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>; +defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>; + +// Load sign-extended word +defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>; + +// Pre-fetch. +defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">; // For regular load, we do not have any alignment requirement. // Thus, it is safe to directly map the vector loads with interesting // addressing modes. // FIXME: We could do the same for bitconvert to floating point vectors. -def : Pat <(v8i8 (scalar_to_vector (i32 (extloadi8 ro_indexed8:$addr)))), - (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub)>; -def : Pat <(v16i8 (scalar_to_vector (i32 (extloadi8 ro_indexed8:$addr)))), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub)>; -def : Pat <(v4i16 (scalar_to_vector (i32 (extloadi16 ro_indexed16:$addr)))), - (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub)>; -def : Pat <(v8i16 (scalar_to_vector (i32 (extloadi16 ro_indexed16:$addr)))), - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub)>; -def : Pat <(v2i32 (scalar_to_vector (i32 (load ro_indexed32:$addr)))), - (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), ssub)>; -def : Pat <(v4i32 (scalar_to_vector (i32 (load ro_indexed32:$addr)))), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), ssub)>; -def : Pat <(v1i64 (scalar_to_vector (i64 (load ro_indexed64:$addr)))), - (LDRDro ro_indexed64:$addr)>; -def : Pat <(v2i64 (scalar_to_vector (i64 (load ro_indexed64:$addr)))), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - (LDRDro ro_indexed64:$addr), dsub)>; +multiclass ScalToVecROLoadPat { + def : Pat<(VecTy (scalar_to_vector (ScalTy + (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))), + (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), + (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset), + sub)>; + + def : Pat<(VecTy (scalar_to_vector (ScalTy + (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))), + (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), + (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset), + sub)>; +} + +let AddedComplexity = 10 in { +defm : ScalToVecROLoadPat; +defm : ScalToVecROLoadPat; + +defm : ScalToVecROLoadPat; +defm : ScalToVecROLoadPat; + +defm : ScalToVecROLoadPat; +defm : ScalToVecROLoadPat; + +defm : ScalToVecROLoadPat; +defm : ScalToVecROLoadPat; + +defm : ScalToVecROLoadPat; + +defm : ScalToVecROLoadPat; + + +def : Pat <(v1i64 (scalar_to_vector (i64 + (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend64:$extend))))), + (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; + +def : Pat <(v1i64 (scalar_to_vector (i64 + (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend64:$extend))))), + (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; +} // Match all load 64 bits width whose type is compatible with FPR64 +multiclass VecROLoadPat { + + def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), + (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; + + def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), + (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; +} + +let AddedComplexity = 10 in { let Predicates = [IsLE] in { // We must do vector loads with LD1 in big-endian. 
- def : Pat<(v2f32 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; - def : Pat<(v8i8 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; - def : Pat<(v4i16 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; - def : Pat<(v2i32 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; } -def : Pat<(v1f64 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v1i64 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; + +defm : VecROLoadPat; +defm : VecROLoadPat; // Match all load 128 bits width whose type is compatible with FPR128 let Predicates = [IsLE] in { // We must do vector loads with LD1 in big-endian. - def : Pat<(v4f32 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; - def : Pat<(v2f64 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; - def : Pat<(v16i8 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; - def : Pat<(v8i16 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; - def : Pat<(v4i32 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; - def : Pat<(v2i64 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; } -def : Pat<(f128 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; +} // AddedComplexity = 10 -// Load sign-extended half-word -def LDRSHWro : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", - [(set GPR32:$Rt, (sextloadi16 ro_indexed16:$addr))]>; -def LDRSHXro : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", - [(set GPR64:$Rt, (sextloadi16 ro_indexed16:$addr))]>; +// zextload -> i64 +multiclass ExtLoadTo64ROPat { + def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), + (SUBREG_TO_REG (i64 0), + (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), + sub_32)>; + + def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), + (SUBREG_TO_REG (i64 0), + (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), + sub_32)>; +} -// Load sign-extended byte -def LDRSBWro : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", - [(set GPR32:$Rt, (sextloadi8 ro_indexed8:$addr))]>; -def LDRSBXro : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", - [(set GPR64:$Rt, (sextloadi8 ro_indexed8:$addr))]>; +let AddedComplexity = 10 in { + defm : ExtLoadTo64ROPat; + defm : ExtLoadTo64ROPat; + defm : ExtLoadTo64ROPat; -// Load sign-extended word -def LDRSWro : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", - [(set GPR64:$Rt, (sextloadi32 ro_indexed32:$addr))]>; + // zextloadi1 -> zextloadi8 + defm : ExtLoadTo64ROPat; + + // extload -> zextload + defm : ExtLoadTo64ROPat; + defm : ExtLoadTo64ROPat; + defm : ExtLoadTo64ROPat; + + // extloadi1 -> zextloadi8 + defm : ExtLoadTo64ROPat; +} -// Pre-fetch. 
-def PRFMro : PrefetchRO<0b11, 0, 0b10, "prfm", - [(ARM64Prefetch imm:$Rt, ro_indexed64:$addr)]>; // zextload -> i64 -def : Pat<(i64 (zextloadi8 ro_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>; -def : Pat<(i64 (zextloadi16 ro_indexed16:$addr)), - (SUBREG_TO_REG (i64 0), (LDRHHro ro_indexed16:$addr), sub_32)>; -def : Pat<(i64 (zextloadi32 ro_indexed32:$addr)), - (SUBREG_TO_REG (i64 0), (LDRWro ro_indexed32:$addr), sub_32)>; +multiclass ExtLoadTo32ROPat { + def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), + (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; -// zextloadi1 -> zextloadi8 -def : Pat<(i32 (zextloadi1 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>; -def : Pat<(i64 (zextloadi1 ro_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>; + def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), + (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; -// extload -> zextload -def : Pat<(i32 (extloadi16 ro_indexed16:$addr)), (LDRHHro ro_indexed16:$addr)>; -def : Pat<(i32 (extloadi8 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>; -def : Pat<(i32 (extloadi1 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>; -def : Pat<(i64 (extloadi32 ro_indexed32:$addr)), - (SUBREG_TO_REG (i64 0), (LDRWro ro_indexed32:$addr), sub_32)>; -def : Pat<(i64 (extloadi16 ro_indexed16:$addr)), - (SUBREG_TO_REG (i64 0), (LDRHHro ro_indexed16:$addr), sub_32)>; -def : Pat<(i64 (extloadi8 ro_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>; -def : Pat<(i64 (extloadi1 ro_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>; +} -} // AddedComplexity = 10 +let AddedComplexity = 10 in { + // extload -> zextload + defm : ExtLoadTo32ROPat; + defm : ExtLoadTo32ROPat; + defm : ExtLoadTo32ROPat; + + // zextloadi1 -> zextloadi8 + defm : ExtLoadTo32ROPat; +} //--- // (unsigned immediate) //--- -def LDRXui : LoadUI<0b11, 0, 0b01, GPR64, am_indexed64, "ldr", - [(set GPR64:$Rt, (load am_indexed64:$addr))]>; -def LDRWui : LoadUI<0b10, 0, 0b01, GPR32, am_indexed32, "ldr", - [(set GPR32:$Rt, (load am_indexed32:$addr))]>; -def LDRBui : LoadUI<0b00, 1, 0b01, FPR8, am_indexed8, "ldr", - [(set FPR8:$Rt, (load am_indexed8:$addr))]>; -def LDRHui : LoadUI<0b01, 1, 0b01, FPR16, am_indexed16, "ldr", - [(set (f16 FPR16:$Rt), (load am_indexed16:$addr))]>; -def LDRSui : LoadUI<0b10, 1, 0b01, FPR32, am_indexed32, "ldr", - [(set (f32 FPR32:$Rt), (load am_indexed32:$addr))]>; -def LDRDui : LoadUI<0b11, 1, 0b01, FPR64, am_indexed64, "ldr", - [(set (f64 FPR64:$Rt), (load am_indexed64:$addr))]>; -def LDRQui : LoadUI<0b00, 1, 0b11, FPR128, am_indexed128, "ldr", - [(set (f128 FPR128:$Rt), (load am_indexed128:$addr))]>; +defm LDRX : LoadUI<0b11, 0, 0b01, GPR64, uimm12s8, "ldr", + [(set GPR64:$Rt, + (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; +defm LDRW : LoadUI<0b10, 0, 0b01, GPR32, uimm12s4, "ldr", + [(set GPR32:$Rt, + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; +defm LDRB : LoadUI<0b00, 1, 0b01, FPR8, uimm12s1, "ldr", + [(set FPR8:$Rt, + (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>; +defm LDRH : LoadUI<0b01, 1, 0b01, FPR16, uimm12s2, "ldr", + [(set (f16 FPR16:$Rt), + (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>; +defm LDRS : LoadUI<0b10, 1, 0b01, FPR32, uimm12s4, "ldr", + [(set (f32 FPR32:$Rt), + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; +defm LDRD : LoadUI<0b11, 1, 0b01, FPR64, uimm12s8, "ldr", + [(set (f64 FPR64:$Rt), + 
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; +defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128, uimm12s16, "ldr", + [(set (f128 FPR128:$Rt), + (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>; // For regular load, we do not have any alignment requirement. // Thus, it is safe to directly map the vector loads with interesting // addressing modes. // FIXME: We could do the same for bitconvert to floating point vectors. -def : Pat <(v8i8 (scalar_to_vector (i32 (extloadi8 am_indexed8:$addr)))), +def : Pat <(v8i8 (scalar_to_vector (i32 + (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), bsub)>; -def : Pat <(v16i8 (scalar_to_vector (i32 (extloadi8 am_indexed8:$addr)))), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; +def : Pat <(v16i8 (scalar_to_vector (i32 + (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), bsub)>; -def : Pat <(v4i16 (scalar_to_vector (i32 (extloadi16 am_indexed16:$addr)))), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; +def : Pat <(v4i16 (scalar_to_vector (i32 + (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), hsub)>; -def : Pat <(v8i16 (scalar_to_vector (i32 (extloadi16 am_indexed16:$addr)))), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; +def : Pat <(v8i16 (scalar_to_vector (i32 + (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), hsub)>; -def : Pat <(v2i32 (scalar_to_vector (i32 (load am_indexed32:$addr)))), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; +def : Pat <(v2i32 (scalar_to_vector (i32 + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - (LDRSui am_indexed32:$addr), ssub)>; -def : Pat <(v4i32 (scalar_to_vector (i32 (load am_indexed32:$addr)))), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; +def : Pat <(v4i32 (scalar_to_vector (i32 + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (LDRSui am_indexed32:$addr), ssub)>; -def : Pat <(v1i64 (scalar_to_vector (i64 (load am_indexed64:$addr)))), - (LDRDui am_indexed64:$addr)>; -def : Pat <(v2i64 (scalar_to_vector (i64 (load am_indexed64:$addr)))), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; +def : Pat <(v1i64 (scalar_to_vector (i64 + (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat <(v2i64 (scalar_to_vector (i64 + (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - (LDRDui am_indexed64:$addr), dsub)>; + (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>; // Match all load 64 bits width whose type is compatible with FPR64 let Predicates = [IsLE] in { // We must use LD1 to perform vector loads in big-endian. 
- def : Pat<(v2f32 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; - def : Pat<(v8i8 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; - def : Pat<(v4i16 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; - def : Pat<(v2i32 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; + def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; } -def : Pat<(v1f64 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; -def : Pat<(v1i64 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; +def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; // Match all load 128 bits width whose type is compatible with FPR128 let Predicates = [IsLE] in { // We must use LD1 to perform vector loads in big-endian. - def : Pat<(v4f32 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; - def : Pat<(v2f64 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; - def : Pat<(v16i8 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; - def : Pat<(v8i16 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; - def : Pat<(v4i32 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; - def : Pat<(v2i64 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; + def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; } -def : Pat<(f128 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; - -def LDRHHui : LoadUI<0b01, 0, 0b01, GPR32, am_indexed16, "ldrh", - [(set GPR32:$Rt, (zextloadi16 am_indexed16:$addr))]>; -def LDRBBui : LoadUI<0b00, 0, 0b01, GPR32, am_indexed8, "ldrb", - [(set GPR32:$Rt, (zextloadi8 am_indexed8:$addr))]>; +def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + +defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh", + [(set GPR32:$Rt, + (zextloadi16 (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)))]>; +defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb", + [(set GPR32:$Rt, + (zextloadi8 (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)))]>; // zextload -> i64 -def : Pat<(i64 (zextloadi8 am_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>; -def : Pat<(i64 (zextloadi16 am_indexed16:$addr)), - (SUBREG_TO_REG (i64 0), (LDRHHui am_indexed16:$addr), sub_32)>; +def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, 
uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; +def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), + (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; // zextloadi1 -> zextloadi8 -def : Pat<(i32 (zextloadi1 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>; -def : Pat<(i64 (zextloadi1 am_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>; +def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; // extload -> zextload -def : Pat<(i32 (extloadi16 am_indexed16:$addr)), (LDRHHui am_indexed16:$addr)>; -def : Pat<(i32 (extloadi8 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>; -def : Pat<(i32 (extloadi1 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>; -def : Pat<(i64 (extloadi32 am_indexed32:$addr)), - (SUBREG_TO_REG (i64 0), (LDRWui am_indexed32:$addr), sub_32)>; -def : Pat<(i64 (extloadi16 am_indexed16:$addr)), - (SUBREG_TO_REG (i64 0), (LDRHHui am_indexed16:$addr), sub_32)>; -def : Pat<(i64 (extloadi8 am_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>; -def : Pat<(i64 (extloadi1 am_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>; +def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), + (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), + (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; +def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), + (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; +def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; +def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; // load sign-extended half-word -def LDRSHWui : LoadUI<0b01, 0, 0b11, GPR32, am_indexed16, "ldrsh", - [(set GPR32:$Rt, (sextloadi16 am_indexed16:$addr))]>; -def LDRSHXui : LoadUI<0b01, 0, 0b10, GPR64, am_indexed16, "ldrsh", - [(set GPR64:$Rt, (sextloadi16 am_indexed16:$addr))]>; +defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh", + [(set GPR32:$Rt, + (sextloadi16 (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)))]>; +defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh", + [(set GPR64:$Rt, + (sextloadi16 (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)))]>; // load sign-extended byte -def LDRSBWui : LoadUI<0b00, 0, 0b11, GPR32, am_indexed8, "ldrsb", - [(set GPR32:$Rt, (sextloadi8 am_indexed8:$addr))]>; -def LDRSBXui : LoadUI<0b00, 0, 0b10, GPR64, am_indexed8, "ldrsb", - [(set GPR64:$Rt, (sextloadi8 am_indexed8:$addr))]>; +defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb", + [(set GPR32:$Rt, + (sextloadi8 (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)))]>; +defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb", + [(set GPR64:$Rt, + (sextloadi8 (am_indexed8 GPR64sp:$Rn, + 
uimm12s1:$offset)))]>; // load sign-extended word -def LDRSWui : LoadUI<0b10, 0, 0b10, GPR64, am_indexed32, "ldrsw", - [(set GPR64:$Rt, (sextloadi32 am_indexed32:$addr))]>; +defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", + [(set GPR64:$Rt, + (sextloadi32 (am_indexed32 GPR64sp:$Rn, + uimm12s4:$offset)))]>; // load zero-extended word -def : Pat<(i64 (zextloadi32 am_indexed32:$addr)), - (SUBREG_TO_REG (i64 0), (LDRWui am_indexed32:$addr), sub_32)>; +def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), + (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; // Pre-fetch. def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", - [(ARM64Prefetch imm:$Rt, am_indexed64:$addr)]>; + [(ARM64Prefetch imm:$Rt, + (am_indexed64 GPR64sp:$Rn, + uimm12s8:$offset))]>; + +def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>; //--- // (literal) @@ -1363,76 +1455,99 @@ def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>; //--- // (unscaled immediate) -def LDURXi : LoadUnscaled<0b11, 0, 0b01, GPR64, am_unscaled64, "ldur", - [(set GPR64:$Rt, (load am_unscaled64:$addr))]>; -def LDURWi : LoadUnscaled<0b10, 0, 0b01, GPR32, am_unscaled32, "ldur", - [(set GPR32:$Rt, (load am_unscaled32:$addr))]>; -def LDURBi : LoadUnscaled<0b00, 1, 0b01, FPR8, am_unscaled8, "ldur", - [(set FPR8:$Rt, (load am_unscaled8:$addr))]>; -def LDURHi : LoadUnscaled<0b01, 1, 0b01, FPR16, am_unscaled16, "ldur", - [(set (f16 FPR16:$Rt), (load am_unscaled16:$addr))]>; -def LDURSi : LoadUnscaled<0b10, 1, 0b01, FPR32, am_unscaled32, "ldur", - [(set (f32 FPR32:$Rt), (load am_unscaled32:$addr))]>; -def LDURDi : LoadUnscaled<0b11, 1, 0b01, FPR64, am_unscaled64, "ldur", - [(set (f64 FPR64:$Rt), (load am_unscaled64:$addr))]>; -def LDURQi : LoadUnscaled<0b00, 1, 0b11, FPR128, am_unscaled128, "ldur", - [(set (f128 FPR128:$Rt), (load am_unscaled128:$addr))]>; - -def LDURHHi - : LoadUnscaled<0b01, 0, 0b01, GPR32, am_unscaled16, "ldurh", - [(set GPR32:$Rt, (zextloadi16 am_unscaled16:$addr))]>; -def LDURBBi - : LoadUnscaled<0b00, 0, 0b01, GPR32, am_unscaled8, "ldurb", - [(set GPR32:$Rt, (zextloadi8 am_unscaled8:$addr))]>; +defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64, "ldur", + [(set GPR64:$Rt, + (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32, "ldur", + [(set GPR32:$Rt, + (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8, "ldur", + [(set FPR8:$Rt, + (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16, "ldur", + [(set FPR16:$Rt, + (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32, "ldur", + [(set (f32 FPR32:$Rt), + (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64, "ldur", + [(set (f64 FPR64:$Rt), + (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128, "ldur", + [(set (f128 FPR128:$Rt), + (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>; + +defm LDURHH + : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh", + [(set GPR32:$Rt, + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURBB + : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb", + [(set GPR32:$Rt, + (zextloadi8 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; // Match all load 64 bits width whose type is compatible with FPR64 let Predicates = [IsLE] in { - def : Pat<(v2f32 (load am_unscaled64:$addr)), (LDURDi 
am_unscaled64:$addr)>; - def : Pat<(v8i8 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; - def : Pat<(v4i16 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; - def : Pat<(v2i32 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; + def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; } -def : Pat<(v1f64 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; -def : Pat<(v1i64 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; +def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; // Match all load 128 bits width whose type is compatible with FPR128 let Predicates = [IsLE] in { - def : Pat<(v4f32 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; - def : Pat<(v2f64 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; - def : Pat<(v16i8 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; - def : Pat<(v8i16 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; - def : Pat<(v4i32 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; - def : Pat<(v2i64 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; - def : Pat<(v2f64 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; + def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; } // anyext -> zext -def : Pat<(i32 (extloadi16 am_unscaled16:$addr)), (LDURHHi am_unscaled16:$addr)>; -def : Pat<(i32 (extloadi8 am_unscaled8:$addr)), (LDURBBi am_unscaled8:$addr)>; -def : Pat<(i32 (extloadi1 am_unscaled8:$addr)), (LDURBBi am_unscaled8:$addr)>; -def : Pat<(i64 (extloadi32 am_unscaled32:$addr)), - (SUBREG_TO_REG (i64 0), (LDURWi am_unscaled32:$addr), sub_32)>; -def : Pat<(i64 (extloadi16 am_unscaled16:$addr)), - (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>; -def : Pat<(i64 (extloadi8 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; -def : Pat<(i64 (extloadi1 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; +def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), + (LDURHHi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (LDURBBi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (LDURBBi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, 
simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; // unscaled zext -def : Pat<(i32 (zextloadi16 am_unscaled16:$addr)), - (LDURHHi am_unscaled16:$addr)>; -def : Pat<(i32 (zextloadi8 am_unscaled8:$addr)), - (LDURBBi am_unscaled8:$addr)>; -def : Pat<(i32 (zextloadi1 am_unscaled8:$addr)), - (LDURBBi am_unscaled8:$addr)>; -def : Pat<(i64 (zextloadi32 am_unscaled32:$addr)), - (SUBREG_TO_REG (i64 0), (LDURWi am_unscaled32:$addr), sub_32)>; -def : Pat<(i64 (zextloadi16 am_unscaled16:$addr)), - (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>; -def : Pat<(i64 (zextloadi8 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; -def : Pat<(i64 (zextloadi1 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; +def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), + (LDURHHi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (LDURBBi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (LDURBBi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; //--- @@ -1442,119 +1557,122 @@ def : Pat<(i64 (zextloadi1 am_unscaled8:$addr)), // they don't otherwise match the scaled addressing mode for LDR/STR. Don't // associate a DiagnosticType either, as we want the diagnostic for the // canonical form (the scaled operand) to take precedence.
-def MemoryUnscaledFB8Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB8"; - let RenderMethod = "addMemoryUnscaledOperands"; -} -def MemoryUnscaledFB16Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB16"; - let RenderMethod = "addMemoryUnscaledOperands"; +class SImm9OffsetOperand<int Width> : AsmOperandClass { + let Name = "SImm9OffsetFB" # Width; + let PredicateMethod = "isSImm9OffsetFB<" # Width # ">"; + let RenderMethod = "addImmOperands"; } -def MemoryUnscaledFB32Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB32"; - let RenderMethod = "addMemoryUnscaledOperands"; -} -def MemoryUnscaledFB64Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB64"; - let RenderMethod = "addMemoryUnscaledOperands"; -} -def MemoryUnscaledFB128Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB128"; - let RenderMethod = "addMemoryUnscaledOperands"; -} -def am_unscaled_fb8 : Operand { - let ParserMatchClass = MemoryUnscaledFB8Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); + +def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>; +def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>; +def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>; +def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>; +def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>; + +def simm9_offset_fb8 : Operand { + let ParserMatchClass = SImm9OffsetFB8Operand; } -def am_unscaled_fb16 : Operand { - let ParserMatchClass = MemoryUnscaledFB16Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); +def simm9_offset_fb16 : Operand { + let ParserMatchClass = SImm9OffsetFB16Operand; } -def am_unscaled_fb32 : Operand { - let ParserMatchClass = MemoryUnscaledFB32Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); +def simm9_offset_fb32 : Operand { + let ParserMatchClass = SImm9OffsetFB32Operand; } -def am_unscaled_fb64 : Operand { - let ParserMatchClass = MemoryUnscaledFB64Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); +def simm9_offset_fb64 : Operand { + let ParserMatchClass = SImm9OffsetFB64Operand; } -def am_unscaled_fb128 : Operand { - let ParserMatchClass = MemoryUnscaledFB128Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); +def simm9_offset_fb128 : Operand { + let ParserMatchClass = SImm9OffsetFB128Operand; } -def : InstAlias<"ldr $Rt, $addr", (LDURXi GPR64:$Rt, am_unscaled_fb64:$addr), 0>; -def : InstAlias<"ldr $Rt, $addr", (LDURWi GPR32:$Rt, am_unscaled_fb32:$addr), 0>; -def : InstAlias<"ldr $Rt, $addr", (LDURBi FPR8:$Rt, am_unscaled_fb8:$addr), 0>; -def : InstAlias<"ldr $Rt, $addr", (LDURHi FPR16:$Rt, am_unscaled_fb16:$addr), 0>; -def : InstAlias<"ldr $Rt, $addr", (LDURSi FPR32:$Rt, am_unscaled_fb32:$addr), 0>; -def : InstAlias<"ldr $Rt, $addr", (LDURDi FPR64:$Rt, am_unscaled_fb64:$addr), 0>; -def : InstAlias<"ldr $Rt, $addr", (LDURQi FPR128:$Rt, am_unscaled_fb128:$addr), 0>; + +// FIXME: these don't work +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURDi FPR64:$Rt, GPR64sp:$Rn,
simm9_offset_fb64:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; // zextload -> i64 -def : Pat<(i64 (zextloadi8 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; -def : Pat<(i64 (zextloadi16 am_unscaled16:$addr)), - (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>; +def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; // load sign-extended half-word -def LDURSHWi - : LoadUnscaled<0b01, 0, 0b11, GPR32, am_unscaled16, "ldursh", - [(set GPR32:$Rt, (sextloadi16 am_unscaled16:$addr))]>; -def LDURSHXi - : LoadUnscaled<0b01, 0, 0b10, GPR64, am_unscaled16, "ldursh", - [(set GPR64:$Rt, (sextloadi16 am_unscaled16:$addr))]>; +defm LDURSHW + : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh", + [(set GPR32:$Rt, + (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURSHX + : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh", + [(set GPR64:$Rt, + (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; // load sign-extended byte -def LDURSBWi - : LoadUnscaled<0b00, 0, 0b11, GPR32, am_unscaled8, "ldursb", - [(set GPR32:$Rt, (sextloadi8 am_unscaled8:$addr))]>; -def LDURSBXi - : LoadUnscaled<0b00, 0, 0b10, GPR64, am_unscaled8, "ldursb", - [(set GPR64:$Rt, (sextloadi8 am_unscaled8:$addr))]>; +defm LDURSBW + : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb", + [(set GPR32:$Rt, + (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURSBX + : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb", + [(set GPR64:$Rt, + (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; // load sign-extended word -def LDURSWi - : LoadUnscaled<0b10, 0, 0b10, GPR64, am_unscaled32, "ldursw", - [(set GPR64:$Rt, (sextloadi32 am_unscaled32:$addr))]>; +defm LDURSW + : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw", + [(set GPR64:$Rt, + (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; // zero and sign extending aliases from generic LDR* mnemonics to LDUR*. 
-def : InstAlias<"ldrb $Rt, $addr", - (LDURBBi GPR32:$Rt, am_unscaled_fb8:$addr), 0>; -def : InstAlias<"ldrh $Rt, $addr", - (LDURHHi GPR32:$Rt, am_unscaled_fb16:$addr), 0>; -def : InstAlias<"ldrsb $Rt, $addr", - (LDURSBWi GPR32:$Rt, am_unscaled_fb8:$addr), 0>; -def : InstAlias<"ldrsb $Rt, $addr", - (LDURSBXi GPR64:$Rt, am_unscaled_fb8:$addr), 0>; -def : InstAlias<"ldrsh $Rt, $addr", - (LDURSHWi GPR32:$Rt, am_unscaled_fb16:$addr), 0>; -def : InstAlias<"ldrsh $Rt, $addr", - (LDURSHXi GPR64:$Rt, am_unscaled_fb16:$addr), 0>; -def : InstAlias<"ldrsw $Rt, $addr", - (LDURSWi GPR64:$Rt, am_unscaled_fb32:$addr), 0>; +// FIXME: these don't work now +def : InstAlias<"ldrb $Rt, [$Rn, $offset]", + (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldrh $Rt, [$Rn, $offset]", + (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", + (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", + (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", + (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", + (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldrsw $Rt, [$Rn, $offset]", + (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; // Pre-fetch. -def PRFUMi : PrefetchUnscaled<0b11, 0, 0b10, "prfum", - [(ARM64Prefetch imm:$Rt, am_unscaled64:$addr)]>; +defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", + [(ARM64Prefetch imm:$Rt, + (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; //--- // (unscaled immediate, unprivileged) -def LDTRXi : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; -def LDTRWi : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">; +defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; +defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">; -def LDTRHi : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">; -def LDTRBi : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">; +defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">; +defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">; // load sign-extended half-word -def LDTRSHWi : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">; -def LDTRSHXi : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">; +defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">; +defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">; // load sign-extended byte -def LDTRSBWi : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">; -def LDTRSBXi : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">; +defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">; +defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">; // load sign-extended word -def LDTRSWi : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">; +defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">; //--- // (immediate pre-indexed) @@ -1642,18 +1760,18 @@ def LDRSBXpost_isel : LoadPostIdxPseudo; // Pair (indexed, offset) // FIXME: Use dedicated range-checked addressing mode operand here. 
-def STPWi : StorePairOffset<0b00, 0, GPR32, am_indexed32simm7, "stp">; -def STPXi : StorePairOffset<0b10, 0, GPR64, am_indexed64simm7, "stp">; -def STPSi : StorePairOffset<0b00, 1, FPR32, am_indexed32simm7, "stp">; -def STPDi : StorePairOffset<0b01, 1, FPR64, am_indexed64simm7, "stp">; -def STPQi : StorePairOffset<0b10, 1, FPR128, am_indexed128simm7, "stp">; +defm STPW : StorePairOffset<0b00, 0, GPR32, simm7s4, "stp">; +defm STPX : StorePairOffset<0b10, 0, GPR64, simm7s8, "stp">; +defm STPS : StorePairOffset<0b00, 1, FPR32, simm7s4, "stp">; +defm STPD : StorePairOffset<0b01, 1, FPR64, simm7s8, "stp">; +defm STPQ : StorePairOffset<0b10, 1, FPR128, simm7s16, "stp">; // Pair (pre-indexed) -def STPWpre : StorePairPreIdx<0b00, 0, GPR32, am_indexed32simm7_wb, "stp">; -def STPXpre : StorePairPreIdx<0b10, 0, GPR64, am_indexed64simm7_wb, "stp">; -def STPSpre : StorePairPreIdx<0b00, 1, FPR32, am_indexed32simm7_wb, "stp">; -def STPDpre : StorePairPreIdx<0b01, 1, FPR64, am_indexed64simm7_wb, "stp">; -def STPQpre : StorePairPreIdx<0b10, 1, FPR128, am_indexed128simm7_wb, "stp">; +def STPWpre : StorePairPreIdx<0b00, 0, GPR32, simm7s4, "stp">; +def STPXpre : StorePairPreIdx<0b10, 0, GPR64, simm7s8, "stp">; +def STPSpre : StorePairPreIdx<0b00, 1, FPR32, simm7s4, "stp">; +def STPDpre : StorePairPreIdx<0b01, 1, FPR64, simm7s8, "stp">; +def STPQpre : StorePairPreIdx<0b10, 1, FPR128, simm7s16, "stp">; // Pair (post-indexed) def STPWpost : StorePairPostIdx<0b00, 0, GPR32, simm7s4, "stp">; @@ -1663,248 +1781,294 @@ def STPDpost : StorePairPostIdx<0b01, 1, FPR64, simm7s8, "stp">; def STPQpost : StorePairPostIdx<0b10, 1, FPR128, simm7s16, "stp">; // Pair (no allocate) -def STNPWi : StorePairNoAlloc<0b00, 0, GPR32, am_indexed32simm7, "stnp">; -def STNPXi : StorePairNoAlloc<0b10, 0, GPR64, am_indexed64simm7, "stnp">; -def STNPSi : StorePairNoAlloc<0b00, 1, FPR32, am_indexed32simm7, "stnp">; -def STNPDi : StorePairNoAlloc<0b01, 1, FPR64, am_indexed64simm7, "stnp">; -def STNPQi : StorePairNoAlloc<0b10, 1, FPR128, am_indexed128simm7, "stnp">; +defm STNPW : StorePairNoAlloc<0b00, 0, GPR32, simm7s4, "stnp">; +defm STNPX : StorePairNoAlloc<0b10, 0, GPR64, simm7s8, "stnp">; +defm STNPS : StorePairNoAlloc<0b00, 1, FPR32, simm7s4, "stnp">; +defm STNPD : StorePairNoAlloc<0b01, 1, FPR64, simm7s8, "stnp">; +defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128, simm7s16, "stnp">; //--- // (Register offset) -let AddedComplexity = 10 in { - // Integer -def STRHHro : Store16RO<0b01, 0, 0b00, GPR32, "strh", - [(truncstorei16 GPR32:$Rt, ro_indexed16:$addr)]>; -def STRBBro : Store8RO<0b00, 0, 0b00, GPR32, "strb", - [(truncstorei8 GPR32:$Rt, ro_indexed8:$addr)]>; -def STRWro : Store32RO<0b10, 0, 0b00, GPR32, "str", - [(store GPR32:$Rt, ro_indexed32:$addr)]>; -def STRXro : Store64RO<0b11, 0, 0b00, GPR64, "str", - [(store GPR64:$Rt, ro_indexed64:$addr)]>; - -// truncstore i64 -def : Pat<(truncstorei8 GPR64:$Rt, ro_indexed8:$addr), - (STRBBro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed8:$addr)>; -def : Pat<(truncstorei16 GPR64:$Rt, ro_indexed16:$addr), - (STRHHro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed16:$addr)>; -def : Pat<(truncstorei32 GPR64:$Rt, ro_indexed32:$addr), - (STRWro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed32:$addr)>; +defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>; +defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>; +defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>; +defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>; // Floating-point -def
STRBro : Store8RO<0b00, 1, 0b00, FPR8, "str", - [(store FPR8:$Rt, ro_indexed8:$addr)]>; -def STRHro : Store16RO<0b01, 1, 0b00, FPR16, "str", - [(store (f16 FPR16:$Rt), ro_indexed16:$addr)]>; -def STRSro : Store32RO<0b10, 1, 0b00, FPR32, "str", - [(store (f32 FPR32:$Rt), ro_indexed32:$addr)]>; -def STRDro : Store64RO<0b11, 1, 0b00, FPR64, "str", - [(store (f64 FPR64:$Rt), ro_indexed64:$addr)]>; -def STRQro : Store128RO<0b00, 1, 0b10, FPR128, "str", []> { - let mayStore = 1; +defm STRB : Store8RO< 0b00, 1, 0b00, FPR8, "str", untyped, store>; +defm STRH : Store16RO<0b01, 1, 0b00, FPR16, "str", f16, store>; +defm STRS : Store32RO<0b10, 1, 0b00, FPR32, "str", f32, store>; +defm STRD : Store64RO<0b11, 1, 0b00, FPR64, "str", f64, store>; +defm STRQ : Store128RO<0b00, 1, 0b10, FPR128, "str", f128, store>; + +multiclass TruncStoreFrom64ROPat { + + def : Pat<(storeop GPR64:$Rt, + (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), + (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32), + GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; + + def : Pat<(storeop GPR64:$Rt, + (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), + (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32), + GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; +} + +let AddedComplexity = 10 in { + // truncstore i64 + defm : TruncStoreFrom64ROPat; + defm : TruncStoreFrom64ROPat; + defm : TruncStoreFrom64ROPat; +} + +multiclass VecROStorePat { + def : Pat<(store (VecTy FPR:$Rt), + (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), + (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; + + def : Pat<(store (VecTy FPR:$Rt), + (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), + (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; } +let AddedComplexity = 10 in { // Match all store 64 bits width whose type is compatible with FPR64 let Predicates = [IsLE] in { // We must use ST1 to store vectors in big-endian. - def : Pat<(store (v2f32 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; - def : Pat<(store (v8i8 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; - def : Pat<(store (v4i16 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; - def : Pat<(store (v2i32 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; } -def : Pat<(store (v1f64 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; -def : Pat<(store (v1i64 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; + +defm : VecROStorePat; +defm : VecROStorePat; // Match all store 128 bits width whose type is compatible with FPR128 let Predicates = [IsLE] in { // We must use ST1 to store vectors in big-endian. 
- def : Pat<(store (v4f32 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; - def : Pat<(store (v2f64 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; - def : Pat<(store (v16i8 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; - def : Pat<(store (v8i16 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; - def : Pat<(store (v4i32 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; - def : Pat<(store (v2i64 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; } -def : Pat<(store (f128 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; +} // AddedComplexity = 10 //--- // (unsigned immediate) -def STRXui : StoreUI<0b11, 0, 0b00, GPR64, am_indexed64, "str", - [(store GPR64:$Rt, am_indexed64:$addr)]>; -def STRWui : StoreUI<0b10, 0, 0b00, GPR32, am_indexed32, "str", - [(store GPR32:$Rt, am_indexed32:$addr)]>; -def STRBui : StoreUI<0b00, 1, 0b00, FPR8, am_indexed8, "str", - [(store FPR8:$Rt, am_indexed8:$addr)]>; -def STRHui : StoreUI<0b01, 1, 0b00, FPR16, am_indexed16, "str", - [(store (f16 FPR16:$Rt), am_indexed16:$addr)]>; -def STRSui : StoreUI<0b10, 1, 0b00, FPR32, am_indexed32, "str", - [(store (f32 FPR32:$Rt), am_indexed32:$addr)]>; -def STRDui : StoreUI<0b11, 1, 0b00, FPR64, am_indexed64, "str", - [(store (f64 FPR64:$Rt), am_indexed64:$addr)]>; -def STRQui : StoreUI<0b00, 1, 0b10, FPR128, am_indexed128, "str", []> { - let mayStore = 1; -} +defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str", + [(store GPR64:$Rt, + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; +defm STRW : StoreUI<0b10, 0, 0b00, GPR32, uimm12s4, "str", + [(store GPR32:$Rt, + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; +defm STRB : StoreUI<0b00, 1, 0b00, FPR8, uimm12s1, "str", + [(store FPR8:$Rt, + (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>; +defm STRH : StoreUI<0b01, 1, 0b00, FPR16, uimm12s2, "str", + [(store (f16 FPR16:$Rt), + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>; +defm STRS : StoreUI<0b10, 1, 0b00, FPR32, uimm12s4, "str", + [(store (f32 FPR32:$Rt), + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; +defm STRD : StoreUI<0b11, 1, 0b00, FPR64, uimm12s8, "str", + [(store (f64 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; +defm STRQ : StoreUI<0b00, 1, 0b10, FPR128, uimm12s16, "str", []>; + +defm STRHH : StoreUI<0b01, 0, 0b00, GPR32, uimm12s2, "strh", + [(truncstorei16 GPR32:$Rt, + (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset))]>; +defm STRBB : StoreUI<0b00, 0, 0b00, GPR32, uimm12s1, "strb", + [(truncstorei8 GPR32:$Rt, + (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset))]>; // Match all store 64 bits width whose type is compatible with FPR64 +let AddedComplexity = 10 in { let Predicates = [IsLE] in { // We must use ST1 to store vectors in big-endian. 
- def : Pat<(store (v2f32 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; - def : Pat<(store (v8i8 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; - def : Pat<(store (v4i16 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; - def : Pat<(store (v2i32 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; + def : Pat<(store (v2f32 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(store (v8i8 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(store (v4i16 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(store (v2i32 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; } -def : Pat<(store (v1f64 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; -def : Pat<(store (v1i64 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; +def : Pat<(store (v1f64 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat<(store (v1i64 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; // Match all store 128 bits width whose type is compatible with FPR128 let Predicates = [IsLE] in { // We must use ST1 to store vectors in big-endian. - def : Pat<(store (v4f32 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; - def : Pat<(store (v2f64 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; - def : Pat<(store (v16i8 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; - def : Pat<(store (v8i16 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; - def : Pat<(store (v4i32 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; - def : Pat<(store (v2i64 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; + def : Pat<(store (v4f32 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v2f64 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v16i8 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v8i16 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v4i32 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v2i64 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; } -def : Pat<(store (f128 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; - -def STRHHui : StoreUI<0b01, 0, 0b00, GPR32, am_indexed16, "strh", - [(truncstorei16 GPR32:$Rt, am_indexed16:$addr)]>; -def STRBBui : StoreUI<0b00, 0, 0b00, GPR32, am_indexed8, "strb", - [(truncstorei8 GPR32:$Rt, am_indexed8:$addr)]>; +def : Pat<(store (f128 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + 
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; // truncstore i64 -def : Pat<(truncstorei32 GPR64:$Rt, am_indexed32:$addr), - (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed32:$addr)>; -def : Pat<(truncstorei16 GPR64:$Rt, am_indexed16:$addr), - (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed16:$addr)>; -def : Pat<(truncstorei8 GPR64:$Rt, am_indexed8:$addr), - (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed8:$addr)>; +def : Pat<(truncstorei32 GPR64:$Rt, + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)), + (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>; +def : Pat<(truncstorei16 GPR64:$Rt, + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), + (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)), + (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>; } // AddedComplexity = 10 //--- // (unscaled immediate) -def STURXi : StoreUnscaled<0b11, 0, 0b00, GPR64, am_unscaled64, "stur", - [(store GPR64:$Rt, am_unscaled64:$addr)]>; -def STURWi : StoreUnscaled<0b10, 0, 0b00, GPR32, am_unscaled32, "stur", - [(store GPR32:$Rt, am_unscaled32:$addr)]>; -def STURBi : StoreUnscaled<0b00, 1, 0b00, FPR8, am_unscaled8, "stur", - [(store FPR8:$Rt, am_unscaled8:$addr)]>; -def STURHi : StoreUnscaled<0b01, 1, 0b00, FPR16, am_unscaled16, "stur", - [(store (f16 FPR16:$Rt), am_unscaled16:$addr)]>; -def STURSi : StoreUnscaled<0b10, 1, 0b00, FPR32, am_unscaled32, "stur", - [(store (f32 FPR32:$Rt), am_unscaled32:$addr)]>; -def STURDi : StoreUnscaled<0b11, 1, 0b00, FPR64, am_unscaled64, "stur", - [(store (f64 FPR64:$Rt), am_unscaled64:$addr)]>; -def STURQi : StoreUnscaled<0b00, 1, 0b10, FPR128, am_unscaled128, "stur", - [(store (f128 FPR128:$Rt), am_unscaled128:$addr)]>; -def STURHHi : StoreUnscaled<0b01, 0, 0b00, GPR32, am_unscaled16, "sturh", - [(truncstorei16 GPR32:$Rt, am_unscaled16:$addr)]>; -def STURBBi : StoreUnscaled<0b00, 0, 0b00, GPR32, am_unscaled8, "sturb", - [(truncstorei8 GPR32:$Rt, am_unscaled8:$addr)]>; +defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64, "stur", + [(store GPR64:$Rt, + (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; +defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32, "stur", + [(store GPR32:$Rt, + (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; +defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8, "stur", + [(store FPR8:$Rt, + (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; +defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16, "stur", + [(store (f16 FPR16:$Rt), + (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; +defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32, "stur", + [(store (f32 FPR32:$Rt), + (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; +defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64, "stur", + [(store (f64 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; +defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128, "stur", + [(store (f128 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>; +defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32, "sturh", + [(truncstorei16 GPR32:$Rt, + (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; +defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32, "sturb", + [(truncstorei8 GPR32:$Rt, + (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; // Match all store 64 bits width whose type is compatible with FPR64 let Predicates = [IsLE] in { // We must use ST1 to store vectors in big-endian. 
- def : Pat<(store (v2f32 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; - def : Pat<(store (v8i8 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; - def : Pat<(store (v4i16 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; - def : Pat<(store (v2i32 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; + def : Pat<(store (v2f32 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v8i8 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v4i16 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v2i32 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; } -def : Pat<(store (v1f64 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; -def : Pat<(store (v1i64 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; +def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; // Match all store 128 bits width whose type is compatible with FPR128 let Predicates = [IsLE] in { // We must use ST1 to store vectors in big-endian. - def : Pat<(store (v4f32 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; - def : Pat<(store (v2f64 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; - def : Pat<(store (v16i8 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; - def : Pat<(store (v8i16 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; - def : Pat<(store (v4i32 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; - def : Pat<(store (v2i64 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; - def : Pat<(store (v2f64 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; + def : Pat<(store (v4f32 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v2f64 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v16i8 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v8i16 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v4i32 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v2i64 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v2f64 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; } // unscaled i64 truncating stores -def : Pat<(truncstorei32 GPR64:$Rt, am_unscaled32:$addr), - (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled32:$addr)>; -def : Pat<(truncstorei16 GPR64:$Rt, am_unscaled16:$addr), - (STURHHi 
(EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled16:$addr)>; -def : Pat<(truncstorei8 GPR64:$Rt, am_unscaled8:$addr), - (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled8:$addr)>; +def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)), + (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), + (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), + (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; //--- // STR mnemonics fall back to STUR for negative or unaligned offsets. -def : InstAlias<"str $Rt, $addr", - (STURXi GPR64:$Rt, am_unscaled_fb64:$addr), 0>; -def : InstAlias<"str $Rt, $addr", - (STURWi GPR32:$Rt, am_unscaled_fb32:$addr), 0>; -def : InstAlias<"str $Rt, $addr", - (STURBi FPR8:$Rt, am_unscaled_fb8:$addr), 0>; -def : InstAlias<"str $Rt, $addr", - (STURHi FPR16:$Rt, am_unscaled_fb16:$addr), 0>; -def : InstAlias<"str $Rt, $addr", - (STURSi FPR32:$Rt, am_unscaled_fb32:$addr), 0>; -def : InstAlias<"str $Rt, $addr", - (STURDi FPR64:$Rt, am_unscaled_fb64:$addr), 0>; -def : InstAlias<"str $Rt, $addr", - (STURQi FPR128:$Rt, am_unscaled_fb128:$addr), 0>; - -def : InstAlias<"strb $Rt, $addr", - (STURBBi GPR32:$Rt, am_unscaled_fb8:$addr), 0>; -def : InstAlias<"strh $Rt, $addr", - (STURHHi GPR32:$Rt, am_unscaled_fb16:$addr), 0>; +// FIXME: these don't work now. +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; + +def : InstAlias<"strb $Rt, [$Rn, $offset]", + (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"strh $Rt, [$Rn, $offset]", + (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; //--- // (unscaled immediate, unprivileged) -def STTRWi : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; -def STTRXi : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; +defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; +defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; -def STTRHi : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; -def STTRBi : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; +defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; +defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; //--- // (immediate pre-indexed) @@ -1928,41 +2092,41 @@ defm STRWpre : StorePreIdxPseudo; defm STRHHpre : StorePreIdxPseudo; defm STRBBpre : StorePreIdxPseudo; // truncstore i64 -def : Pat<(pre_truncsti32 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRWpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, +def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRWpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, simm9:$off)>; -def 
: Pat<(pre_truncsti16 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRHHpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, +def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRHHpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, simm9:$off)>; -def : Pat<(pre_truncsti8 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRBBpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, +def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRBBpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, simm9:$off)>; -def : Pat<(pre_store (v8i8 FPR64:$Rt), am_noindex:$addr, simm9:$off), - (STRDpre_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(pre_store (v4i16 FPR64:$Rt), am_noindex:$addr, simm9:$off), - (STRDpre_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(pre_store (v2i32 FPR64:$Rt), am_noindex:$addr, simm9:$off), - (STRDpre_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(pre_store (v2f32 FPR64:$Rt), am_noindex:$addr, simm9:$off), - (STRDpre_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(pre_store (v1i64 FPR64:$Rt), am_noindex:$addr, simm9:$off), - (STRDpre_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(pre_store (v1f64 FPR64:$Rt), am_noindex:$addr, simm9:$off), - (STRDpre_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; - -def : Pat<(pre_store (v16i8 FPR128:$Rt), am_noindex:$addr, simm9:$off), - (STRQpre_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(pre_store (v8i16 FPR128:$Rt), am_noindex:$addr, simm9:$off), - (STRQpre_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(pre_store (v4i32 FPR128:$Rt), am_noindex:$addr, simm9:$off), - (STRQpre_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(pre_store (v4f32 FPR128:$Rt), am_noindex:$addr, simm9:$off), - (STRQpre_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(pre_store (v2i64 FPR128:$Rt), am_noindex:$addr, simm9:$off), - (STRQpre_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(pre_store (v2f64 FPR128:$Rt), am_noindex:$addr, simm9:$off), - (STRQpre_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + +def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : 
Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; //--- // (immediate post-indexed) @@ -1986,41 +2150,41 @@ defm STRWpost : StorePostIdxPseudo; defm STRHHpost : StorePostIdxPseudo; defm STRBBpost : StorePostIdxPseudo; // truncstore i64 -def : Pat<(post_truncsti32 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRWpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, +def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRWpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_truncsti16 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRHHpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, +def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRHHpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_truncsti8 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRBBpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, +def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRBBpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v8i8 FPR64:$Rt), am_noindex:$addr, simm9:$off), - (STRDpost_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(post_store (v4i16 FPR64:$Rt), am_noindex:$addr, simm9:$off), - (STRDpost_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(post_store (v2i32 FPR64:$Rt), am_noindex:$addr, simm9:$off), - (STRDpost_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(post_store (v2f32 FPR64:$Rt), am_noindex:$addr, simm9:$off), - (STRDpost_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(post_store (v1i64 FPR64:$Rt), am_noindex:$addr, simm9:$off), - (STRDpost_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(post_store (v1f64 FPR64:$Rt), am_noindex:$addr, simm9:$off), - (STRDpost_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; - -def : Pat<(post_store (v16i8 FPR128:$Rt), am_noindex:$addr, simm9:$off), - (STRQpost_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(post_store (v8i16 FPR128:$Rt), am_noindex:$addr, simm9:$off), - (STRQpost_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(post_store (v4i32 FPR128:$Rt), am_noindex:$addr, simm9:$off), - (STRQpost_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(post_store (v4f32 FPR128:$Rt), am_noindex:$addr, simm9:$off), - (STRQpost_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(post_store (v2i64 FPR128:$Rt), am_noindex:$addr, simm9:$off), - (STRQpost_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; -def : Pat<(post_store (v2f64 FPR128:$Rt), am_noindex:$addr, simm9:$off), - (STRQpost_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + +def 
: Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; //===----------------------------------------------------------------------===// // Load/store exclusive instructions. @@ -2845,25 +3009,46 @@ def : Pat<(v1f64 (int_arm64_neon_frsqrte (v1f64 FPR64:$Rn))), // just load it on the floating point unit. // Here are the patterns for 8 and 16-bits to float. // 8-bits -> float. -def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 ro_indexed8:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 am_indexed8:$addr)))), +multiclass UIntToFPROLoadPat { + def : Pat<(DstTy (uint_to_fp (SrcTy + (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, + ro.Wext:$extend))))), + (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), + (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), + sub))>; + + def : Pat<(DstTy (uint_to_fp (SrcTy + (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, + ro.Wext:$extend))))), + (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), + (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), + sub))>; +} + +defm : UIntToFPROLoadPat; +def : Pat <(f32 (uint_to_fp (i32 + (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), bsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 am_unscaled8:$addr)))), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; +def : Pat <(f32 (uint_to_fp (i32 + (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDURBi am_unscaled8:$addr), bsub))>; + (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; // 16-bits -> float. -def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 ro_indexed16:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 am_indexed16:$addr)))), +defm : UIntToFPROLoadPat; +def : Pat <(f32 (uint_to_fp (i32 + (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), hsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 am_unscaled16:$addr)))), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; +def : Pat <(f32 (uint_to_fp (i32 + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDURHi am_unscaled16:$addr), hsub))>; + (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; // 32-bits are handled in target specific dag combine: // performIntToFpCombine. // 64-bits integer to 32-bits floating point, not possible with @@ -2872,35 +3057,38 @@ def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 am_unscaled16:$addr)))), // Here are the patterns for 8, 16, 32, and 64-bits to double. // 8-bits -> double. 
-def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 ro_indexed8:$addr)))), +defm : UIntToFPROLoadPat; +def : Pat <(f64 (uint_to_fp (i32 + (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 am_indexed8:$addr)))), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; +def : Pat <(f64 (uint_to_fp (i32 + (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), bsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 am_unscaled8:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURBi am_unscaled8:$addr), bsub))>; + (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; // 16-bits -> double. -def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 ro_indexed16:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 am_indexed16:$addr)))), +defm : UIntToFPROLoadPat; +def : Pat <(f64 (uint_to_fp (i32 + (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), hsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 am_unscaled16:$addr)))), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; +def : Pat <(f64 (uint_to_fp (i32 + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURHi am_unscaled16:$addr), hsub))>; + (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; // 32-bits -> double. -def : Pat <(f64 (uint_to_fp (i32 (load ro_indexed32:$addr)))), +defm : UIntToFPROLoadPat; +def : Pat <(f64 (uint_to_fp (i32 + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), ssub))>; -def : Pat <(f64 (uint_to_fp (i32 (load am_indexed32:$addr)))), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>; +def : Pat <(f64 (uint_to_fp (i32 + (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSui am_indexed32:$addr), ssub))>; -def : Pat <(f64 (uint_to_fp (i32 (load am_unscaled32:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURSi am_unscaled32:$addr), ssub))>; + (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>; // 64-bits -> double are handled in target specific dag combine: // performIntToFpCombine. @@ -4226,70 +4414,50 @@ def : InstAlias<"uxtl2 $dst.2d, $src1.4s", // and still being faster. // However, this is not good for code size. // 8-bits -> float. 2 sizes step-up. 
-def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 ro_indexed8:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv8i8_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), - bsub), - 0), - dsub)), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 am_indexed8:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv8i8_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), - bsub), - 0), - dsub)), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 am_unscaled8:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv8i8_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURBi am_unscaled8:$addr), - bsub), - 0), - dsub)), +class SExtLoadi8CVTf32Pat + : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))), + (SCVTFv1i32 (f32 (EXTRACT_SUBREG + (SSHLLv4i16_shift + (f64 + (EXTRACT_SUBREG + (SSHLLv8i8_shift + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + INST, + bsub), + 0), + dsub)), 0), - ssub)))>, Requires<[NotForCodeSize]>; + ssub)))>, Requires<[NotForCodeSize]>; + +def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext), + (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>; +def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext), + (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>; +def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset), + (LDURBi GPR64sp:$Rn, simm9:$offset)>; + // 16-bits -> float. 1 size step-up. -def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 ro_indexed16:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), - hsub), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 am_indexed16:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), - hsub), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 am_unscaled16:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift +class SExtLoadi16CVTf32Pat + : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))), + (SCVTFv1i32 (f32 (EXTRACT_SUBREG + (SSHLLv4i16_shift (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURHi am_unscaled16:$addr), - hsub), - 0), - ssub)))>, Requires<[NotForCodeSize]>; + INST, + hsub), + 0), + ssub)))>, Requires<[NotForCodeSize]>; + +def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), + (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; +def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), + (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; +def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; +def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), + (LDURHi GPR64sp:$Rn, simm9:$offset)>; + // 32-bits to 32-bits are handled in target specific dag combine: // performIntToFpCombine. // 64-bits integer to 32-bits floating point, not possible with @@ -4299,70 +4467,49 @@ def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 am_unscaled16:$addr)))), // Here are the patterns for 8, 16, 32, and 64-bits to double. // 8-bits -> double. 
3 size step-up: give up. // 16-bits -> double. 2 size step. -def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 ro_indexed16:$addr)))), +class SExtLoadi16CVTf64Pat + : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))), (SCVTFv1i64 (f64 (EXTRACT_SUBREG (SSHLLv2i32_shift (f64 (EXTRACT_SUBREG (SSHLLv4i16_shift (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), - hsub), + INST, + hsub), 0), dsub)), 0), dsub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 am_indexed16:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), - hsub), - 0), - dsub)), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 am_unscaled16:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURHi am_unscaled16:$addr), - hsub), - 0), - dsub)), - 0), - dsub)))>, Requires<[NotForCodeSize]>; + +def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), + (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; +def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), + (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; +def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; +def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), + (LDURHi GPR64sp:$Rn, simm9:$offset)>; // 32-bits -> double. 1 size step-up. -def : Pat <(f64 (sint_to_fp (i32 (load ro_indexed32:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), - ssub), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f64 (sint_to_fp (i32 (load am_indexed32:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSui am_indexed32:$addr), - ssub), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f64 (sint_to_fp (i32 (load am_unscaled32:$addr)))), +class SExtLoadi32CVTf64Pat + : Pat <(f64 (sint_to_fp (i32 (load addrmode)))), (SCVTFv1i64 (f64 (EXTRACT_SUBREG (SSHLLv2i32_shift (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURSi am_unscaled32:$addr), - ssub), + INST, + ssub), 0), dsub)))>, Requires<[NotForCodeSize]>; + +def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext), + (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>; +def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext), + (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>; +def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>; +def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset), + (LDURSi GPR64sp:$Rn, simm9:$offset)>; + // 64-bits -> double are handled in target specific dag combine: // performIntToFpCombine. 
@@ -4381,7 +4528,7 @@ defm ST3 : SIMDSt3Multiple<"st3">; defm ST4 : SIMDSt4Multiple<"st4">; class Ld1Pat - : Pat<(ty (load am_simdnoindex:$vaddr)), (INST am_simdnoindex:$vaddr)>; + : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>; def : Ld1Pat; def : Ld1Pat; @@ -4393,8 +4540,8 @@ def : Ld1Pat; def : Ld1Pat; class St1Pat - : Pat<(store ty:$Vt, am_simdnoindex:$vaddr), - (INST ty:$Vt, am_simdnoindex:$vaddr)>; + : Pat<(store ty:$Vt, GPR64sp:$Rn), + (INST ty:$Vt, GPR64sp:$Rn)>; def : St1Pat; def : St1Pat; @@ -4432,37 +4579,37 @@ defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>; defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; } -def : Pat<(v8i8 (ARM64dup (i32 (extloadi8 am_simdnoindex:$vaddr)))), - (LD1Rv8b am_simdnoindex:$vaddr)>; -def : Pat<(v16i8 (ARM64dup (i32 (extloadi8 am_simdnoindex:$vaddr)))), - (LD1Rv16b am_simdnoindex:$vaddr)>; -def : Pat<(v4i16 (ARM64dup (i32 (extloadi16 am_simdnoindex:$vaddr)))), - (LD1Rv4h am_simdnoindex:$vaddr)>; -def : Pat<(v8i16 (ARM64dup (i32 (extloadi16 am_simdnoindex:$vaddr)))), - (LD1Rv8h am_simdnoindex:$vaddr)>; -def : Pat<(v2i32 (ARM64dup (i32 (load am_simdnoindex:$vaddr)))), - (LD1Rv2s am_simdnoindex:$vaddr)>; -def : Pat<(v4i32 (ARM64dup (i32 (load am_simdnoindex:$vaddr)))), - (LD1Rv4s am_simdnoindex:$vaddr)>; -def : Pat<(v2i64 (ARM64dup (i64 (load am_simdnoindex:$vaddr)))), - (LD1Rv2d am_simdnoindex:$vaddr)>; -def : Pat<(v1i64 (ARM64dup (i64 (load am_simdnoindex:$vaddr)))), - (LD1Rv1d am_simdnoindex:$vaddr)>; +def : Pat<(v8i8 (ARM64dup (i32 (extloadi8 GPR64sp:$Rn)))), + (LD1Rv8b GPR64sp:$Rn)>; +def : Pat<(v16i8 (ARM64dup (i32 (extloadi8 GPR64sp:$Rn)))), + (LD1Rv16b GPR64sp:$Rn)>; +def : Pat<(v4i16 (ARM64dup (i32 (extloadi16 GPR64sp:$Rn)))), + (LD1Rv4h GPR64sp:$Rn)>; +def : Pat<(v8i16 (ARM64dup (i32 (extloadi16 GPR64sp:$Rn)))), + (LD1Rv8h GPR64sp:$Rn)>; +def : Pat<(v2i32 (ARM64dup (i32 (load GPR64sp:$Rn)))), + (LD1Rv2s GPR64sp:$Rn)>; +def : Pat<(v4i32 (ARM64dup (i32 (load GPR64sp:$Rn)))), + (LD1Rv4s GPR64sp:$Rn)>; +def : Pat<(v2i64 (ARM64dup (i64 (load GPR64sp:$Rn)))), + (LD1Rv2d GPR64sp:$Rn)>; +def : Pat<(v1i64 (ARM64dup (i64 (load GPR64sp:$Rn)))), + (LD1Rv1d GPR64sp:$Rn)>; // Grab the floating point version too -def : Pat<(v2f32 (ARM64dup (f32 (load am_simdnoindex:$vaddr)))), - (LD1Rv2s am_simdnoindex:$vaddr)>; -def : Pat<(v4f32 (ARM64dup (f32 (load am_simdnoindex:$vaddr)))), - (LD1Rv4s am_simdnoindex:$vaddr)>; -def : Pat<(v2f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))), - (LD1Rv2d am_simdnoindex:$vaddr)>; -def : Pat<(v1f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))), - (LD1Rv1d am_simdnoindex:$vaddr)>; +def : Pat<(v2f32 (ARM64dup (f32 (load GPR64sp:$Rn)))), + (LD1Rv2s GPR64sp:$Rn)>; +def : Pat<(v4f32 (ARM64dup (f32 (load GPR64sp:$Rn)))), + (LD1Rv4s GPR64sp:$Rn)>; +def : Pat<(v2f64 (ARM64dup (f64 (load GPR64sp:$Rn)))), + (LD1Rv2d GPR64sp:$Rn)>; +def : Pat<(v1f64 (ARM64dup (f64 (load GPR64sp:$Rn)))), + (LD1Rv1d GPR64sp:$Rn)>; class Ld1Lane128Pat : Pat<(vector_insert (VTy VecListOne128:$Rd), - (STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx), - (LD1 VecListOne128:$Rd, VecIndex:$idx, am_simdnoindex:$vaddr)>; + (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), + (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>; def : Ld1Lane128Pat; def : Ld1Lane128Pat; @@ -4474,10 +4621,10 @@ def : Ld1Lane128Pat; class Ld1Lane64Pat : Pat<(vector_insert (VTy VecListOne64:$Rd), - (STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx), + (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 
(EXTRACT_SUBREG (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), - VecIndex:$idx, am_simdnoindex:$vaddr), + VecIndex:$idx, GPR64sp:$Rn), dsub)>; def : Ld1Lane64Pat; @@ -4497,13 +4644,13 @@ defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; -let AddedComplexity = 8 in +let AddedComplexity = 15 in class St1Lane128Pat : Pat<(scalar_store (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), - am_simdnoindex:$vaddr), - (ST1 VecListOne128:$Vt, VecIndex:$idx, am_simdnoindex:$vaddr)>; + GPR64sp:$Rn), + (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>; def : St1Lane128Pat; def : St1Lane128Pat; @@ -4512,14 +4659,14 @@ def : St1Lane128Pat; def : St1Lane128Pat; def : St1Lane128Pat; -let AddedComplexity = 8 in +let AddedComplexity = 15 in class St1Lane64Pat : Pat<(scalar_store (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), - am_simdnoindex:$vaddr), + GPR64sp:$Rn), (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), - VecIndex:$idx, am_simdnoindex:$vaddr)>; + VecIndex:$idx, GPR64sp:$Rn)>; def : St1Lane64Pat; def : St1Lane64Pat; @@ -4531,15 +4678,15 @@ multiclass St1LanePost64Pat { def : Pat<(scalar_store (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), - am_simdnoindex:$vaddr, offset), + GPR64sp:$Rn, offset), (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), - VecIndex:$idx, am_simdnoindex:$vaddr, XZR)>; + VecIndex:$idx, GPR64sp:$Rn, XZR)>; def : Pat<(scalar_store (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), - am_simdnoindex:$vaddr, GPR64:$Rm), + GPR64sp:$Rn, GPR64:$Rm), (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), - VecIndex:$idx, am_simdnoindex:$vaddr, $Rm)>; + VecIndex:$idx, GPR64sp:$Rn, $Rm)>; } defm : St1LanePost64Pat; @@ -4555,13 +4702,13 @@ multiclass St1LanePost128Pat { def : Pat<(scalar_store (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), - am_simdnoindex:$vaddr, offset), - (ST1 VecListOne128:$Vt, VecIndex:$idx, am_simdnoindex:$vaddr, XZR)>; + GPR64sp:$Rn, offset), + (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>; def : Pat<(scalar_store (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), - am_simdnoindex:$vaddr, GPR64:$Rm), - (ST1 VecListOne128:$Vt, VecIndex:$idx, am_simdnoindex:$vaddr, $Rm)>; + GPR64sp:$Rn, GPR64:$Rm), + (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>; } defm : St1LanePost128Pat { def GPR32sponly : RegisterClass<"ARM64", [i32], 32, (add WSP)>; def GPR64sponly : RegisterClass<"ARM64", [i64], 64, (add SP)>; +def GPR64spPlus0Operand : AsmOperandClass { + let Name = "GPR64sp0"; + let RenderMethod = "addRegOperands"; + let ParserMethod = "tryParseGPR64sp0Operand"; +} + +def GPR64sp0 : RegisterOperand { + let ParserMatchClass = GPR64spPlus0Operand; +} + // GPR register classes which include WZR/XZR AND SP/WSP. This is not a // constraint used by any instructions, it is used as a common super-class. 
def GPR32all : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR, WSP)>; diff --git a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp b/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp index cc301f60ede8..982690fe86c0 100644 --- a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp +++ b/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp @@ -57,7 +57,6 @@ class ARM64AsmParser : public MCTargetAsmParser { int tryParseRegister(); int tryMatchVectorRegister(StringRef &Kind, bool expected); bool parseRegister(OperandVector &Operands); - bool parseMemory(OperandVector &Operands); bool parseSymbolicImmVal(const MCExpr *&ImmVal); bool parseVectorList(OperandVector &Operands); bool parseOperand(OperandVector &Operands, bool isCondCode, @@ -86,7 +85,6 @@ class ARM64AsmParser : public MCTargetAsmParser { /// } OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands); - OperandMatchResultTy tryParseNoIndexMemory(OperandVector &Operands); OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands); OperandMatchResultTy tryParseMRSSystemRegister(OperandVector &Operands); OperandMatchResultTy tryParseSysReg(OperandVector &Operands); @@ -96,6 +94,7 @@ class ARM64AsmParser : public MCTargetAsmParser { OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands); OperandMatchResultTy tryParseFPImm(OperandVector &Operands); OperandMatchResultTy tryParseAddSubImm(OperandVector &Operands); + OperandMatchResultTy tryParseGPR64sp0Operand(OperandVector &Operands); bool tryParseVectorRegister(OperandVector &Operands); public: @@ -133,18 +132,11 @@ namespace { /// ARM64Operand - Instances of this class represent a parsed ARM64 machine /// instruction. class ARM64Operand : public MCParsedAsmOperand { -public: - enum MemIdxKindTy { - ImmediateOffset, // pre-indexed, no writeback - RegisterOffset // register offset, with optional extend - }; - private: enum KindTy { k_Immediate, k_ShiftedImm, k_CondCode, - k_Memory, k_Register, k_VectorList, k_VectorIndex, @@ -157,7 +149,7 @@ class ARM64Operand : public MCParsedAsmOperand { k_Barrier } Kind; - SMLoc StartLoc, EndLoc, OffsetLoc; + SMLoc StartLoc, EndLoc; struct TokOp { const char *Data; @@ -221,22 +213,13 @@ class ARM64Operand : public MCParsedAsmOperand { struct ShiftExtendOp { ARM64_AM::ShiftExtendType Type; unsigned Amount; + bool HasExplicitAmount; }; struct ExtendOp { unsigned Val; }; - // This is for all forms of ARM64 address expressions - struct MemOp { - unsigned BaseRegNum, OffsetRegNum; - ARM64_AM::ShiftExtendType ExtType; - unsigned ShiftVal; - bool ExplicitShift; - const MCExpr *OffsetImm; - MemIdxKindTy Mode; - }; - union { struct TokOp Tok; struct RegOp Reg; @@ -251,7 +234,6 @@ class ARM64Operand : public MCParsedAsmOperand { struct SysCRImmOp SysCRImm; struct PrefetchOp Prefetch; struct ShiftExtendOp ShiftExtend; - struct MemOp Mem; }; // Keep the MCContext around as the MCExprs may need manipulated during @@ -303,9 +285,6 @@ class ARM64Operand : public MCParsedAsmOperand { case k_Prefetch: Prefetch = o.Prefetch; break; - case k_Memory: - Mem = o.Mem; - break; case k_ShiftExtend: ShiftExtend = o.ShiftExtend; break; @@ -316,8 +295,6 @@ class ARM64Operand : public MCParsedAsmOperand { SMLoc getStartLoc() const override { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const override { return EndLoc; } - /// getOffsetLoc - Get the location of the offset of this memory operand. 
-  SMLoc getOffsetLoc() const { return OffsetLoc; }
 
   StringRef getToken() const {
     assert(Kind == k_Token && "Invalid access!");
@@ -409,7 +386,13 @@ class ARM64Operand : public MCParsedAsmOperand {
     return ShiftExtend.Amount;
   }
 
+  bool hasShiftExtendAmount() const {
+    assert(Kind == k_ShiftExtend && "Invalid access!");
+    return ShiftExtend.HasExplicitAmount;
+  }
+
   bool isImm() const override { return Kind == k_Immediate; }
+  bool isMem() const override { return false; }
   bool isSImm9() const {
     if (!isImm())
       return false;
@@ -446,6 +429,52 @@ class ARM64Operand : public MCParsedAsmOperand {
     int64_t Val = MCE->getValue();
     return (Val >= -1024 && Val <= 1008 && (Val & 15) == 0);
   }
+
+  bool isSymbolicUImm12Offset(const MCExpr *Expr, unsigned Scale) const {
+    ARM64MCExpr::VariantKind ELFRefKind;
+    MCSymbolRefExpr::VariantKind DarwinRefKind;
+    int64_t Addend;
+    if (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind,
+                                           Addend)) {
+      // If we don't understand the expression, assume the best and
+      // let the fixup and relocation code deal with it.
+      return true;
+    }
+
+    if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF ||
+        ELFRefKind == ARM64MCExpr::VK_LO12 ||
+        ELFRefKind == ARM64MCExpr::VK_GOT_LO12 ||
+        ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 ||
+        ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC ||
+        ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 ||
+        ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC ||
+        ELFRefKind == ARM64MCExpr::VK_GOTTPREL_LO12_NC ||
+        ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) {
+      // Note that we don't range-check the addend. It's adjusted modulo page
+      // size when converted, so there is no "out of range" condition when using
+      // @pageoff.
+      return Addend >= 0 && (Addend % Scale) == 0;
+    } else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF ||
+               DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) {
+      // @gotpageoff/@tlvppageoff can only be used directly, not with an addend.
+      return Addend == 0;
+    }
+
+    return false;
+  }
+
+  template <int Scale> bool isUImm12Offset() const {
+    if (!isImm())
+      return false;
+
+    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+    if (!MCE)
+      return isSymbolicUImm12Offset(getImm(), Scale);
+
+    int64_t Val = MCE->getValue();
+    return (Val % Scale) == 0 && Val >= 0 && (Val / Scale) < 0x1000;
+  }
+
   bool isImm0_7() const {
     if (!isImm())
       return false;
@@ -826,6 +855,11 @@ class ARM64Operand : public MCParsedAsmOperand {
            ARM64MCRegisterClasses[ARM64::GPR64RegClassID].contains(Reg.RegNum);
   }
 
+  bool isGPR64sp0() const {
+    return Kind == k_Register && !Reg.isVector &&
+           ARM64MCRegisterClasses[ARM64::GPR64spRegClassID].contains(Reg.RegNum);
+  }
+
   /// Is this a vector list with the type implicit (presumably attached to the
   /// instruction itself)?
template bool isImplicitlyTypedVectorList() const { @@ -863,7 +897,6 @@ class ARM64Operand : public MCParsedAsmOperand { bool isTokenEqual(StringRef Str) const { return Kind == k_Token && getToken() == Str; } - bool isMem() const override { return Kind == k_Memory; } bool isSysCR() const { return Kind == k_SysCR; } bool isPrefetch() const { return Kind == k_Prefetch; } bool isShiftExtend() const { return Kind == k_ShiftExtend; } @@ -903,6 +936,24 @@ class ARM64Operand : public MCParsedAsmOperand { getShiftExtendAmount() <= 4; } + template bool isMemXExtend() const { + if (!isExtend()) + return false; + ARM64_AM::ShiftExtendType ET = getShiftExtendType(); + return (ET == ARM64_AM::LSL || ET == ARM64_AM::SXTX) && + (getShiftExtendAmount() == Log2_32(Width / 8) || + getShiftExtendAmount() == 0); + } + + template bool isMemWExtend() const { + if (!isExtend()) + return false; + ARM64_AM::ShiftExtendType ET = getShiftExtendType(); + return (ET == ARM64_AM::UXTW || ET == ARM64_AM::SXTW) && + (getShiftExtendAmount() == Log2_32(Width / 8) || + getShiftExtendAmount() == 0); + } + template bool isArithmeticShifter() const { if (!isShifter()) @@ -978,180 +1029,14 @@ class ARM64Operand : public MCParsedAsmOperand { return getShiftExtendType() == ARM64_AM::MSL && (Shift == 8 || Shift == 16); } - bool isMemoryRegisterOffset8() const { - return isMem() && Mem.Mode == RegisterOffset && Mem.ShiftVal == 0; - } - - bool isMemoryRegisterOffset16() const { - return isMem() && Mem.Mode == RegisterOffset && - (Mem.ShiftVal == 0 || Mem.ShiftVal == 1); - } - - bool isMemoryRegisterOffset32() const { - return isMem() && Mem.Mode == RegisterOffset && - (Mem.ShiftVal == 0 || Mem.ShiftVal == 2); - } - - bool isMemoryRegisterOffset64() const { - return isMem() && Mem.Mode == RegisterOffset && - (Mem.ShiftVal == 0 || Mem.ShiftVal == 3); - } - - bool isMemoryRegisterOffset128() const { - return isMem() && Mem.Mode == RegisterOffset && - (Mem.ShiftVal == 0 || Mem.ShiftVal == 4); - } - - bool isMemoryUnscaled() const { - if (!isMem()) - return false; - if (Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - // Make sure the immediate value is valid. - const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm); - if (!CE) - return false; - // The offset must fit in a signed 9-bit unscaled immediate. - int64_t Value = CE->getValue(); - return (Value >= -256 && Value < 256); - } // Fallback unscaled operands are for aliases of LDR/STR that fall back // to LDUR/STUR when the offset is not legal for the former but is for // the latter. As such, in addition to checking for being a legal unscaled // address, also check that it is not a legal scaled address. This avoids // ambiguity in the matcher. - bool isMemoryUnscaledFB8() const { - return isMemoryUnscaled() && !isMemoryIndexed8(); - } - bool isMemoryUnscaledFB16() const { - return isMemoryUnscaled() && !isMemoryIndexed16(); - } - bool isMemoryUnscaledFB32() const { - return isMemoryUnscaled() && !isMemoryIndexed32(); - } - bool isMemoryUnscaledFB64() const { - return isMemoryUnscaled() && !isMemoryIndexed64(); - } - bool isMemoryUnscaledFB128() const { - return isMemoryUnscaled() && !isMemoryIndexed128(); - } - bool isMemoryIndexed(unsigned Scale) const { - if (!isMem()) - return false; - if (Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - // Make sure the immediate value is valid. 
- const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm); - - if (CE) { - // The offset must be a positive multiple of the scale and in range of - // encoding with a 12-bit immediate. - int64_t Value = CE->getValue(); - return (Value >= 0 && (Value % Scale) == 0 && Value <= (4095 * Scale)); - } - - // If it's not a constant, check for some expressions we know. - const MCExpr *Expr = Mem.OffsetImm; - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - int64_t Addend; - if (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, - Addend)) { - // If we don't understand the expression, assume the best and - // let the fixup and relocation code deal with it. - return true; - } - - if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || - ELFRefKind == ARM64MCExpr::VK_LO12 || - ELFRefKind == ARM64MCExpr::VK_GOT_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_GOTTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) { - // Note that we don't range-check the addend. It's adjusted modulo page - // size when converted, so there is no "out of range" condition when using - // @pageoff. - return Addend >= 0 && (Addend % Scale) == 0; - } else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF || - DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) { - // @gotpageoff/@tlvppageoff can only be used directly, not with an addend. - return Addend == 0; - } - - return false; - } - bool isMemoryIndexed128() const { return isMemoryIndexed(16); } - bool isMemoryIndexed64() const { return isMemoryIndexed(8); } - bool isMemoryIndexed32() const { return isMemoryIndexed(4); } - bool isMemoryIndexed16() const { return isMemoryIndexed(2); } - bool isMemoryIndexed8() const { return isMemoryIndexed(1); } - bool isMemoryNoIndex() const { - if (!isMem()) - return false; - if (Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - - // Make sure the immediate value is valid. Only zero is allowed. 
- const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm); - if (!CE || CE->getValue() != 0) - return false; - return true; - } - bool isMemorySIMDNoIndex() const { - if (!isMem()) - return false; - if (Mem.Mode != ImmediateOffset) - return false; - return Mem.OffsetImm == nullptr; - } - bool isMemoryIndexedSImm9() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return Value >= -256 && Value <= 255; - } - bool isMemoryIndexed32SImm7() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return ((Value % 4) == 0) && Value >= -256 && Value <= 252; - } - bool isMemoryIndexed64SImm7() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return ((Value % 8) == 0) && Value >= -512 && Value <= 504; - } - bool isMemoryIndexed128SImm7() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return ((Value % 16) == 0) && Value >= -1024 && Value <= 1008; + template + bool isSImm9OffsetFB() const { + return isSImm9() && !isUImm12Offset(); } bool isAdrpLabel() const { @@ -1313,6 +1198,18 @@ class ARM64Operand : public MCParsedAsmOperand { addImmOperands(Inst, N); } + template + void addUImm12OffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + + if (!MCE) { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + return; + } + Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / Scale)); + } + void addSImm9Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = dyn_cast(getImm()); @@ -1577,6 +1474,26 @@ class ARM64Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateImm(Imm)); } + void addMemExtendOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + ARM64_AM::ShiftExtendType ET = getShiftExtendType(); + bool IsSigned = ET == ARM64_AM::SXTW || ET == ARM64_AM::SXTX; + Inst.addOperand(MCOperand::CreateImm(IsSigned)); + Inst.addOperand(MCOperand::CreateImm(getShiftExtendAmount() != 0)); + } + + // For 8-bit load/store instructions with a register offset, both the + // "DoShift" and "NoShift" variants have a shift of 0. Because of this, + // they're disambiguated by whether the shift was explicit or implicit rather + // than its size. 
+ void addMemExtend8Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + ARM64_AM::ShiftExtendType ET = getShiftExtendType(); + bool IsSigned = ET == ARM64_AM::SXTW || ET == ARM64_AM::SXTX; + Inst.addOperand(MCOperand::CreateImm(IsSigned)); + Inst.addOperand(MCOperand::CreateImm(hasShiftExtendAmount())); + } + template void addMOVZMovAliasOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); @@ -1595,168 +1512,6 @@ class ARM64Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateImm((~Value >> Shift) & 0xffff)); } - void addMemoryRegisterOffsetOperands(MCInst &Inst, unsigned N, bool DoShift) { - assert(N == 3 && "Invalid number of operands!"); - - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - Inst.addOperand(MCOperand::CreateReg(getXRegFromWReg(Mem.OffsetRegNum))); - unsigned ExtendImm = ARM64_AM::getMemExtendImm(Mem.ExtType, DoShift); - Inst.addOperand(MCOperand::CreateImm(ExtendImm)); - } - - void addMemoryRegisterOffset8Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ExplicitShift); - } - - void addMemoryRegisterOffset16Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 1); - } - - void addMemoryRegisterOffset32Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 2); - } - - void addMemoryRegisterOffset64Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 3); - } - - void addMemoryRegisterOffset128Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 4); - } - - void addMemoryIndexedOperands(MCInst &Inst, unsigned N, - unsigned Scale) const { - // Add the base register operand. - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - - if (!Mem.OffsetImm) { - // There isn't an offset. - Inst.addOperand(MCOperand::CreateImm(0)); - return; - } - - // Add the offset operand. - if (const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm)) { - assert(CE->getValue() % Scale == 0 && - "Offset operand must be multiple of the scale!"); - - // The MCInst offset operand doesn't include the low bits (like the - // instruction encoding). - Inst.addOperand(MCOperand::CreateImm(CE->getValue() / Scale)); - } - - // If this is a pageoff symrefexpr with an addend, the linker will - // do the scaling of the addend. - // - // Otherwise we don't know what this is, so just add the scaling divide to - // the expression and let the MC fixup evaluation code deal with it. - const MCExpr *Expr = Mem.OffsetImm; - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - int64_t Addend; - if (Scale > 1 && - (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, - Addend) || - (Addend != 0 && DarwinRefKind != MCSymbolRefExpr::VK_PAGEOFF))) { - Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(Scale, Ctx), - Ctx); - } - - Inst.addOperand(MCOperand::CreateExpr(Expr)); - } - - void addMemoryUnscaledOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryUnscaled() && "Invalid number of operands!"); - // Add the base register operand. - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - - // Add the offset operand. - if (!Mem.OffsetImm) - Inst.addOperand(MCOperand::CreateImm(0)); - else { - // Only constant offsets supported. 
- const MCConstantExpr *CE = cast(Mem.OffsetImm); - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); - } - } - - void addMemoryIndexed128Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed128() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 16); - } - - void addMemoryIndexed64Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed64() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 8); - } - - void addMemoryIndexed32Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed32() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 4); - } - - void addMemoryIndexed16Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed16() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 2); - } - - void addMemoryIndexed8Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed8() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 1); - } - - void addMemoryNoIndexOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && isMemoryNoIndex() && "Invalid number of operands!"); - // Add the base register operand (the offset is always zero, so ignore it). - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - } - - void addMemorySIMDNoIndexOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && isMemorySIMDNoIndex() && "Invalid number of operands!"); - // Add the base register operand (the offset is always zero, so ignore it). - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - } - - void addMemoryWritebackIndexedOperands(MCInst &Inst, unsigned N, - unsigned Scale) const { - assert(N == 2 && "Invalid number of operands!"); - - // Add the base register operand. - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - - // Add the offset operand. 
- int64_t Offset = 0; - if (Mem.OffsetImm) { - const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm); - assert(CE && "Non-constant indexed offset operand!"); - Offset = CE->getValue(); - } - - if (Scale != 1) { - assert(Offset % Scale == 0 && - "Offset operand must be a multiple of the scale!"); - Offset /= Scale; - } - - Inst.addOperand(MCOperand::CreateImm(Offset)); - } - - void addMemoryIndexedSImm9Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 1); - } - - void addMemoryIndexed32SImm7Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 4); - } - - void addMemoryIndexed64SImm7Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 8); - } - - void addMemoryIndexed128SImm7Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 16); - } - void print(raw_ostream &OS) const override; static ARM64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S, @@ -1857,40 +1612,6 @@ class ARM64Operand : public MCParsedAsmOperand { return Op; } - static ARM64Operand *CreateMem(unsigned BaseRegNum, const MCExpr *Off, - SMLoc S, SMLoc E, SMLoc OffsetLoc, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Memory, Ctx); - Op->Mem.BaseRegNum = BaseRegNum; - Op->Mem.OffsetRegNum = 0; - Op->Mem.OffsetImm = Off; - Op->Mem.ExtType = ARM64_AM::UXTX; - Op->Mem.ShiftVal = 0; - Op->Mem.ExplicitShift = false; - Op->Mem.Mode = ImmediateOffset; - Op->OffsetLoc = OffsetLoc; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateRegOffsetMem(unsigned BaseReg, unsigned OffsetReg, - ARM64_AM::ShiftExtendType ExtType, - unsigned ShiftVal, bool ExplicitShift, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Memory, Ctx); - Op->Mem.BaseRegNum = BaseReg; - Op->Mem.OffsetRegNum = OffsetReg; - Op->Mem.OffsetImm = nullptr; - Op->Mem.ExtType = ExtType; - Op->Mem.ShiftVal = ShiftVal; - Op->Mem.ExplicitShift = ExplicitShift; - Op->Mem.Mode = RegisterOffset; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - static ARM64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E, MCContext &Ctx) { ARM64Operand *Op = new ARM64Operand(k_SysCR, Ctx); @@ -1908,11 +1629,13 @@ class ARM64Operand : public MCParsedAsmOperand { return Op; } - static ARM64Operand *CreateShiftExtend(ARM64_AM::ShiftExtendType ShOp, unsigned Val, + static ARM64Operand *CreateShiftExtend(ARM64_AM::ShiftExtendType ShOp, + unsigned Val, bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) { ARM64Operand *Op = new ARM64Operand(k_ShiftExtend, Ctx); Op->ShiftExtend.Type = ShOp; Op->ShiftExtend.Amount = Val; + Op->ShiftExtend.HasExplicitAmount = HasExplicitAmount; Op->StartLoc = S; Op->EndLoc = E; return Op; @@ -1949,9 +1672,6 @@ void ARM64Operand::print(raw_ostream &OS) const { case k_CondCode: OS << ""; break; - case k_Memory: - OS << ""; - break; case k_Register: OS << ""; break; @@ -1986,7 +1706,10 @@ void ARM64Operand::print(raw_ostream &OS) const { } case k_ShiftExtend: { OS << "<" << ARM64_AM::getShiftExtendName(getShiftExtendType()) << " #" - << getShiftExtendAmount() << ">"; + << getShiftExtendAmount(); + if (!hasShiftExtendAmount()) + OS << ""; + OS << '>'; break; } } @@ -2498,7 +2221,7 @@ ARM64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) { // "extend" type operatoins don't need an immediate, #0 is implicit. 
SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); Operands.push_back( - ARM64Operand::CreateShiftExtend(ShOp, 0, S, E, getContext())); + ARM64Operand::CreateShiftExtend(ShOp, 0, false, S, E, getContext())); return MatchOperand_Success; } @@ -2523,8 +2246,8 @@ ARM64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) { } SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateShiftExtend(ShOp, MCE->getValue(), S, - E, getContext())); + Operands.push_back(ARM64Operand::CreateShiftExtend(ShOp, MCE->getValue(), + true, S, E, getContext())); return MatchOperand_Success; } @@ -2931,213 +2654,6 @@ bool ARM64AsmParser::parseRegister(OperandVector &Operands) { return false; } -/// tryParseNoIndexMemory - Custom parser method for memory operands that -/// do not allow base regisrer writeback modes, -/// or those that handle writeback separately from -/// the memory operand (like the AdvSIMD ldX/stX -/// instructions. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseNoIndexMemory(OperandVector &Operands) { - if (Parser.getTok().isNot(AsmToken::LBrac)) - return MatchOperand_NoMatch; - SMLoc S = getLoc(); - Parser.Lex(); // Eat left bracket token. - - const AsmToken &BaseRegTok = Parser.getTok(); - if (BaseRegTok.isNot(AsmToken::Identifier)) { - Error(BaseRegTok.getLoc(), "register expected"); - return MatchOperand_ParseFail; - } - - int64_t Reg = tryParseRegister(); - if (Reg == -1) { - Error(BaseRegTok.getLoc(), "register expected"); - return MatchOperand_ParseFail; - } - - SMLoc E = getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(E, "']' expected"); - return MatchOperand_ParseFail; - } - - Parser.Lex(); // Eat right bracket token. - - Operands.push_back(ARM64Operand::CreateMem(Reg, nullptr, S, E, E, getContext())); - return MatchOperand_Success; -} - -/// parseMemory - Parse a memory operand for a basic load/store instruction. -bool ARM64AsmParser::parseMemory(OperandVector &Operands) { - assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket"); - SMLoc S = getLoc(); - Parser.Lex(); // Eat left bracket token. - - const AsmToken &BaseRegTok = Parser.getTok(); - SMLoc BaseRegLoc = BaseRegTok.getLoc(); - if (BaseRegTok.isNot(AsmToken::Identifier)) - return Error(BaseRegLoc, "register expected"); - - int64_t Reg = tryParseRegister(); - if (Reg == -1) - return Error(BaseRegLoc, "register expected"); - - if (!ARM64MCRegisterClasses[ARM64::GPR64spRegClassID].contains(Reg)) - return Error(BaseRegLoc, "invalid operand for instruction"); - - // If there is an offset expression, parse it. - const MCExpr *OffsetExpr = nullptr; - SMLoc OffsetLoc; - if (Parser.getTok().is(AsmToken::Comma)) { - Parser.Lex(); // Eat the comma. - OffsetLoc = getLoc(); - - // Register offset - const AsmToken &OffsetRegTok = Parser.getTok(); - int Reg2 = OffsetRegTok.is(AsmToken::Identifier) ? tryParseRegister() : -1; - if (Reg2 != -1) { - // Default shift is LSL, with an omitted shift. We use the third bit of - // the extend value to indicate presence/omission of the immediate offset. - ARM64_AM::ShiftExtendType ExtOp = ARM64_AM::UXTX; - int64_t ShiftVal = 0; - bool ExplicitShift = false; - - if (Parser.getTok().is(AsmToken::Comma)) { - // Embedded extend operand. 
- Parser.Lex(); // Eat the comma - - SMLoc ExtLoc = getLoc(); - const AsmToken &Tok = Parser.getTok(); - ExtOp = StringSwitch(Tok.getString().lower()) - .Case("uxtw", ARM64_AM::UXTW) - .Case("lsl", ARM64_AM::UXTX) // Alias for UXTX - .Case("sxtw", ARM64_AM::SXTW) - .Case("sxtx", ARM64_AM::SXTX) - .Default(ARM64_AM::InvalidShiftExtend); - if (ExtOp == ARM64_AM::InvalidShiftExtend) - return Error(ExtLoc, "expected valid extend operation"); - - Parser.Lex(); // Eat the extend op. - - // A 32-bit offset register is only valid for [SU]/XTW extend - // operators. - if (ARM64MCRegisterClasses[ARM64::GPR32allRegClassID].contains(Reg2)) { - if (ExtOp != ARM64_AM::UXTW && - ExtOp != ARM64_AM::SXTW) - return Error(ExtLoc, "32-bit general purpose offset register " - "requires sxtw or uxtw extend"); - } else if (!ARM64MCRegisterClasses[ARM64::GPR64allRegClassID].contains( - Reg2)) - return Error(OffsetLoc, - "64-bit general purpose offset register expected"); - - bool Hash = getLexer().is(AsmToken::Hash); - if (getLexer().is(AsmToken::RBrac)) { - // No immediate operand. - if (ExtOp == ARM64_AM::UXTX) - return Error(ExtLoc, "LSL extend requires immediate operand"); - } else if (Hash || getLexer().is(AsmToken::Integer)) { - // Immediate operand. - if (Hash) - Parser.Lex(); // Eat the '#' - const MCExpr *ImmVal; - SMLoc ExprLoc = getLoc(); - if (getParser().parseExpression(ImmVal)) - return true; - const MCConstantExpr *MCE = dyn_cast(ImmVal); - if (!MCE) - return TokError("immediate value expected for extend operand"); - - ExplicitShift = true; - ShiftVal = MCE->getValue(); - if (ShiftVal < 0 || ShiftVal > 4) - return Error(ExprLoc, "immediate operand out of range"); - } else - return Error(getLoc(), "expected immediate operand"); - } - - if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(getLoc(), "']' expected"); - - Parser.Lex(); // Eat right bracket token. - - SMLoc E = getLoc(); - Operands.push_back(ARM64Operand::CreateRegOffsetMem( - Reg, Reg2, ExtOp, ShiftVal, ExplicitShift, S, E, getContext())); - return false; - - // Immediate expressions. - } else if (Parser.getTok().is(AsmToken::Hash) || - Parser.getTok().is(AsmToken::Colon) || - Parser.getTok().is(AsmToken::Integer)) { - if (Parser.getTok().is(AsmToken::Hash)) - Parser.Lex(); // Eat hash token. - - if (parseSymbolicImmVal(OffsetExpr)) - return true; - } else { - // FIXME: We really should make sure that we're dealing with a LDR/STR - // instruction that can legally have a symbolic expression here. - // Symbol reference. - if (Parser.getTok().isNot(AsmToken::Identifier) && - Parser.getTok().isNot(AsmToken::String)) - return Error(getLoc(), "identifier or immediate expression expected"); - if (getParser().parseExpression(OffsetExpr)) - return true; - // If this is a plain ref, Make sure a legal variant kind was specified. - // Otherwise, it's a more complicated expression and we have to just - // assume it's OK and let the relocation stuff puke if it's not. - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - int64_t Addend; - if (classifySymbolRef(OffsetExpr, ELFRefKind, DarwinRefKind, Addend) && - Addend == 0) { - assert(ELFRefKind == ARM64MCExpr::VK_INVALID && - "ELF symbol modifiers not supported here yet"); - - switch (DarwinRefKind) { - default: - return Error(getLoc(), "expected @pageoff or @gotpageoff modifier"); - case MCSymbolRefExpr::VK_GOTPAGEOFF: - case MCSymbolRefExpr::VK_PAGEOFF: - case MCSymbolRefExpr::VK_TLVPPAGEOFF: - // These are what we're expecting. 
- break; - } - } - } - } - - SMLoc E = getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(E, "']' expected"); - - Parser.Lex(); // Eat right bracket token. - - // Create the memory operand. - Operands.push_back( - ARM64Operand::CreateMem(Reg, OffsetExpr, S, E, OffsetLoc, getContext())); - - // Check for a '!', indicating pre-indexed addressing with writeback. - if (Parser.getTok().is(AsmToken::Exclaim)) { - // There needs to have been an immediate or wback doesn't make sense. - if (!OffsetExpr) - return Error(E, "missing offset for pre-indexed addressing"); - // Pre-indexed with writeback must have a constant expression for the - // offset. FIXME: Theoretically, we'd like to allow fixups so long - // as they don't require a relocation. - if (!isa(OffsetExpr)) - return Error(OffsetLoc, "constant immediate expression expected"); - - // Create the Token operand for the '!'. - Operands.push_back(ARM64Operand::CreateToken( - "!", false, Parser.getTok().getLoc(), getContext())); - Parser.Lex(); // Eat the '!' token. - } - - return false; -} - bool ARM64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { bool HasELFModifier = false; ARM64MCExpr::VariantKind RefKind; @@ -3313,6 +2829,47 @@ bool ARM64AsmParser::parseVectorList(OperandVector &Operands) { return false; } +ARM64AsmParser::OperandMatchResultTy +ARM64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { + const AsmToken &Tok = Parser.getTok(); + if (!Tok.is(AsmToken::Identifier)) + return MatchOperand_NoMatch; + + unsigned RegNum = MatchRegisterName(Tok.getString().lower()); + + MCContext &Ctx = getContext(); + const MCRegisterInfo *RI = Ctx.getRegisterInfo(); + if (!RI->getRegClass(ARM64::GPR64spRegClassID).contains(RegNum)) + return MatchOperand_NoMatch; + + SMLoc S = getLoc(); + Parser.Lex(); // Eat register + + if (Parser.getTok().isNot(AsmToken::Comma)) { + Operands.push_back(ARM64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx)); + return MatchOperand_Success; + } + Parser.Lex(); // Eat comma. + + if (Parser.getTok().is(AsmToken::Hash)) + Parser.Lex(); // Eat hash + + if (Parser.getTok().isNot(AsmToken::Integer)) { + Error(getLoc(), "index must be absent or #0"); + return MatchOperand_ParseFail; + } + + const MCExpr *ImmVal; + if (Parser.parseExpression(ImmVal) || !isa(ImmVal) || + cast(ImmVal)->getValue() != 0) { + Error(getLoc(), "index must be absent or #0"); + return MatchOperand_ParseFail; + } + + Operands.push_back(ARM64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx)); + return MatchOperand_Success; +} + /// parseOperand - Parse a arm instruction operand. For now this parses the /// operand regardless of the mnemonic. bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, @@ -3341,8 +2898,16 @@ bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext())); return false; } - case AsmToken::LBrac: - return parseMemory(Operands); + case AsmToken::LBrac: { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(ARM64Operand::CreateToken("[", false, Loc, + getContext())); + Parser.Lex(); // Eat '[' + + // There's no comma after a '[', so we can parse the next operand + // immediately. 
+ return parseOperand(Operands, false, false); + } case AsmToken::LCurly: return parseVectorList(Operands); case AsmToken::Identifier: { @@ -3530,6 +3095,28 @@ bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, return true; } + // After successfully parsing some operands there are two special cases to + // consider (i.e. notional operands not separated by commas). Both are due + // to memory specifiers: + // + An RBrac will end an address for load/store/prefetch + // + An '!' will indicate a pre-indexed operation. + // + // It's someone else's responsibility to make sure these tokens are sane + // in the given context! + if (Parser.getTok().is(AsmToken::RBrac)) { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(ARM64Operand::CreateToken("]", false, Loc, + getContext())); + Parser.Lex(); + } + + if (Parser.getTok().is(AsmToken::Exclaim)) { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(ARM64Operand::CreateToken("!", false, Loc, + getContext())); + Parser.Lex(); + } + ++N; } } @@ -3749,23 +3336,51 @@ bool ARM64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { "expected compatible register or floating-point constant"); case Match_InvalidMemoryIndexedSImm9: return Error(Loc, "index must be an integer in range [-256, 255]."); - case Match_InvalidMemoryIndexed32SImm7: + case Match_InvalidMemoryIndexed4SImm7: return Error(Loc, "index must be a multiple of 4 in range [-256, 252]."); - case Match_InvalidMemoryIndexed64SImm7: + case Match_InvalidMemoryIndexed8SImm7: return Error(Loc, "index must be a multiple of 8 in range [-512, 504]."); - case Match_InvalidMemoryIndexed128SImm7: + case Match_InvalidMemoryIndexed16SImm7: return Error(Loc, "index must be a multiple of 16 in range [-1024, 1008]."); - case Match_InvalidMemoryIndexed: - return Error(Loc, "invalid offset in memory address."); - case Match_InvalidMemoryIndexed8: + case Match_InvalidMemoryWExtend8: + return Error(Loc, + "expected 'uxtw' or 'sxtw' with optional shift of #0"); + case Match_InvalidMemoryWExtend16: + return Error(Loc, + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #1"); + case Match_InvalidMemoryWExtend32: + return Error(Loc, + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #2"); + case Match_InvalidMemoryWExtend64: + return Error(Loc, + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #3"); + case Match_InvalidMemoryWExtend128: + return Error(Loc, + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #4"); + case Match_InvalidMemoryXExtend8: + return Error(Loc, + "expected 'lsl' or 'sxtx' with optional shift of #0"); + case Match_InvalidMemoryXExtend16: + return Error(Loc, + "expected 'lsl' or 'sxtx' with optional shift of #0 or #1"); + case Match_InvalidMemoryXExtend32: + return Error(Loc, + "expected 'lsl' or 'sxtx' with optional shift of #0 or #2"); + case Match_InvalidMemoryXExtend64: + return Error(Loc, + "expected 'lsl' or 'sxtx' with optional shift of #0 or #3"); + case Match_InvalidMemoryXExtend128: + return Error(Loc, + "expected 'lsl' or 'sxtx' with optional shift of #0 or #4"); + case Match_InvalidMemoryIndexed1: return Error(Loc, "index must be an integer in range [0, 4095]."); - case Match_InvalidMemoryIndexed16: + case Match_InvalidMemoryIndexed2: return Error(Loc, "index must be a multiple of 2 in range [0, 8190]."); - case Match_InvalidMemoryIndexed32: + case Match_InvalidMemoryIndexed4: return Error(Loc, "index must be a multiple of 4 in range [0, 16380]."); - case Match_InvalidMemoryIndexed64: + case Match_InvalidMemoryIndexed8: 
return Error(Loc, "index must be a multiple of 8 in range [0, 32760]."); - case Match_InvalidMemoryIndexed128: + case Match_InvalidMemoryIndexed16: return Error(Loc, "index must be a multiple of 16 in range [0, 65520]."); case Match_InvalidImm0_7: return Error(Loc, "immediate must be an integer in range [0, 7]."); @@ -4109,39 +3724,11 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return showMatchError(ErrorLoc, MatchResult); } - case Match_InvalidMemoryIndexedSImm9: { - // If there is not a '!' after the memory operand that failed, we really - // want the diagnostic for the non-pre-indexed instruction variant instead. - // Be careful to check for the post-indexed variant as well, which also - // uses this match diagnostic. Also exclude the explicitly unscaled - // mnemonics, as they want the unscaled diagnostic as well. - if (Operands.size() == ErrorInfo + 1 && - !((ARM64Operand *)Operands[ErrorInfo])->isImm() && - !Tok.startswith("stur") && !Tok.startswith("ldur")) { - // FIXME: Here we use a vague diagnostic for memory operand in many - // instructions of various formats. This diagnostic can be more accurate - // if splitting memory operand into many smaller operands to help - // diagnose. - MatchResult = Match_InvalidMemoryIndexed; - } - else if(Operands.size() == 3 && Operands.size() == ErrorInfo + 1 && - ((ARM64Operand *)Operands[ErrorInfo])->isImm()) { - MatchResult = Match_InvalidLabel; - } - SMLoc ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); - if (ErrorLoc == SMLoc()) - ErrorLoc = IDLoc; - return showMatchError(ErrorLoc, MatchResult); - } - case Match_InvalidMemoryIndexed32: - case Match_InvalidMemoryIndexed64: - case Match_InvalidMemoryIndexed128: - // If there is a '!' after the memory operand that failed, we really - // want the diagnostic for the pre-indexed instruction variant instead. - if (Operands.size() > ErrorInfo + 1 && - ((ARM64Operand *)Operands[ErrorInfo + 1])->isTokenEqual("!")) - MatchResult = Match_InvalidMemoryIndexedSImm9; - // FALL THROUGH + case Match_InvalidMemoryIndexed1: + case Match_InvalidMemoryIndexed2: + case Match_InvalidMemoryIndexed4: + case Match_InvalidMemoryIndexed8: + case Match_InvalidMemoryIndexed16: case Match_InvalidCondCode: case Match_AddSubRegExtendSmall: case Match_AddSubRegExtendLarge: @@ -4152,12 +3739,20 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidMovImm32Shift: case Match_InvalidMovImm64Shift: case Match_InvalidFPImm: - case Match_InvalidMemoryIndexed: - case Match_InvalidMemoryIndexed8: - case Match_InvalidMemoryIndexed16: - case Match_InvalidMemoryIndexed32SImm7: - case Match_InvalidMemoryIndexed64SImm7: - case Match_InvalidMemoryIndexed128SImm7: + case Match_InvalidMemoryWExtend8: + case Match_InvalidMemoryWExtend16: + case Match_InvalidMemoryWExtend32: + case Match_InvalidMemoryWExtend64: + case Match_InvalidMemoryWExtend128: + case Match_InvalidMemoryXExtend8: + case Match_InvalidMemoryXExtend16: + case Match_InvalidMemoryXExtend32: + case Match_InvalidMemoryXExtend64: + case Match_InvalidMemoryXExtend128: + case Match_InvalidMemoryIndexed4SImm7: + case Match_InvalidMemoryIndexed8SImm7: + case Match_InvalidMemoryIndexed16SImm7: + case Match_InvalidMemoryIndexedSImm9: case Match_InvalidImm0_7: case Match_InvalidImm0_15: case Match_InvalidImm0_31: @@ -4179,10 +3774,6 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Any time we get here, there's nothing fancy to do. 
Just get the // operand SMLoc and display the diagnostic. SMLoc ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); - // If it's a memory operand, the error is with the offset immediate, - // so get that location instead. - if (((ARM64Operand *)Operands[ErrorInfo])->isMem()) - ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getOffsetLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; return showMatchError(ErrorLoc, MatchResult); diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp b/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp index 92eabcf2b4e0..20bcb366bf5e 100644 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp +++ b/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp @@ -89,6 +89,8 @@ static DecodeStatus DecodeFixedPointScaleImm64(llvm::MCInst &Inst, unsigned Imm, const void *Decoder); static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder); static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm, @@ -114,10 +116,6 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeRegOffsetLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, - uint64_t Address, - const void *Decoder); static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst, uint32_t insn, uint64_t Address, const void *Decoder); @@ -605,6 +603,13 @@ static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm, return Success; } +static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm((Imm >> 1) & 1)); + Inst.addOperand(MCOperand::CreateImm(Imm & 1)); + return Success; +} + static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder) { @@ -1189,81 +1194,6 @@ static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, return Success; } -static DecodeStatus DecodeRegOffsetLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rm = fieldFromInstruction(insn, 16, 5); - unsigned extendHi = fieldFromInstruction(insn, 13, 3); - unsigned extendLo = fieldFromInstruction(insn, 12, 1); - unsigned extend = (extendHi << 1) | extendLo; - - // All RO load-store instructions are undefined if option == 00x or 10x. 
- if (extend >> 2 == 0x0 || extend >> 2 == 0x2) - return Fail; - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::LDRSWro: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRXro: - case ARM64::STRXro: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRWro: - case ARM64::STRWro: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRQro: - case ARM64::STRQro: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRDro: - case ARM64::STRDro: - DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSro: - case ARM64::STRSro: - DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRHro: - case ARM64::STRHro: - DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRBro: - case ARM64::STRBro: - DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRBBro: - case ARM64::STRBBro: - case ARM64::LDRSBWro: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRHHro: - case ARM64::STRHHro: - case ARM64::LDRSHWro: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSHXro: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSBXro: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::PRFMro: - Inst.addOperand(MCOperand::CreateImm(Rt)); - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - - Inst.addOperand(MCOperand::CreateImm(extend)); - return Success; -} - static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst, uint32_t insn, uint64_t Addr, const void *Decoder) { diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp index adfcb46ac403..48fba37146e5 100644 --- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp +++ b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp @@ -990,11 +990,11 @@ void ARM64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum, void ARM64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << getRegisterName(MI->getOperand(OpNum).getReg()); - printExtend(MI, OpNum + 1, O); + printArithExtend(MI, OpNum + 1, O); } -void ARM64InstPrinter::printExtend(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { +void ARM64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { unsigned Val = MI->getOperand(OpNum).getImm(); ARM64_AM::ShiftExtendType ExtType = ARM64_AM::getArithExtendType(Val); unsigned ShiftVal = ARM64_AM::getArithShiftValue(Val); @@ -1019,6 +1019,23 @@ void ARM64InstPrinter::printExtend(const MCInst *MI, unsigned OpNum, O << " #" << ShiftVal; } +void ARM64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum, + raw_ostream &O, char SrcRegKind, + unsigned Width) { + unsigned SignExtend = MI->getOperand(OpNum).getImm(); + unsigned DoShift = MI->getOperand(OpNum + 1).getImm(); + + // sxtw, sxtx, uxtw or lsl (== uxtx) + bool IsLSL = !SignExtend && SrcRegKind == 'x'; + if (IsLSL) + O << "lsl"; + else + O << (SignExtend ? 
's' : 'u') << "xt" << SrcRegKind; + + if (DoShift || IsLSL) + O << " #" << Log2_32(Width / 8); +} + void ARM64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O) { ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm(); @@ -1042,18 +1059,15 @@ void ARM64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum, O << '#' << Scale * MI->getOperand(OpNum).getImm(); } -void ARM64InstPrinter::printAMIndexed(const MCInst *MI, unsigned OpNum, - unsigned Scale, raw_ostream &O) { - const MCOperand MO1 = MI->getOperand(OpNum + 1); - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()); - if (MO1.isImm()) { - if (MO1.getImm() != 0) - O << ", #" << (MO1.getImm() * Scale); +void ARM64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum, + unsigned Scale, raw_ostream &O) { + const MCOperand MO = MI->getOperand(OpNum); + if (MO.isImm()) { + O << "#" << (MO.getImm() * Scale); } else { - assert(MO1.isExpr() && "Unexpected operand type!"); - O << ", " << *MO1.getExpr(); + assert(MO.isExpr() && "Unexpected operand type!"); + O << *MO.getExpr(); } - O << ']'; } void ARM64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum, @@ -1080,37 +1094,6 @@ void ARM64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, O << '#' << prfop; } -void ARM64InstPrinter::printMemoryPostIndexed(const MCInst *MI, unsigned OpNum, - raw_ostream &O, unsigned Scale) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #" - << Scale * MI->getOperand(OpNum + 1).getImm(); -} - -void ARM64InstPrinter::printMemoryRegOffset(const MCInst *MI, unsigned OpNum, - raw_ostream &O, int Scale) { - unsigned Val = MI->getOperand(OpNum + 2).getImm(); - ARM64_AM::ShiftExtendType ExtType = ARM64_AM::getMemExtendType(Val); - - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "; - if (ExtType == ARM64_AM::UXTW || ExtType == ARM64_AM::SXTW) - O << getRegisterName(getWRegFromXReg(MI->getOperand(OpNum + 1).getReg())); - else - O << getRegisterName(MI->getOperand(OpNum + 1).getReg()); - - bool DoShift = ARM64_AM::getMemDoShift(Val); - - if (ExtType == ARM64_AM::UXTX) { - if (DoShift) - O << ", lsl"; - } else - O << ", " << ARM64_AM::getShiftExtendName(ExtType); - - if (DoShift) - O << " #" << Log2_32(Scale); - - O << "]"; -} - void ARM64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h index 31818dff980b..0fd6f1007121 100644 --- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h +++ b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h @@ -62,18 +62,26 @@ class ARM64InstPrinter : public MCInstPrinter { void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printArithExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O, + char SrcRegKind, unsigned Width); + template + void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printMemExtend(MI, OpNum, O, SrcRegKind, Width); + } + void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printInverseCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); 
void printAlignedLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAMIndexed(const MCInst *MI, unsigned OpNum, unsigned Scale, - raw_ostream &O); + void printUImm12Offset(const MCInst *MI, unsigned OpNum, unsigned Scale, + raw_ostream &O); void printAMIndexedWB(const MCInst *MI, unsigned OpNum, unsigned Scale, raw_ostream &O); - template - void printAMIndexed(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, BitWidth / 8, O); + template + void printUImm12Offset(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printUImm12Offset(MI, OpNum, Scale, O); } template @@ -88,21 +96,6 @@ class ARM64InstPrinter : public MCInstPrinter { void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryPostIndexed(const MCInst *MI, unsigned OpNum, raw_ostream &O, - unsigned Scale); - template - void printMemoryPostIndexed(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryPostIndexed(MI, OpNum, O, BitWidth / 8); - } - - void printMemoryRegOffset(const MCInst *MI, unsigned OpNum, raw_ostream &O, - int LegalShiftAmt); - template - void printMemoryRegOffset(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, BitWidth / 8); - } - void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O, diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp index 3c6dbc85b138..0db08f422e41 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp +++ b/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp @@ -56,12 +56,11 @@ class ARM64MCCodeEmitter : public MCCodeEmitter { SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; - /// getAMIndexed8OpValue - Return encoding info for base register - /// and 12-bit unsigned immediate attached to a load, store or prfm - /// instruction. If operand requires a relocation, record it and - /// return zero in that part of the encoding. + /// getLdStUImm12OpValue - Return encoding info for 12-bit unsigned immediate + /// attached to a load, store or prfm instruction. If operand requires a + /// relocation, record it and return zero in that part of the encoding. template - uint32_t getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx, + uint32_t getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; @@ -89,6 +88,13 @@ class ARM64MCCodeEmitter : public MCCodeEmitter { SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + /// getMemExtendOpValue - Return the encoded value for a reg-extend load/store + /// instruction: bit 0 is whether a shift is present, bit 1 is whether the + /// operation is a sign extend (as opposed to a zero extend). + uint32_t getMemExtendOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + /// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and- /// branch target. 
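
The new getMemExtendOpValue hook declared above packs the extend information of such an operand into the two-bit field its comment describes: bit 0 records whether a shift is present and bit 1 whether the extend is signed. A minimal sketch of that packing (hypothetical helper names, not the emitter itself):

#include <cassert>
#include <cstdint>

// Two-bit mem-extend field: bit 0 = shift present, bit 1 = sign extend,
// so 0 = uxt?, 1 = uxt? #amt, 2 = sxt?, 3 = sxt? #amt.
uint32_t encodeMemExtend(bool SignExtend, bool DoShift) {
  return (uint32_t(SignExtend) << 1) | uint32_t(DoShift);
}

void decodeMemExtend(uint32_t Field, bool &SignExtend, bool &DoShift) {
  assert(Field < 4 && "mem-extend field is only two bits");
  SignExtend = ((Field >> 1) & 1) != 0;
  DoShift = (Field & 1) != 0;
}

The implementation further down reads the two immediates straight out of the MCInst operands and combines them in exactly this way.
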
uint32_t getTestBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, @@ -221,15 +227,11 @@ ARM64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, return 0; } -template -uint32_t -ARM64MCCodeEmitter::getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx, +template uint32_t +ARM64MCCodeEmitter::getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { - unsigned BaseReg = MI.getOperand(OpIdx).getReg(); - BaseReg = Ctx.getRegisterInfo()->getEncodingValue(BaseReg); - - const MCOperand &MO = MI.getOperand(OpIdx + 1); + const MCOperand &MO = MI.getOperand(OpIdx); uint32_t ImmVal = 0; if (MO.isImm()) @@ -241,7 +243,7 @@ ARM64MCCodeEmitter::getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx, ++MCNumFixups; } - return BaseReg | (ImmVal << 5); + return ImmVal; } /// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label @@ -255,7 +257,7 @@ ARM64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, // If the destination is an immediate, we have nothing to do. if (MO.isImm()) return MO.getImm(); - assert(MO.isExpr() && "Unexpected ADR target type!"); + assert(MO.isExpr() && "Unexpected target type!"); const MCExpr *Expr = MO.getExpr(); MCFixupKind Kind = MI.getOpcode() == ARM64::ADR @@ -341,6 +343,15 @@ ARM64MCCodeEmitter::getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx, return 0; } +uint32_t +ARM64MCCodeEmitter::getMemExtendOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + unsigned SignExtend = MI.getOperand(OpIdx).getImm(); + unsigned DoShift = MI.getOperand(OpIdx + 1).getImm(); + return (SignExtend << 1) | DoShift; +} + uint32_t ARM64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s index 42493a2ef7a1..213dc00f0a60 100644 --- a/test/MC/AArch64/basic-a64-diagnostics.s +++ b/test/MC/AArch64/basic-a64-diagnostics.s @@ -1803,7 +1803,7 @@ stxrb w2, w3, [x4, #20] stlxrh w10, w11, [w2] // CHECK-ERROR-AARCH64: error: expected '#0' -// CHECK-ERROR-ARM64: error: invalid operand for instruction +// CHECK-ERROR-ARM64: error: index must be absent or #0 // CHECK-ERROR-NEXT: stxrb w2, w3, [x4, #20] // CHECK-ERROR-NEXT: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -1887,7 +1887,8 @@ //------------------------------------------------------------------------------ ldr x3, [x4, #25], #0 ldr x4, [x9, #0], #4 -// CHECK-ERROR: error: {{expected symbolic reference or integer|index must be a multiple of 8}} in range [0, 32760] +// CHECK-ERROR-AARCH64: error: {{expected symbolic reference or integer|index must be a multiple of 8}} in range [0, 32760] +// CHECK-ERROR-ARM64: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldr x3, [x4, #25], #0 // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction @@ -2083,22 +2084,19 @@ strh w9, [sp, #-257]! str w1, [x19, #256]! str w9, [sp, #-257]! -// CHECK-ERROR-AARCH64: error: invalid operand for instruction -// CHECK-ERROR-ARM64: error: invalid offset in memory address +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR-NEXT: strb w1, [x19, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: strb w9, [sp, #-257]! 
// CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: strh w1, [x19, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: strh w9, [sp, #-257]! // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: str w1, [x19, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] @@ -2111,22 +2109,19 @@ ldrh w9, [sp, #-257]! ldr w1, [x19, #256]! ldr w9, [sp, #-257]! -// CHECK-ERROR-AARCH64: error: invalid operand for instruction -// CHECK-ERROR-ARM64: error: invalid offset in memory address +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldrb w1, [x19, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldrb w9, [sp, #-257]! // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldrh w1, [x19, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldrh w9, [sp, #-257]! // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldr w1, [x19, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] @@ -2139,22 +2134,19 @@ ldrsh x22, [x13, #-257]! ldrsw x2, [x3, #256]! ldrsw x22, [x13, #-257]! -// CHECK-ERROR-AARCH64: error: invalid operand for instruction -// CHECK-ERROR-ARM64: error: invalid offset in memory address +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldrsb x2, [x3, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldrsb x22, [x13, #-257]! // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldrsh x2, [x3, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldrsh x22, [x13, #-257]! // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldrsw x2, [x3, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] @@ -2165,15 +2157,13 @@ ldrsb w22, [x13, #-257]! ldrsh w2, [x3, #256]! ldrsh w22, [x13, #-257]! 
-// CHECK-ERROR-AARCH64: error: invalid operand for instruction -// CHECK-ERROR-ARM64: error: invalid offset in memory address +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldrsb w2, [x3, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldrsb w22, [x13, #-257]! // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldrsh w2, [x3, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] @@ -2188,29 +2178,25 @@ str s3, [x13, #-257]! str d3, [x3, #256]! str d3, [x13, #-257]! -// CHECK-ERROR-AARCH64: error: invalid operand for instruction -// CHECK-ERROR-ARM64: error: invalid offset in memory address +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR-NEXT: str b3, [x3, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: str b3, [x13, #-257]! // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: str h3, [x3, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: str h3, [x13, #-257]! // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: str s3, [x3, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: str s3, [x13, #-257]! // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: str d3, [x3, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] @@ -2225,29 +2211,25 @@ ldr s3, [x13, #-257]! ldr d3, [x3, #256]! ldr d3, [x13, #-257]! -// CHECK-ERROR-AARCH64: error: invalid operand for instruction -// CHECK-ERROR-ARM64: error: invalid offset in memory address +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldr b3, [x3, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldr b3, [x13, #-257]! // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldr h3, [x3, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldr h3, [x13, #-257]! 
// CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldr s3, [x3, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldr s3, [x13, #-257]! // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldr d3, [x3, #256]! // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] @@ -2262,20 +2244,16 @@ sttrh w17, [x1, #256] ldtrsw x20, [x1, #256] ldtr x12, [sp, #256] -// CHECK-ERROR-AARCH64: error: expected integer in range [-256, 255] -// CHECK-ERROR-ARM64: error: invalid offset in memory address +// CHECK-ERROR: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldtrb w2, [sp, #256] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: sttrh w17, [x1, #256] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldtrsw x20, [x1, #256] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldtr x12, [sp, #256] // CHECK-ERROR-NEXT: ^ @@ -2290,12 +2268,10 @@ // CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: sttr b2, [x2, #-257] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldtrsb x9, [sp, #-257] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldtr w2, [x30, #-257] // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: invalid operand for instruction @@ -2313,24 +2289,19 @@ ldr w0, [x4, #16384] ldrh w2, [x21, #8192] ldrb w3, [x12, #4096] -// CHECK-ERROR-AARCH64: error: {{expected|index must be an}} integer in range [-256, 255] -// CHECK-ERROR-ARM64: error: invalid offset in memory address +// CHECK-ERROR: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldr q0, [x11, #65536] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: 
{{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldr x0, [sp, #32768] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldr w0, [x4, #16384] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldrh w2, [x21, #8192] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: ldrb w3, [x12, #4096] // CHECK-ERROR-NEXT: ^ @@ -2372,8 +2343,7 @@ // CHECK-ERROR-AARCH64-NEXT: error: too few operands for instruction // CHECK-ERROR-AARCH64-NEXT: str x5, [x22, #12] // CHECK-ERROR-AARCH64-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255] // CHECK-ERROR-NEXT: str w7, [x12, #16384] // CHECK-ERROR-NEXT: ^ @@ -2411,92 +2381,78 @@ // CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldr w3, [xzr, x3] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: expected #imm after shift specifier -// CHECK-ERROR-ARM64-NEXT: error: LSL extend requires immediate operand +// CHECK-ERROR-NEXT: error: expected #imm after shift specifier // CHECK-ERROR-NEXT: ldr w4, [x0, x4, lsl] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #2 -// CHECK-ERROR-AARCH64-NEXT: ldr w9, [x5, x5, uxtw] -// CHECK-ERROR-AARCH64-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #2 -// CHECK-ERROR-AARCH64-NEXT: ldr w10, [x6, x9, sxtw #2] -// CHECK-ERROR-AARCH64-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2 -// CHECK-ERROR-ARM64-NEXT: error: 32-bit general purpose offset register requires sxtw or uxtw extend +// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #2 +// CHECK-ERROR-NEXT: ldr w9, [x5, x5, uxtw] +// CHECK-ERROR-NEXT: ^ +// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #2 +// CHECK-ERROR-NEXT: ldr w10, [x6, x9, sxtw #2] +// CHECK-ERROR-NEXT: ^ +// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2 // CHECK-ERROR-NEXT: ldr w11, [x7, w2, lsl #2] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2 -// CHECK-ERROR-ARM64-NEXT: error: 32-bit general purpose offset register requires sxtw or uxtw extend +// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2 // CHECK-ERROR-NEXT: ldr w12, [x8, w1, sxtx] // CHECK-ERROR-NEXT: ^ ldrsb w9, [x4, x2, lsl #-1] strb w9, [x4, x2, lsl #1] -// CHECK-ERROR-AARCH64-NEXT: error: expected integer shift amount -// CHECK-ERROR-ARM64-NEXT: error: immediate operand out of 
range +// CHECK-ERROR-NEXT: error: expected integer shift amount // CHECK-ERROR-NEXT: ldrsb w9, [x4, x2, lsl #-1] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 // CHECK-ERROR-NEXT: strb w9, [x4, x2, lsl #1] // CHECK-ERROR-NEXT: ^ ldrsh w9, [x4, x2, lsl #-1] ldr h13, [x4, w2, uxtw #2] -// CHECK-ERROR-AARCH64-NEXT: error: expected integer shift amount -// CHECK-ERROR-ARM64-NEXT: error: immediate operand out of range +// CHECK-ERROR-NEXT: error: expected integer shift amount // CHECK-ERROR-NEXT: ldrsh w9, [x4, x2, lsl #-1] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #1 -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #1 // CHECK-ERROR-NEXT: ldr h13, [x4, w2, uxtw #2] // CHECK-ERROR-NEXT: ^ str w9, [x5, w9, sxtw #-1] str s3, [sp, w9, uxtw #1] ldrsw x9, [x15, x4, sxtx #3] -// CHECK-ERROR-AARCH64-NEXT: error: expected integer shift amount -// CHECK-ERROR-ARM64-NEXT: error: immediate operand out of range +// CHECK-ERROR-NEXT: error: expected integer shift amount // CHECK-ERROR-NEXT: str w9, [x5, w9, sxtw #-1] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2 -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2 // CHECK-ERROR-NEXT: str s3, [sp, w9, uxtw #1] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #2 -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #2 // CHECK-ERROR-NEXT: ldrsw x9, [x15, x4, sxtx #3] // CHECK-ERROR-NEXT: ^ str xzr, [x5, x9, sxtx #-1] prfm pldl3keep, [sp, x20, lsl #2] ldr d3, [x20, wzr, uxtw #4] -// CHECK-ERROR-AARCH64-NEXT: error: expected integer shift amount -// CHECK-ERROR-ARM64-NEXT: error: immediate operand out of range +// CHECK-ERROR-NEXT: error: expected integer shift amount // CHECK-ERROR-NEXT: str xzr, [x5, x9, sxtx #-1] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #3 -// CHECK-ERROR-ARM64-NEXT: error: expected label or encodable integer pc offset +// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #3 // CHECK-ERROR-NEXT: prfm pldl3keep, [sp, x20, lsl #2] // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #3 -// CHECK-ERROR-ARM64-NEXT: error: invalid offset in memory address +// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #3 // CHECK-ERROR-NEXT: ldr d3, [x20, wzr, uxtw #4] // CHECK-ERROR-NEXT: ^ ldr q5, [sp, x2, lsl #-1] ldr q10, [x20, w4, uxtw #2] str q21, [x20, w4, uxtw #5] -// CHECK-ERROR-AARCH64-NEXT: error: expected integer shift amount -// CHECK-ERROR-ARM64-NEXT: error: immediate operand out of range +// CHECK-ERROR-NEXT: error: expected integer shift amount // CHECK-ERROR-NEXT: ldr q5, [sp, x2, lsl #-1] // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-AARCH64-NEXT: error: expected 'lsl' or 'sxtw' with optional shift of #0 or #4 -// CHECK-ERROR-ARM64-NEXT: 
error: invalid offset in memory address +// CHECK-ERROR-ARM64-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #4 // CHECK-ERROR-NEXT: ldr q10, [x20, w4, uxtw #2] // CHECK-ERROR-NEXT: ^ // CHECK-ERROR-AARCH64-NEXT: error: expected 'lsl' or 'sxtw' with optional shift of #0 or #4 -// CHECK-ERROR-ARM64-NEXT: error: immediate operand out of range +// CHECK-ERROR-ARM64-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #4 // CHECK-ERROR-NEXT: str q21, [x20, w4, uxtw #5] // CHECK-ERROR-NEXT: ^ @@ -2695,16 +2651,13 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR-NEXT: ldp d3, q2, [sp], #0 // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008] -// CHECK-ERROR-ARM64-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504] +// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008] // CHECK-ERROR-NEXT: ldp q3, q5, [sp], #8 // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008] -// CHECK-ERROR-ARM64-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504] +// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008] // CHECK-ERROR-NEXT: stp q20, q25, [x5], #1024 // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-AARCH64-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008] -// CHECK-ERROR-ARM64-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504] +// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008] // CHECK-ERROR-NEXT: ldp q30, q15, [x23], #-1040 // CHECK-ERROR-NEXT: ^ diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index 03c678f28f7b..10fdde460284 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -4080,8 +4080,7 @@ // CHECK-ARM64-ERROR: error: vector register expected // CHECK-ERROR: ld1 {v32.16b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: error: register expected +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld1 {v15.8h}, [x32] // CHECK-ERROR: ^ @@ -4130,8 +4129,7 @@ // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: ld2 {v0.8b, v2.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: error: register expected +// CHECK-AARCH64: error: invalid operand for instruction // CHECK-ERROR: ld2 {v15.4h, v16.4h, v17.4h}, [x32] // CHECK-ERROR: ^ // CHECK-AARCH64-ERROR: error: expected the same vector layout @@ -4207,8 +4205,7 @@ // CHECK-ARM64-ERROR: error: vector register expected // CHECK-ERROR: st1 {v32.16b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: error: register expected +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st1 {v15.8h}, [x32] // CHECK-ERROR: ^ @@ -4434,8 +4431,7 @@ // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: st1 {v0.d}[16], [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: error: register expected +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st2 {v31.s, v0.s}[3], [8] // 
CHECK-ERROR: ^ // CHECK-AARCH64-ERROR: error: expected lane number diff --git a/test/MC/ARM64/diags.s b/test/MC/ARM64/diags.s index edbdfe98c2ee..3ff2b54998f7 100644 --- a/test/MC/ARM64/diags.s +++ b/test/MC/ARM64/diags.s @@ -9,7 +9,7 @@ foo: ldr x3, [foo + 4] ; CHECK: ldr x3, foo+4 ; encoding: [0bAAA00011,A,A,0x58] ; CHECK: ; fixup A - offset: 0, value: foo+4, kind: fixup_arm64_ldr_pcrel_imm19 -; CHECK-ERRORS: error: register expected +; CHECK-ERRORS: error: invalid operand for instruction ; The last argument should be flagged as an error. rdar://9576009 ld4.8b {v0, v1, v2, v3}, [x0], #33 @@ -33,10 +33,10 @@ foo: ldur x0, [x1, #-257] -; CHECK-ERRORS: error: invalid offset in memory address. +; CHECK-ERRORS: error: index must be an integer in range [-256, 255]. ; CHECK-ERRORS: ldr x0, [x0, #804] ; CHECK-ERRORS: ^ -; CHECK-ERRORS: error: invalid offset in memory address. +; CHECK-ERRORS: error: index must be an integer in range [-256, 255]. ; CHECK-ERRORS: ldr w0, [x0, #802] ; CHECK-ERRORS: ^ ; CHECK-ERRORS: error: index must be an integer in range [-256, 255]. @@ -66,7 +66,7 @@ foo: ; CHECK-ERRORS: error: index must be a multiple of 8 in range [-512, 504]. ; CHECK-ERRORS: ldp x3, x4, [x5], #12 ; CHECK-ERRORS: ^ -; CHECK-ERRORS: error: index must be a multiple of 8 in range [-512, 504]. +; CHECK-ERRORS: error: index must be a multiple of 16 in range [-1024, 1008]. ; CHECK-ERRORS: ldp q3, q4, [x5], #12 ; CHECK-ERRORS: ^ ; CHECK-ERRORS: error: index must be an integer in range [-256, 255]. @@ -84,31 +84,31 @@ ldr s1, [x3, w3, sxtw #4] ldr d1, [x3, w3, sxtw #4] ldr q1, [x3, w3, sxtw #1] -; CHECK-ERRORS: error: invalid offset in memory address. +; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 ; CHECK-ERRORS:ldrb w1, [x3, w3, sxtw #4] ; CHECK-ERRORS: ^ -; CHECK-ERRORS: error: invalid offset in memory address. +; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #1 ; CHECK-ERRORS:ldrh w1, [x3, w3, sxtw #4] ; CHECK-ERRORS: ^ -; CHECK-ERRORS: error: invalid offset in memory address. +; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2 ; CHECK-ERRORS:ldr w1, [x3, w3, sxtw #4] ; CHECK-ERRORS: ^ -; CHECK-ERRORS: error: invalid offset in memory address. +; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #3 ; CHECK-ERRORS:ldr x1, [x3, w3, sxtw #4] ; CHECK-ERRORS: ^ -; CHECK-ERRORS: error: invalid offset in memory address. +; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 ; CHECK-ERRORS:ldr b1, [x3, w3, sxtw #4] ; CHECK-ERRORS: ^ -; CHECK-ERRORS: invalid offset in memory address. +; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #1 ; CHECK-ERRORS:ldr h1, [x3, w3, sxtw #4] ; CHECK-ERRORS: ^ -; CHECK-ERRORS: invalid offset in memory address. +; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2 ; CHECK-ERRORS:ldr s1, [x3, w3, sxtw #4] ; CHECK-ERRORS: ^ -; CHECK-ERRORS: invalid offset in memory address. +; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #3 ; CHECK-ERRORS:ldr d1, [x3, w3, sxtw #4] ; CHECK-ERRORS: ^ -; CHECK-ERRORS: invalid offset in memory address. 
+; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #4 ; CHECK-ERRORS:ldr q1, [x3, w3, sxtw #1] ; CHECK-ERRORS: ^ @@ -118,10 +118,10 @@ ldr q1, [x3, w3, sxtw #1] str d1, [x3, w3, sxtx #3] ldr s1, [x3, d3, sxtx #2] -; CHECK-ERRORS: 32-bit general purpose offset register requires sxtw or uxtw extend +; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #3 ; CHECK-ERRORS: str d1, [x3, w3, sxtx #3] ; CHECK-ERRORS: ^ -; CHECK-ERRORS: error: 64-bit general purpose offset register expected +; CHECK-ERRORS: error: index must be an integer in range [-256, 255]. ; CHECK-ERRORS: ldr s1, [x3, d3, sxtx #2] ; CHECK-ERRORS: ^ diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp index 1636fb423393..00bc9a5bbb9b 100644 --- a/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/utils/TableGen/CodeGenDAGPatterns.cpp @@ -1718,9 +1718,9 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { DagInit *MIOpInfo = OperandNode->getValueAsDag("MIOperandInfo"); if (unsigned NumArgs = MIOpInfo->getNumArgs()) { // But don't do that if the whole operand is being provided by - // a single ComplexPattern. - const ComplexPattern *AM = Child->getComplexPatternInfo(CDP); - if (!AM || AM->getNumOperands() < NumArgs) { + // a single ComplexPattern-related Operand. + + if (Child->getNumMIResults(CDP) < NumArgs) { // Match first sub-operand against the child we already have. Record *SubRec = cast(MIOpInfo->getArg(0))->getDef(); MadeChange |= From 369e565f23d34876f1689971b98255dd84131546 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 22 May 2014 11:56:20 +0000 Subject: [PATCH 073/906] ARM64: model pre/post-indexed operations properly. We should be keeping track of the writeback on these instructions, otherwise we're relying on LLVM's stupidity for correct code. Fortunately, the MC layer can now handle all required constraints, which means we can get rid of the CodeGen only PseudoInsts too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209426 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/ARM64AsmPrinter.cpp | 93 ---------- lib/Target/ARM64/ARM64FrameLowering.cpp | 32 ++-- lib/Target/ARM64/ARM64ISelDAGToDAG.cpp | 48 +++--- lib/Target/ARM64/ARM64InstrFormats.td | 113 +++--------- lib/Target/ARM64/ARM64InstrInfo.cpp | 2 + lib/Target/ARM64/ARM64InstrInfo.td | 162 +++++++----------- lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp | 2 + lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp | 40 +++-- .../ARM64/Disassembler/ARM64Disassembler.cpp | 85 +++++++++ 9 files changed, 241 insertions(+), 336 deletions(-) diff --git a/lib/Target/ARM64/ARM64AsmPrinter.cpp b/lib/Target/ARM64/ARM64AsmPrinter.cpp index 5531101fe2f4..78f9ed12f56d 100644 --- a/lib/Target/ARM64/ARM64AsmPrinter.cpp +++ b/lib/Target/ARM64/ARM64AsmPrinter.cpp @@ -423,45 +423,6 @@ void ARM64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, // instructions) auto-generated. 
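
Before the mechanical changes that follow, it is worth spelling out what "modeling the writeback" means here: a pre- or post-indexed load/store also updates its base register, and after this commit that updated base is an explicit first definition of the real instruction (tied to the incoming base) rather than something the old *_isel pseudos papered over. A toy sketch of the semantics being modelled, with the returned value standing in for the new writeback def (hypothetical helpers, not LLVM code):

#include <cstdint>
#include <map>

using Memory = std::map<uint64_t, uint64_t>;

// "str x0, [sp, #-16]!"  (pre-indexed): base updated before the access.
uint64_t preIndexedStore(Memory &Mem, uint64_t Base, uint64_t Val, int64_t Off) {
  uint64_t NewBase = Base + Off;  // writeback happens first
  Mem[NewBase] = Val;             // store goes to the updated address
  return NewBase;                 // the value the new writeback operand carries
}

// "str x0, [sp], #-16"  (post-indexed): base updated after the access.
uint64_t postIndexedStore(Memory &Mem, uint64_t Base, uint64_t Val, int64_t Off) {
  Mem[Base] = Val;                // store goes to the original address
  return Base + Off;              // writeback result
}

This extra definition is why every operand index on the pre/post-indexed forms shifts by one in the FrameLowering, ISel, and AsmParser hunks below: operand 0 of the real instruction is now the written-back base register.
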
#include "ARM64GenMCPseudoLowering.inc" -static unsigned getRealIndexedOpcode(unsigned Opc) { - switch (Opc) { - case ARM64::LDRXpre_isel: return ARM64::LDRXpre; - case ARM64::LDRWpre_isel: return ARM64::LDRWpre; - case ARM64::LDRQpre_isel: return ARM64::LDRQpre; - case ARM64::LDRDpre_isel: return ARM64::LDRDpre; - case ARM64::LDRSpre_isel: return ARM64::LDRSpre; - case ARM64::LDRBBpre_isel: return ARM64::LDRBBpre; - case ARM64::LDRHHpre_isel: return ARM64::LDRHHpre; - case ARM64::LDRSBWpre_isel: return ARM64::LDRSBWpre; - case ARM64::LDRSBXpre_isel: return ARM64::LDRSBXpre; - case ARM64::LDRSHWpre_isel: return ARM64::LDRSHWpre; - case ARM64::LDRSHXpre_isel: return ARM64::LDRSHXpre; - case ARM64::LDRSWpre_isel: return ARM64::LDRSWpre; - - case ARM64::LDRQpost_isel: return ARM64::LDRQpost; - case ARM64::LDRDpost_isel: return ARM64::LDRDpost; - case ARM64::LDRSpost_isel: return ARM64::LDRSpost; - case ARM64::LDRXpost_isel: return ARM64::LDRXpost; - case ARM64::LDRWpost_isel: return ARM64::LDRWpost; - case ARM64::LDRHHpost_isel: return ARM64::LDRHHpost; - case ARM64::LDRBBpost_isel: return ARM64::LDRBBpost; - case ARM64::LDRSWpost_isel: return ARM64::LDRSWpost; - case ARM64::LDRSHWpost_isel: return ARM64::LDRSHWpost; - case ARM64::LDRSHXpost_isel: return ARM64::LDRSHXpost; - case ARM64::LDRSBWpost_isel: return ARM64::LDRSBWpost; - case ARM64::LDRSBXpost_isel: return ARM64::LDRSBXpost; - - case ARM64::STRXpre_isel: return ARM64::STRXpre; - case ARM64::STRWpre_isel: return ARM64::STRWpre; - case ARM64::STRHHpre_isel: return ARM64::STRHHpre; - case ARM64::STRBBpre_isel: return ARM64::STRBBpre; - case ARM64::STRQpre_isel: return ARM64::STRQpre; - case ARM64::STRDpre_isel: return ARM64::STRDpre; - case ARM64::STRSpre_isel: return ARM64::STRSpre; - } - llvm_unreachable("Unexpected pre-indexed opcode!"); -} - void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { // Do any auto-generated pseudo lowerings. if (emitPseudoExpansionLowering(OutStreamer, MI)) @@ -488,60 +449,6 @@ void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - // Indexed loads and stores use a pseudo to handle complex operand - // tricks and writeback to the base register. We strip off the writeback - // operand and switch the opcode here. Post-indexed stores were handled by the - // tablegen'erated pseudos above. (The complex operand <--> simple - // operand isel is beyond tablegen's ability, so we do these manually). - case ARM64::LDRHHpre_isel: - case ARM64::LDRBBpre_isel: - case ARM64::LDRXpre_isel: - case ARM64::LDRWpre_isel: - case ARM64::LDRQpre_isel: - case ARM64::LDRDpre_isel: - case ARM64::LDRSpre_isel: - case ARM64::LDRSBWpre_isel: - case ARM64::LDRSBXpre_isel: - case ARM64::LDRSHWpre_isel: - case ARM64::LDRSHXpre_isel: - case ARM64::LDRSWpre_isel: - case ARM64::LDRQpost_isel: - case ARM64::LDRDpost_isel: - case ARM64::LDRSpost_isel: - case ARM64::LDRXpost_isel: - case ARM64::LDRWpost_isel: - case ARM64::LDRHHpost_isel: - case ARM64::LDRBBpost_isel: - case ARM64::LDRSWpost_isel: - case ARM64::LDRSHWpost_isel: - case ARM64::LDRSHXpost_isel: - case ARM64::LDRSBWpost_isel: - case ARM64::LDRSBXpost_isel: { - MCInst TmpInst; - // For loads, the writeback operand to be skipped is the second. 
- TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg())); - TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); - EmitToStreamer(OutStreamer, TmpInst); - return; - } - case ARM64::STRXpre_isel: - case ARM64::STRWpre_isel: - case ARM64::STRHHpre_isel: - case ARM64::STRBBpre_isel: - case ARM64::STRQpre_isel: - case ARM64::STRDpre_isel: - case ARM64::STRSpre_isel: { - MCInst TmpInst; - // For loads, the writeback operand to be skipped is the first. - TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg())); - TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); - EmitToStreamer(OutStreamer, TmpInst); - return; - } // Tail calls use pseudo instructions so they have the proper code-gen // attributes (isCall, isReturn, etc.). We lower them to the real diff --git a/lib/Target/ARM64/ARM64FrameLowering.cpp b/lib/Target/ARM64/ARM64FrameLowering.cpp index 3b14649c487b..9c17488ec588 100644 --- a/lib/Target/ARM64/ARM64FrameLowering.cpp +++ b/lib/Target/ARM64/ARM64FrameLowering.cpp @@ -246,14 +246,14 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { // that is a multiple of -2. assert((MBBI->getOpcode() == ARM64::STPXpre || MBBI->getOpcode() == ARM64::STPDpre) && - MBBI->getOperand(2).getReg() == ARM64::SP && - MBBI->getOperand(3).getImm() < 0 && - (MBBI->getOperand(3).getImm() & 1) == 0); + MBBI->getOperand(3).getReg() == ARM64::SP && + MBBI->getOperand(4).getImm() < 0 && + (MBBI->getOperand(4).getImm() & 1) == 0); // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space // required for the callee saved register area we get the frame pointer // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8. - FPOffset = -(MBBI->getOperand(3).getImm() + 2) * 8; + FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8; assert(FPOffset >= 0 && "Bad Framepointer Offset"); } @@ -409,12 +409,16 @@ static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) { } static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { + unsigned RtIdx = 0; + if (MI->getOpcode() == ARM64::LDPXpost || MI->getOpcode() == ARM64::LDPDpost) + RtIdx = 1; + if (MI->getOpcode() == ARM64::LDPXpost || MI->getOpcode() == ARM64::LDPDpost || MI->getOpcode() == ARM64::LDPXi || MI->getOpcode() == ARM64::LDPDi) { - if (!isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) || - !isCalleeSavedRegister(MI->getOperand(1).getReg(), CSRegs) || - MI->getOperand(2).getReg() != ARM64::SP) + if (!isCalleeSavedRegister(MI->getOperand(RtIdx).getReg(), CSRegs) || + !isCalleeSavedRegister(MI->getOperand(RtIdx + 1).getReg(), CSRegs) || + MI->getOperand(RtIdx + 2).getReg() != ARM64::SP) return false; return true; } @@ -667,8 +671,11 @@ bool ARM64FrameLowering::spillCalleeSavedRegisters( const int Offset = (i == 0) ? 
-Count : i; assert((Offset >= -64 && Offset <= 63) && "Offset out of bounds for STP immediate"); - BuildMI(MBB, MI, DL, TII.get(StrOpc)) - .addReg(Reg2, getPrologueDeath(MF, Reg2)) + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); + if (StrOpc == ARM64::STPDpre || StrOpc == ARM64::STPXpre) + MIB.addReg(ARM64::SP, RegState::Define); + + MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) .addReg(Reg1, getPrologueDeath(MF, Reg1)) .addReg(ARM64::SP) .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit @@ -734,8 +741,11 @@ bool ARM64FrameLowering::restoreCalleeSavedRegisters( const int Offset = (i == Count - 2) ? Count : Count - i - 2; assert((Offset >= -64 && Offset <= 63) && "Offset out of bounds for LDP immediate"); - BuildMI(MBB, MI, DL, TII.get(LdrOpc)) - .addReg(Reg2, getDefRegState(true)) + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); + if (LdrOpc == ARM64::LDPXpost || LdrOpc == ARM64::LDPDpost) + MIB.addReg(ARM64::SP, RegState::Define); + + MIB.addReg(Reg2, getDefRegState(true)) .addReg(Reg1, getDefRegState(true)) .addReg(ARM64::SP) .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8] diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp index 8fec6f02b768..9b235db30a32 100644 --- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp +++ b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp @@ -901,14 +901,14 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { ISD::LoadExtType ExtType = LD->getExtensionType(); bool InsertTo64 = false; if (VT == MVT::i64) - Opcode = IsPre ? ARM64::LDRXpre_isel : ARM64::LDRXpost_isel; + Opcode = IsPre ? ARM64::LDRXpre : ARM64::LDRXpost; else if (VT == MVT::i32) { if (ExtType == ISD::NON_EXTLOAD) - Opcode = IsPre ? ARM64::LDRWpre_isel : ARM64::LDRWpost_isel; + Opcode = IsPre ? ARM64::LDRWpre : ARM64::LDRWpost; else if (ExtType == ISD::SEXTLOAD) - Opcode = IsPre ? ARM64::LDRSWpre_isel : ARM64::LDRSWpost_isel; + Opcode = IsPre ? ARM64::LDRSWpre : ARM64::LDRSWpost; else { - Opcode = IsPre ? ARM64::LDRWpre_isel : ARM64::LDRWpost_isel; + Opcode = IsPre ? ARM64::LDRWpre : ARM64::LDRWpost; InsertTo64 = true; // The result of the load is only i32. It's the subreg_to_reg that makes // it into an i64. @@ -917,11 +917,11 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { } else if (VT == MVT::i16) { if (ExtType == ISD::SEXTLOAD) { if (DstVT == MVT::i64) - Opcode = IsPre ? ARM64::LDRSHXpre_isel : ARM64::LDRSHXpost_isel; + Opcode = IsPre ? ARM64::LDRSHXpre : ARM64::LDRSHXpost; else - Opcode = IsPre ? ARM64::LDRSHWpre_isel : ARM64::LDRSHWpost_isel; + Opcode = IsPre ? ARM64::LDRSHWpre : ARM64::LDRSHWpost; } else { - Opcode = IsPre ? ARM64::LDRHHpre_isel : ARM64::LDRHHpost_isel; + Opcode = IsPre ? ARM64::LDRHHpre : ARM64::LDRHHpost; InsertTo64 = DstVT == MVT::i64; // The result of the load is only i32. It's the subreg_to_reg that makes // it into an i64. @@ -930,22 +930,22 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { } else if (VT == MVT::i8) { if (ExtType == ISD::SEXTLOAD) { if (DstVT == MVT::i64) - Opcode = IsPre ? ARM64::LDRSBXpre_isel : ARM64::LDRSBXpost_isel; + Opcode = IsPre ? ARM64::LDRSBXpre : ARM64::LDRSBXpost; else - Opcode = IsPre ? ARM64::LDRSBWpre_isel : ARM64::LDRSBWpost_isel; + Opcode = IsPre ? ARM64::LDRSBWpre : ARM64::LDRSBWpost; } else { - Opcode = IsPre ? ARM64::LDRBBpre_isel : ARM64::LDRBBpost_isel; + Opcode = IsPre ? 
ARM64::LDRBBpre : ARM64::LDRBBpost; InsertTo64 = DstVT == MVT::i64; // The result of the load is only i32. It's the subreg_to_reg that makes // it into an i64. DstVT = MVT::i32; } } else if (VT == MVT::f32) { - Opcode = IsPre ? ARM64::LDRSpre_isel : ARM64::LDRSpost_isel; + Opcode = IsPre ? ARM64::LDRSpre : ARM64::LDRSpost; } else if (VT == MVT::f64 || VT.is64BitVector()) { - Opcode = IsPre ? ARM64::LDRDpre_isel : ARM64::LDRDpost_isel; + Opcode = IsPre ? ARM64::LDRDpre : ARM64::LDRDpost; } else if (VT.is128BitVector()) { - Opcode = IsPre ? ARM64::LDRQpre_isel : ARM64::LDRQpost_isel; + Opcode = IsPre ? ARM64::LDRQpre : ARM64::LDRQpost; } else return nullptr; SDValue Chain = LD->getChain(); @@ -954,21 +954,25 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { int OffsetVal = (int)OffsetOp->getZExtValue(); SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64); SDValue Ops[] = { Base, Offset, Chain }; - SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), DstVT, MVT::i64, + SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT, MVT::Other, Ops); // Either way, we're replacing the node, so tell the caller that. Done = true; + SDValue LoadedVal = SDValue(Res, 1); if (InsertTo64) { SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); - SDNode *Sub = CurDAG->getMachineNode( - ARM64::SUBREG_TO_REG, SDLoc(N), MVT::i64, - CurDAG->getTargetConstant(0, MVT::i64), SDValue(Res, 0), SubReg); - ReplaceUses(SDValue(N, 0), SDValue(Sub, 0)); - ReplaceUses(SDValue(N, 1), SDValue(Res, 1)); - ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); - return nullptr; + LoadedVal = + SDValue(CurDAG->getMachineNode(ARM64::SUBREG_TO_REG, SDLoc(N), MVT::i64, + CurDAG->getTargetConstant(0, MVT::i64), + LoadedVal, SubReg), + 0); } - return Res; + + ReplaceUses(SDValue(N, 0), LoadedVal); + ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); + ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); + + return nullptr; } SDNode *ARM64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/ARM64/ARM64InstrFormats.td index bf9fa2992b71..ea45b3d4fb22 100644 --- a/lib/Target/ARM64/ARM64InstrFormats.td +++ b/lib/Target/ARM64/ARM64InstrFormats.td @@ -2918,8 +2918,8 @@ multiclass StoreUnprivileged sz, bit V, bits<2> opc, //--- class BaseLoadStorePreIdx sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, string cstr> - : I { + string asm, string cstr, list pat> + : I { bits<5> Rt; bits<5> Rn; bits<9> offset; @@ -2939,74 +2939,34 @@ class BaseLoadStorePreIdx sz, bit V, bits<2> opc, dag oops, dag iops, let hasSideEffects = 0 in { let mayStore = 0, mayLoad = 1 in -// FIXME: Modeling the write-back of these instructions for isel used -// to be tricky. we need the complex addressing mode for the memory -// reference, but we also need the write-back specified as a tied -// operand to the base register. It should work now, but needs to be -// done as a separate patch. This would allow us to be rid of the -// codegenonly pseudoinstructions below too. 
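
The FIXME block removed here described exactly the limitation this commit lifts: the instruction needed both a writeback def tied to the base register and an ordinary memory pattern. One visible consequence, already applied in the SelectIndexedLoad hunk above, is that the selected machine node now produces (updated base, loaded value, chain) in that order, so the loaded value is taken from result 1. A toy stand-in for that result contract (hypothetical, not the SelectionDAG API):

#include <cstdint>

// Results of a post-indexed load in the new order:
//   WBack = written-back base, Value = loaded data (the chain token is omitted).
struct IndexedLoadResults { uint64_t WBack; uint64_t Value; };

IndexedLoadResults postIndexedLoad(const uint64_t *Mem, uint64_t BaseIdx, int64_t Off) {
  IndexedLoadResults R;
  R.Value = Mem[BaseIdx];    // load from the original address
  R.WBack = BaseIdx + Off;   // base updated after the access
  return R;
}
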
class LoadPreIdx sz, bit V, bits<2> opc, RegisterClass regtype, string asm> : BaseLoadStorePreIdx, + "$Rn = $wback", []>, Sched<[WriteLD, WriteAdr]>; let mayStore = 1, mayLoad = 0 in class StorePreIdx sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> + string asm, SDPatternOperator storeop, ValueType Ty> : BaseLoadStorePreIdx, + asm, "$Rn = $wback", + [(set GPR64sp:$wback, + (storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>, Sched<[WriteAdr, WriteST]>; } // hasSideEffects = 0 -// ISel pseudo-instructions which have the tied operands. When the MC lowering -// logic finally gets smart enough to strip off tied operands that are just -// for isel convenience, we can get rid of these pseudos and just reference -// the real instructions directly. -// -// Ironically, also because of the writeback operands, we can't put the -// matcher pattern directly on the instruction, but need to define it -// separately. -// -// Loads aren't matched with patterns here at all, but rather in C++ -// custom lowering. -let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in { -class LoadPreIdxPseudo - : Pseudo<(outs regtype:$Rt, GPR64sp:$wback), - (ins GPR64sp:$addr, simm9:$offset), [], - "$addr = $wback,@earlyclobber $wback">, - Sched<[WriteLD, WriteAdr]>; -class LoadPostIdxPseudo - : Pseudo<(outs regtype:$Rt, GPR64sp:$wback), - (ins GPR64sp:$addr, simm9:$offset), [], - "$addr = $wback,@earlyclobber $wback">, - Sched<[WriteLD, WriteI]>; -} -multiclass StorePreIdxPseudo { - let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in - def _isel: Pseudo<(outs GPR64sp:$wback), - (ins regtype:$Rt, GPR64sp:$addr, simm9:$offset), [], - "$addr = $wback,@earlyclobber $wback">, - Sched<[WriteAdr, WriteST]>; - - def : Pat<(OpNode (Ty regtype:$Rt), GPR64sp:$addr, simm9:$offset), - (!cast(NAME#_isel) regtype:$Rt, GPR64sp:$addr, - simm9:$offset)>; -} - //--- // Load/store post-indexed //--- // (pre-index) load/stores. class BaseLoadStorePostIdx sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, string cstr> - : I { + string asm, string cstr, list pat> + : I { bits<5> Rt; bits<5> Rn; bits<9> offset; @@ -3026,51 +2986,26 @@ class BaseLoadStorePostIdx sz, bit V, bits<2> opc, dag oops, dag iops, let hasSideEffects = 0 in { let mayStore = 0, mayLoad = 1 in -// FIXME: Modeling the write-back of these instructions for isel used -// to be tricky. we need the complex addressing mode for the memory -// reference, but we also need the write-back specified as a tied -// operand to the base register. It should work now, but needs to be -// done as a separate patch. This would allow us to be rid of the -// codegenonly pseudoinstructions below too. class LoadPostIdx sz, bit V, bits<2> opc, RegisterClass regtype, string asm> : BaseLoadStorePostIdx, + asm, "$Rn = $wback", []>, Sched<[WriteLD, WriteI]>; let mayStore = 1, mayLoad = 0 in class StorePostIdx sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> + string asm, SDPatternOperator storeop, ValueType Ty> : BaseLoadStorePostIdx, + asm, "$Rn = $wback", + [(set GPR64sp:$wback, + (storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>, Sched<[WriteAdr, WriteST, ReadAdrBase]>; } // hasSideEffects = 0 -// ISel pseudo-instructions which have the tied operands. When the MC lowering -// logic finally gets smart enough to strip off tied operands that are just -// for isel convenience, we can get rid of these pseudos and just reference -// the real instructions directly. 
-// -// Ironically, also because of the writeback operands, we can't put the -// matcher pattern directly on the instruction, but need to define it -// separately. -multiclass StorePostIdxPseudo { - let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in - def _isel: Pseudo<(outs GPR64sp:$wback), - (ins regtype:$Rt, GPR64sp:$Rn, simm9:$idx), [], - "$Rn = $wback,@earlyclobber $wback">, - PseudoInstExpansion<(Insn regtype:$Rt, GPR64sp:$Rn, simm9:$idx)>, - Sched<[WriteAdr, WriteST, ReadAdrBase]>; - - def : Pat<(OpNode (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$idx), - (!cast(NAME#_isel) regtype:$Rt, GPR64sp:$Rn, - simm9:$idx)>; -} //--- // Load/store pair @@ -3129,7 +3064,7 @@ multiclass StorePairOffset opc, bit V, RegisterClass regtype, // (pre-indexed) class BaseLoadStorePairPreIdx opc, bit V, bit L, dag oops, dag iops, string asm> - : I { + : I { bits<5> Rt; bits<5> Rt2; bits<5> Rn; @@ -3152,14 +3087,14 @@ let mayStore = 0, mayLoad = 1 in class LoadPairPreIdx opc, bit V, RegisterClass regtype, Operand indextype, string asm> : BaseLoadStorePairPreIdx, Sched<[WriteLD, WriteLDHi, WriteAdr]>; let mayStore = 1, mayLoad = 0 in class StorePairPreIdx opc, bit V, RegisterClass regtype, Operand indextype, string asm> - : BaseLoadStorePairPreIdx, @@ -3170,7 +3105,7 @@ class StorePairPreIdx opc, bit V, RegisterClass regtype, class BaseLoadStorePairPostIdx opc, bit V, bit L, dag oops, dag iops, string asm> - : I { + : I { bits<5> Rt; bits<5> Rt2; bits<5> Rn; @@ -3193,7 +3128,7 @@ let mayStore = 0, mayLoad = 1 in class LoadPairPostIdx opc, bit V, RegisterClass regtype, Operand idxtype, string asm> : BaseLoadStorePairPostIdx, Sched<[WriteLD, WriteLDHi, WriteAdr]>; @@ -3201,7 +3136,7 @@ let mayStore = 1, mayLoad = 0 in class StorePairPostIdx opc, bit V, RegisterClass regtype, Operand idxtype, string asm> : BaseLoadStorePairPostIdx, Sched<[WriteAdr, WriteSTP]>; diff --git a/lib/Target/ARM64/ARM64InstrInfo.cpp b/lib/Target/ARM64/ARM64InstrInfo.cpp index e1f9667841c8..fbbddd566606 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.cpp +++ b/lib/Target/ARM64/ARM64InstrInfo.cpp @@ -1389,10 +1389,12 @@ void ARM64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, SrcReg, getKillRegState(KillSrc)); } else { BuildMI(MBB, I, DL, get(ARM64::STRQpre)) + .addReg(ARM64::SP, RegState::Define) .addReg(SrcReg, getKillRegState(KillSrc)) .addReg(ARM64::SP) .addImm(-16); BuildMI(MBB, I, DL, get(ARM64::LDRQpre)) + .addReg(ARM64::SP, RegState::Define) .addReg(DestReg, RegState::Define) .addReg(ARM64::SP) .addImm(16); diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td index 94a39c111224..9c39d72a8e7b 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ b/lib/Target/ARM64/ARM64InstrInfo.td @@ -1699,21 +1699,6 @@ def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32, "ldrh">; // load sign-extended word def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64, "ldrsw">; -// ISel pseudos and patterns. See expanded comment on LoadPreIdxPseudo. 
-def LDRQpre_isel : LoadPreIdxPseudo; -def LDRDpre_isel : LoadPreIdxPseudo; -def LDRSpre_isel : LoadPreIdxPseudo; -def LDRXpre_isel : LoadPreIdxPseudo; -def LDRWpre_isel : LoadPreIdxPseudo; -def LDRHHpre_isel : LoadPreIdxPseudo; -def LDRBBpre_isel : LoadPreIdxPseudo; - -def LDRSWpre_isel : LoadPreIdxPseudo; -def LDRSHWpre_isel : LoadPreIdxPseudo; -def LDRSHXpre_isel : LoadPreIdxPseudo; -def LDRSBWpre_isel : LoadPreIdxPseudo; -def LDRSBXpre_isel : LoadPreIdxPseudo; - //--- // (immediate post-indexed) def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32, "ldr">; @@ -1739,21 +1724,6 @@ def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32, "ldrh">; // load sign-extended word def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64, "ldrsw">; -// ISel pseudos and patterns. See expanded comment on LoadPostIdxPseudo. -def LDRQpost_isel : LoadPostIdxPseudo; -def LDRDpost_isel : LoadPostIdxPseudo; -def LDRSpost_isel : LoadPostIdxPseudo; -def LDRXpost_isel : LoadPostIdxPseudo; -def LDRWpost_isel : LoadPostIdxPseudo; -def LDRHHpost_isel : LoadPostIdxPseudo; -def LDRBBpost_isel : LoadPostIdxPseudo; - -def LDRSWpost_isel : LoadPostIdxPseudo; -def LDRSHWpost_isel : LoadPostIdxPseudo; -def LDRSHXpost_isel : LoadPostIdxPseudo; -def LDRSBWpost_isel : LoadPostIdxPseudo; -def LDRSBXpost_isel : LoadPostIdxPseudo; - //===----------------------------------------------------------------------===// // Store instructions. //===----------------------------------------------------------------------===// @@ -2072,119 +2042,103 @@ defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; //--- // (immediate pre-indexed) -def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32, "str">; -def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64, "str">; -def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8, "str">; -def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16, "str">; -def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32, "str">; -def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64, "str">; -def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128, "str">; - -def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32, "strb">; -def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32, "strh">; - -// ISel pseudos and patterns. See expanded comment on StorePreIdxPseudo. 
-defm STRQpre : StorePreIdxPseudo; -defm STRDpre : StorePreIdxPseudo; -defm STRSpre : StorePreIdxPseudo; -defm STRXpre : StorePreIdxPseudo; -defm STRWpre : StorePreIdxPseudo; -defm STRHHpre : StorePreIdxPseudo; -defm STRBBpre : StorePreIdxPseudo; +def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32, "str", pre_store, i32>; +def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64, "str", pre_store, i64>; +def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8, "str", pre_store, untyped>; +def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16, "str", pre_store, f16>; +def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32, "str", pre_store, f32>; +def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64, "str", pre_store, f64>; +def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128, "str", pre_store, f128>; + +def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32, "strb", pre_truncsti8, i32>; +def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32, "strh", pre_truncsti16, i32>; + // truncstore i64 def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), - (STRWpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, - simm9:$off)>; + (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), - (STRHHpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, - simm9:$off)>; + (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), - (STRBBpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, - simm9:$off)>; + (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpre_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpre_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpre_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpre_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpre_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpre_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpre_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpre_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpre_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpre_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpre_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRQpre FPR128:$Rt, GPR64sp:$addr, 
simm9:$off)>; def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpre_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; //--- // (immediate post-indexed) -def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32, "str">; -def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64, "str">; -def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8, "str">; -def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16, "str">; -def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32, "str">; -def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64, "str">; -def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128, "str">; - -def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32, "strb">; -def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32, "strh">; - -// ISel pseudos and patterns. See expanded comment on StorePostIdxPseudo. -defm STRQpost : StorePostIdxPseudo; -defm STRDpost : StorePostIdxPseudo; -defm STRSpost : StorePostIdxPseudo; -defm STRXpost : StorePostIdxPseudo; -defm STRWpost : StorePostIdxPseudo; -defm STRHHpost : StorePostIdxPseudo; -defm STRBBpost : StorePostIdxPseudo; +def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32, "str", post_store, i32>; +def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64, "str", post_store, i64>; +def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8, "str", post_store, untyped>; +def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16, "str", post_store, f16>; +def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32, "str", post_store, f32>; +def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64, "str", post_store, f64>; +def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128, "str", post_store, f128>; + +def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32, "strb", post_truncsti8, i32>; +def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32, "strh", post_truncsti16, i32>; + // truncstore i64 def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), - (STRWpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, - simm9:$off)>; + (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), - (STRHHpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, - simm9:$off)>; + (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), - (STRBBpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, - simm9:$off)>; + (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost_isel FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRDpost FPR64:$Rt, 
GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost_isel FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; //===----------------------------------------------------------------------===// // Load/store exclusive instructions. diff --git a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp b/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp index 9a8e1c3d91c0..e2c4b13f0369 100644 --- a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp +++ b/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp @@ -528,6 +528,7 @@ ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, unsigned NewOpc = getPreIndexedOpcode(I->getOpcode()); MachineInstrBuilder MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) + .addOperand(Update->getOperand(0)) .addOperand(I->getOperand(0)) .addOperand(I->getOperand(1)) .addImm(Value); @@ -571,6 +572,7 @@ ARM64LoadStoreOpt::mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, unsigned NewOpc = getPostIndexedOpcode(I->getOpcode()); MachineInstrBuilder MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) + .addOperand(Update->getOperand(0)) .addOperand(I->getOperand(0)) .addOperand(I->getOperand(1)) .addImm(Value); diff --git a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp b/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp index 982690fe86c0..0c422c5cece8 100644 --- a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp +++ b/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp @@ -3146,9 +3146,9 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, case ARM64::LDPWpre: case ARM64::LDPXpost: case ARM64::LDPXpre: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rt2 = Inst.getOperand(1).getReg(); - unsigned Rn = Inst.getOperand(2).getReg(); + unsigned Rt = Inst.getOperand(1).getReg(); + unsigned Rt2 = Inst.getOperand(2).getReg(); + unsigned Rn = Inst.getOperand(3).getReg(); if (RI->isSubRegisterEq(Rn, Rt)) return Error(Loc[0], "unpredictable LDP instruction, writeback base " "is also a destination"); @@ -3157,13 +3157,6 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, "is also a destination"); // FALLTHROUGH } - case ARM64::LDPDpost: - case ARM64::LDPDpre: - case ARM64::LDPQpost: - case ARM64::LDPQpre: - case ARM64::LDPSpost: - case ARM64::LDPSpre: - case ARM64::LDPSWpost: case ARM64::LDPDi: case ARM64::LDPQi: case ARM64::LDPSi: @@ -3176,6 +3169,19 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt"); break; } + case ARM64::LDPDpost: + case ARM64::LDPDpre: + case ARM64::LDPQpost: + case 
ARM64::LDPQpre: + case ARM64::LDPSpost: + case ARM64::LDPSpre: + case ARM64::LDPSWpost: { + unsigned Rt = Inst.getOperand(1).getReg(); + unsigned Rt2 = Inst.getOperand(2).getReg(); + if (Rt == Rt2) + return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt"); + break; + } case ARM64::STPDpost: case ARM64::STPDpre: case ARM64::STPQpost: @@ -3186,9 +3192,9 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, case ARM64::STPWpre: case ARM64::STPXpost: case ARM64::STPXpre: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rt2 = Inst.getOperand(1).getReg(); - unsigned Rn = Inst.getOperand(2).getReg(); + unsigned Rt = Inst.getOperand(1).getReg(); + unsigned Rt2 = Inst.getOperand(2).getReg(); + unsigned Rn = Inst.getOperand(3).getReg(); if (RI->isSubRegisterEq(Rn, Rt)) return Error(Loc[0], "unpredictable STP instruction, writeback base " "is also a source"); @@ -3219,8 +3225,8 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, case ARM64::LDRSWpost: case ARM64::LDRWpost: case ARM64::LDRXpost: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rn = Inst.getOperand(1).getReg(); + unsigned Rt = Inst.getOperand(1).getReg(); + unsigned Rn = Inst.getOperand(2).getReg(); if (RI->isSubRegisterEq(Rn, Rt)) return Error(Loc[0], "unpredictable LDR instruction, writeback base " "is also a source"); @@ -3238,8 +3244,8 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, case ARM64::STRHpre: case ARM64::STRWpre: case ARM64::STRXpre: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rn = Inst.getOperand(1).getReg(); + unsigned Rt = Inst.getOperand(1).getReg(); + unsigned Rn = Inst.getOperand(2).getReg(); if (RI->isSubRegisterEq(Rn, Rt)) return Error(Loc[0], "unpredictable STR instruction, writeback base " "is also a source"); diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp b/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp index 20bcb366bf5e..4fa9339d2b7a 100644 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp +++ b/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp @@ -902,6 +902,60 @@ static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, if (offset & (1 << (9 - 1))) offset |= ~((1LL << 9) - 1); + // First operand is always the writeback to the address register, if needed. 
+ switch (Inst.getOpcode()) { + default: + break; + case ARM64::LDRSBWpre: + case ARM64::LDRSHWpre: + case ARM64::STRBBpre: + case ARM64::LDRBBpre: + case ARM64::STRHHpre: + case ARM64::LDRHHpre: + case ARM64::STRWpre: + case ARM64::LDRWpre: + case ARM64::LDRSBWpost: + case ARM64::LDRSHWpost: + case ARM64::STRBBpost: + case ARM64::LDRBBpost: + case ARM64::STRHHpost: + case ARM64::LDRHHpost: + case ARM64::STRWpost: + case ARM64::LDRWpost: + case ARM64::LDRSBXpre: + case ARM64::LDRSHXpre: + case ARM64::STRXpre: + case ARM64::LDRSWpre: + case ARM64::LDRXpre: + case ARM64::LDRSBXpost: + case ARM64::LDRSHXpost: + case ARM64::STRXpost: + case ARM64::LDRSWpost: + case ARM64::LDRXpost: + case ARM64::LDRQpre: + case ARM64::STRQpre: + case ARM64::LDRQpost: + case ARM64::STRQpost: + case ARM64::LDRDpre: + case ARM64::STRDpre: + case ARM64::LDRDpost: + case ARM64::STRDpost: + case ARM64::LDRSpre: + case ARM64::STRSpre: + case ARM64::LDRSpost: + case ARM64::STRSpost: + case ARM64::LDRHpre: + case ARM64::STRHpre: + case ARM64::LDRHpost: + case ARM64::STRHpost: + case ARM64::LDRBpre: + case ARM64::STRBpre: + case ARM64::LDRBpost: + case ARM64::STRBpost: + DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); + break; + } + switch (Inst.getOpcode()) { default: return Fail; @@ -1112,6 +1166,37 @@ static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, unsigned Opcode = Inst.getOpcode(); bool NeedsDisjointWritebackTransfer = false; + + // First operand is always writeback of base register. + switch (Opcode) { + default: + break; + case ARM64::LDPXpost: + case ARM64::STPXpost: + case ARM64::LDPSWpost: + case ARM64::LDPXpre: + case ARM64::STPXpre: + case ARM64::LDPSWpre: + case ARM64::LDPWpost: + case ARM64::STPWpost: + case ARM64::LDPWpre: + case ARM64::STPWpre: + case ARM64::LDPQpost: + case ARM64::STPQpost: + case ARM64::LDPQpre: + case ARM64::STPQpre: + case ARM64::LDPDpost: + case ARM64::STPDpost: + case ARM64::LDPDpre: + case ARM64::STPDpre: + case ARM64::LDPSpost: + case ARM64::STPSpost: + case ARM64::LDPSpre: + case ARM64::STPSpre: + DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); + break; + } + switch (Opcode) { default: return Fail; From 213f915ffe05a07ec8d3fb502cb7bd93f756724e Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 22 May 2014 12:14:02 +0000 Subject: [PATCH 074/906] Yes they do git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209429 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/ARM64InstrInfo.td | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td index 9c39d72a8e7b..81c04c347734 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ b/lib/Target/ARM64/ARM64InstrInfo.td @@ -1585,7 +1585,6 @@ def simm9_offset_fb128 : Operand { let ParserMatchClass = SImm9OffsetFB128Operand; } -// FIXME: these don't work def : InstAlias<"ldr $Rt, [$Rn, $offset]", (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; def : InstAlias<"ldr $Rt, [$Rn, $offset]", From 65ea1ad2086954740678842d1b877f817003f727 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 22 May 2014 12:14:49 +0000 Subject: [PATCH 075/906] ARM64: these work too git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209430 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/ARM64InstrInfo.td | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td index 81c04c347734..e68980c83c54 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ 
b/lib/Target/ARM64/ARM64InstrInfo.td @@ -1633,7 +1633,6 @@ defm LDURSW (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; // zero and sign extending aliases from generic LDR* mnemonics to LDUR*. -// FIXME: these don't work now def : InstAlias<"ldrb $Rt, [$Rn, $offset]", (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; def : InstAlias<"ldrh $Rt, [$Rn, $offset]", @@ -2010,7 +2009,6 @@ def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), //--- // STR mnemonics fall back to STUR for negative or unaligned offsets. -// FIXME: these don't work now. def : InstAlias<"str $Rt, [$Rn, $offset]", (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; def : InstAlias<"str $Rt, [$Rn, $offset]", From de70176f5ff5465cb32828ffcd70797c6ccb1f81 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 22 May 2014 13:03:43 +0000 Subject: [PATCH 076/906] Segmented stacks: omit __morestack call when there's no frame. Patch by Florian Zeitz git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209436 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFrameLowering.cpp | 8 +++-- lib/Target/X86/X86FrameLowering.cpp | 14 +++++---- test/CodeGen/ARM/segmented-stacks.ll | 16 ++++++++-- test/CodeGen/Thumb/segmented-stacks.ll | 16 ++++++++-- test/CodeGen/X86/segmented-stacks.ll | 41 +++++++++++++++++++++----- 5 files changed, 77 insertions(+), 18 deletions(-) diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index c0f8a8d90253..0caf4bfd77a5 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1746,6 +1746,12 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { ARMFunctionInfo *ARMFI = MF.getInfo(); DebugLoc DL; + uint64_t StackSize = MFI->getStackSize(); + + // Do not generate a prologue for functions with a stack of size zero + if (StackSize == 0) + return; + // Use R4 and R5 as scratch registers. // We save R4 and R5 before use and restore them before leaving the function. unsigned ScratchReg0 = ARM::R4; @@ -1775,8 +1781,6 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MF.push_front(PrevStackMBB); // The required stack size that is aligned to ARM constant criterion. - uint64_t StackSize = MFI->getStackSize(); - AlignedStackSize = alignToARMConstant(StackSize); // When the frame size is less than 256 we just compare the stack diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 1c1b06623bde..4c1374f70f42 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1176,6 +1176,15 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { !STI.isTargetWin32() && !STI.isTargetWin64() && !STI.isTargetFreeBSD()) report_fatal_error("Segmented stacks not supported on this platform."); + // Eventually StackSize will be calculated by a link-time pass; which will + // also decide whether checking code needs to be injected into this particular + // prologue. 
+ StackSize = MFI->getStackSize(); + + // Do not generate a prologue for functions with a stack of size zero + if (StackSize == 0) + return; + MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); X86MachineFunctionInfo *X86FI = MF.getInfo(); @@ -1200,11 +1209,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MF.push_front(allocMBB); MF.push_front(checkMBB); - // Eventually StackSize will be calculated by a link-time pass; which will - // also decide whether checking code needs to be injected into this particular - // prologue. - StackSize = MFI->getStackSize(); - // When the frame size is less than 256 we just compare the stack // boundary directly to the value of the stack pointer, per gcc. bool CompareStackPointer = StackSize < kSplitStackAvailable; diff --git a/test/CodeGen/ARM/segmented-stacks.ll b/test/CodeGen/ARM/segmented-stacks.ll index a7804b900a50..9873bf332948 100644 --- a/test/CodeGen/ARM/segmented-stacks.ll +++ b/test/CodeGen/ARM/segmented-stacks.ll @@ -57,6 +57,8 @@ define void @test_basic() #0 { define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { %addend = load i32 * %closure %result = add i32 %other, %addend + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) ret i32 %result ; ARM-linux: test_nested: @@ -68,7 +70,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; ARM-linux-NEXT: cmp r4, r5 ; ARM-linux-NEXT: blo .LBB1_2 -; ARM-linux: mov r4, #0 +; ARM-linux: mov r4, #56 ; ARM-linux-NEXT: mov r5, #0 ; ARM-linux-NEXT: stmdb sp!, {lr} ; ARM-linux-NEXT: bl __morestack @@ -87,7 +89,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; ARM-android-NEXT: cmp r4, r5 ; ARM-android-NEXT: blo .LBB1_2 -; ARM-android: mov r4, #0 +; ARM-android: mov r4, #56 ; ARM-android-NEXT: mov r5, #0 ; ARM-android-NEXT: stmdb sp!, {lr} ; ARM-android-NEXT: bl __morestack @@ -234,4 +236,14 @@ define fastcc void @test_fastcc_large() #0 { } +define void @test_nostack() #0 { + ret void + +; ARM-linux-LABEL: test_nostack: +; ARM-linux-NOT: bl __morestack + +; ARM-android-LABEL: test_nostack: +; ARM-android-NOT: bl __morestack +} + attributes #0 = { "split-stack" } diff --git a/test/CodeGen/Thumb/segmented-stacks.ll b/test/CodeGen/Thumb/segmented-stacks.ll index 89043ec11dce..d6e25c7792e8 100644 --- a/test/CodeGen/Thumb/segmented-stacks.ll +++ b/test/CodeGen/Thumb/segmented-stacks.ll @@ -57,6 +57,8 @@ define void @test_basic() #0 { define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { %addend = load i32 * %closure %result = add i32 %other, %addend + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) ret i32 %result ; Thumb-android: test_nested: @@ -68,7 +70,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; Thumb-android-NEXT: cmp r4, r5 ; Thumb-android-NEXT: blo .LBB1_2 -; Thumb-android: mov r4, #0 +; Thumb-android: mov r4, #56 ; Thumb-android-NEXT: mov r5, #0 ; Thumb-android-NEXT: push {lr} ; Thumb-android-NEXT: bl __morestack @@ -88,7 +90,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; Thumb-linux-NEXT: cmp r4, r5 ; Thumb-linux-NEXT: blo .LBB1_2 -; Thumb-linux: mov r4, #0 +; Thumb-linux: mov r4, #56 ; Thumb-linux-NEXT: mov r5, #0 ; Thumb-linux-NEXT: push {lr} ; Thumb-linux-NEXT: bl __morestack @@ -246,4 +248,14 @@ define fastcc void @test_fastcc_large() #0 { } +define void @test_nostack() #0 { + ret void + +; Thumb-android-LABEL: test_nostack: +; Thumb-android-NOT: bl __morestack + +; 
Thumb-linux-LABEL: test_nostack: +; Thumb-linux-NOT: bl __morestack +} + attributes #0 = { "split-stack" } diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll index 8089f2056847..9dab3cd8d6d5 100644 --- a/test/CodeGen/X86/segmented-stacks.ll +++ b/test/CodeGen/X86/segmented-stacks.ll @@ -107,13 +107,15 @@ define void @test_basic() #0 { define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { %addend = load i32 * %closure %result = add i32 %other, %addend + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) ret i32 %result ; X32-Linux: cmpl %gs:48, %esp ; X32-Linux-NEXT: ja .LBB1_2 ; X32-Linux: pushl $4 -; X32-Linux-NEXT: pushl $0 +; X32-Linux-NEXT: pushl $60 ; X32-Linux-NEXT: calll __morestack ; X32-Linux-NEXT: ret @@ -121,7 +123,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; X64-Linux-NEXT: ja .LBB1_2 ; X64-Linux: movq %r10, %rax -; X64-Linux-NEXT: movabsq $0, %r10 +; X64-Linux-NEXT: movabsq $56, %r10 ; X64-Linux-NEXT: movabsq $0, %r11 ; X64-Linux-NEXT: callq __morestack ; X64-Linux-NEXT: ret @@ -132,7 +134,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; X32-Darwin-NEXT: ja LBB1_2 ; X32-Darwin: pushl $4 -; X32-Darwin-NEXT: pushl $0 +; X32-Darwin-NEXT: pushl $60 ; X32-Darwin-NEXT: calll ___morestack ; X32-Darwin-NEXT: ret @@ -140,7 +142,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; X64-Darwin-NEXT: ja LBB1_2 ; X64-Darwin: movq %r10, %rax -; X64-Darwin-NEXT: movabsq $0, %r10 +; X64-Darwin-NEXT: movabsq $56, %r10 ; X64-Darwin-NEXT: movabsq $0, %r11 ; X64-Darwin-NEXT: callq ___morestack ; X64-Darwin-NEXT: ret @@ -150,7 +152,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; X32-MinGW-NEXT: ja LBB1_2 ; X32-MinGW: pushl $4 -; X32-MinGW-NEXT: pushl $0 +; X32-MinGW-NEXT: pushl $52 ; X32-MinGW-NEXT: calll ___morestack ; X32-MinGW-NEXT: ret @@ -159,7 +161,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; X64-MinGW-NEXT: ja .LBB1_2 ; X64-MinGW: movq %r10, %rax -; X64-MinGW-NEXT: movabsq $0, %r10 +; X64-MinGW-NEXT: movabsq $88, %r10 ; X64-MinGW-NEXT: movabsq $32, %r11 ; X64-MinGW-NEXT: callq __morestack ; X64-MinGW-NEXT: retq @@ -169,7 +171,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; X64-FreeBSD-NEXT: ja .LBB1_2 ; X64-FreeBSD: movq %r10, %rax -; X64-FreeBSD-NEXT: movabsq $0, %r10 +; X64-FreeBSD-NEXT: movabsq $56, %r10 ; X64-FreeBSD-NEXT: movabsq $0, %r11 ; X64-FreeBSD-NEXT: callq __morestack ; X64-FreeBSD-NEXT: ret @@ -435,4 +437,29 @@ define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) #0 { } +define void @test_nostack() #0 { + ret void + +; X32-Linux-LABEL: test_nostack: +; X32-Linux-NOT: calll __morestack + +; X64-Linux-LABEL: test_nostack: +; X32-Linux-NOT: callq __morestack + +; X32-Darwin-LABEL: test_nostack: +; X32-Darwin-NOT: calll __morestack + +; X64-Darwin-LABEL: test_nostack: +; X64-Darwin-NOT: callq __morestack + +; X32-MinGW-LABEL: test_nostack: +; X32-MinGW-NOT: calll __morestack + +; X64-MinGW-LABEL: test_nostack: +; X64-MinGW-NOT: callq __morestack + +; X64-FreeBSD-LABEL: test_nostack: +; X64-FreeBSD-NOT: callq __morestack +} + attributes #0 = { "split-stack" } From d16404a0e79a176b7698d9c7bc1ec146ae2f2f1b Mon Sep 17 00:00:00 2001 From: Diego Novillo Date: Thu, 22 May 2014 14:19:46 +0000 Subject: [PATCH 077/906] Add support for missed and analysis optimization remarks. Summary: This adds two new diagnostics: -pass-remarks-missed and -pass-remarks-analysis. 
They take the same values as -pass-remarks but are intended to be triggered in different contexts. -pass-remarks-missed is used by LLVMContext::emitOptimizationRemarkMissed, which passes call when they tried to apply a transformation but couldn't. -pass-remarks-analysis is used by LLVMContext::emitOptimizationRemarkAnalysis, which passes call when they want to inform the user about analysis results. The patch also: 1- Adds support in the inliner for the two new remarks and a test case. 2- Moves emitOptimizationRemark* functions to the llvm namespace. 3- Adds an LLVMContext argument instead of making them member functions of LLVMContext. Reviewers: qcolombet Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D3682 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209442 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/DiagnosticInfo.h | 151 ++++++++++++++++-- include/llvm/IR/LLVMContext.h | 9 -- lib/IR/DiagnosticInfo.cpp | 50 +++++- lib/IR/LLVMContext.cpp | 35 ++-- lib/IR/LLVMContextImpl.cpp | 68 +++++--- lib/IR/LLVMContextImpl.h | 10 +- lib/Transforms/IPO/Inliner.cpp | 51 +++++- .../Scalar/TailRecursionElimination.cpp | 16 +- lib/Transforms/Utils/LoopUnroll.cpp | 9 +- lib/Transforms/Utils/SimplifyLibCalls.cpp | 7 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 13 +- .../Transforms/Inline/optimization-remarks.ll | 60 +++++++ 12 files changed, 387 insertions(+), 92 deletions(-) create mode 100644 test/Transforms/Inline/optimization-remarks.ll diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h index 604de1fed93d..8e334c996d95 100644 --- a/include/llvm/IR/DiagnosticInfo.h +++ b/include/llvm/IR/DiagnosticInfo.h @@ -26,6 +26,7 @@ namespace llvm { class DiagnosticPrinter; class Function; class Instruction; +class LLVMContextImpl; class Twine; class Value; class DebugLoc; @@ -48,6 +49,8 @@ enum DiagnosticKind { DK_DebugMetadataVersion, DK_SampleProfile, DK_OptimizationRemark, + DK_OptimizationRemarkMissed, + DK_OptimizationRemarkAnalysis, DK_FirstPluginKind }; @@ -239,21 +242,21 @@ class DiagnosticInfoSampleProfile : public DiagnosticInfo { const Twine &Msg; }; -/// Diagnostic information for optimization remarks. -class DiagnosticInfoOptimizationRemark : public DiagnosticInfo { +/// Common features for diagnostics dealing with optimization remarks. +class DiagnosticInfoOptimizationRemarkBase : public DiagnosticInfo { public: - /// \p PassName is the name of the pass emitting this diagnostic. If - /// this name matches the regular expression given in -Rpass=, then the - /// diagnostic will be emitted. \p Fn is the function where the diagnostic - /// is being emitted. \p DLoc is the location information to use in the - /// diagnostic. If line table information is available, the diagnostic - /// will include the source code location. \p Msg is the message to show. - /// Note that this class does not copy this message, so this reference - /// must be valid for the whole life time of the diagnostic. - DiagnosticInfoOptimizationRemark(const char *PassName, const Function &Fn, - const DebugLoc &DLoc, const Twine &Msg) - : DiagnosticInfo(DK_OptimizationRemark, DS_Remark), PassName(PassName), - Fn(Fn), DLoc(DLoc), Msg(Msg) {} + /// \p PassName is the name of the pass emitting this diagnostic. + /// \p Fn is the function where the diagnostic is being emitted. \p DLoc is + /// the location information to use in the diagnostic. If line table + /// information is available, the diagnostic will include the source code + /// location. 
\p Msg is the message to show. Note that this class does not + /// copy this message, so this reference must be valid for the whole life time + /// of the diagnostic. + DiagnosticInfoOptimizationRemarkBase(enum DiagnosticKind Kind, + const char *PassName, const Function &Fn, + const DebugLoc &DLoc, const Twine &Msg) + : DiagnosticInfo(Kind, DS_Remark), PassName(PassName), Fn(Fn), DLoc(DLoc), + Msg(Msg) {} /// \see DiagnosticInfo::print. void print(DiagnosticPrinter &DP) const override; @@ -263,6 +266,16 @@ class DiagnosticInfoOptimizationRemark : public DiagnosticInfo { return DI->getKind() == DK_OptimizationRemark; } + /// Return true if this optimization remark is enabled by one of + /// of the LLVM command line flags (-pass-remarks, -pass-remarks-missed, + /// or -pass-remarks-analysis). Note that this only handles the LLVM + /// flags. We cannot access Clang flags from here (they are handled + /// in BackendConsumer::OptimizationRemarkHandler). + /// + /// \p pImpl points to the current LLVM context. It is needed to query the + /// value of the command line flag associated with this remark. + virtual bool isEnabled(LLVMContextImpl *pImpl) const = 0; + /// Return true if location information is available for this diagnostic. bool isLocationAvailable() const; @@ -296,9 +309,119 @@ class DiagnosticInfoOptimizationRemark : public DiagnosticInfo { const Twine &Msg; }; +/// Diagnostic information for applied optimization remarks. +class DiagnosticInfoOptimizationRemark + : public DiagnosticInfoOptimizationRemarkBase { +public: + /// \p PassName is the name of the pass emitting this diagnostic. If + /// this name matches the regular expression given in -Rpass=, then the + /// diagnostic will be emitted. \p Fn is the function where the diagnostic + /// is being emitted. \p DLoc is the location information to use in the + /// diagnostic. If line table information is available, the diagnostic + /// will include the source code location. \p Msg is the message to show. + /// Note that this class does not copy this message, so this reference + /// must be valid for the whole life time of the diagnostic. + DiagnosticInfoOptimizationRemark(const char *PassName, const Function &Fn, + const DebugLoc &DLoc, const Twine &Msg) + : DiagnosticInfoOptimizationRemarkBase(DK_OptimizationRemark, PassName, + Fn, DLoc, Msg) {} + + /// Hand rolled RTTI + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_OptimizationRemark; + } + + /// \see DiagnosticInfoOptimizationRemarkBase::isEnabled. + virtual bool isEnabled(LLVMContextImpl *pImpl) const override; +}; + +/// Diagnostic information for missed-optimization remarks. +class DiagnosticInfoOptimizationRemarkMissed + : public DiagnosticInfoOptimizationRemarkBase { +public: + /// \p PassName is the name of the pass emitting this diagnostic. If + /// this name matches the regular expression given in -Rpass-missed=, then the + /// diagnostic will be emitted. \p Fn is the function where the diagnostic + /// is being emitted. \p DLoc is the location information to use in the + /// diagnostic. If line table information is available, the diagnostic + /// will include the source code location. \p Msg is the message to show. + /// Note that this class does not copy this message, so this reference + /// must be valid for the whole life time of the diagnostic. 
+ DiagnosticInfoOptimizationRemarkMissed(const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, const Twine &Msg) + : DiagnosticInfoOptimizationRemarkBase(DK_OptimizationRemarkMissed, + PassName, Fn, DLoc, Msg) {} + + /// Hand rolled RTTI + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_OptimizationRemarkMissed; + } + + /// \see DiagnosticInfoOptimizationRemarkBase::isEnabled. + virtual bool isEnabled(LLVMContextImpl *pImpl) const override; +}; + +/// Diagnostic information for optimization analysis remarks. +class DiagnosticInfoOptimizationRemarkAnalysis + : public DiagnosticInfoOptimizationRemarkBase { +public: + /// \p PassName is the name of the pass emitting this diagnostic. If + /// this name matches the regular expression given in -Rpass-analysis=, then + /// the diagnostic will be emitted. \p Fn is the function where the diagnostic + /// is being emitted. \p DLoc is the location information to use in the + /// diagnostic. If line table information is available, the diagnostic will + /// include the source code location. \p Msg is the message to show. Note that + /// this class does not copy this message, so this reference must be valid for + /// the whole life time of the diagnostic. + DiagnosticInfoOptimizationRemarkAnalysis(const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) + : DiagnosticInfoOptimizationRemarkBase(DK_OptimizationRemarkAnalysis, + PassName, Fn, DLoc, Msg) {} + + /// Hand rolled RTTI + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_OptimizationRemarkAnalysis; + } + + /// \see DiagnosticInfoOptimizationRemarkBase::isEnabled. + virtual bool isEnabled(LLVMContextImpl *pImpl) const override; +}; + // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(DiagnosticInfo, LLVMDiagnosticInfoRef) +/// Emit an optimization-applied message. \p PassName is the name of the pass +/// emitting the message. If -Rpass= is given and \p PassName matches the +/// regular expression in -Rpass, then the remark will be emitted. \p Fn is +/// the function triggering the remark, \p DLoc is the debug location where +/// the diagnostic is generated. \p Msg is the message string to use. +void emitOptimizationRemark(LLVMContext &Ctx, const char *PassName, + const Function &Fn, const DebugLoc &DLoc, + const Twine &Msg); + +/// Emit an optimization-missed message. \p PassName is the name of the +/// pass emitting the message. If -Rpass-missed= is given and \p PassName +/// matches the regular expression in -Rpass, then the remark will be +/// emitted. \p Fn is the function triggering the remark, \p DLoc is the +/// debug location where the diagnostic is generated. \p Msg is the +/// message string to use. +void emitOptimizationRemarkMissed(LLVMContext &Ctx, const char *PassName, + const Function &Fn, const DebugLoc &DLoc, + const Twine &Msg); + +/// Emit an optimization analysis remark message. \p PassName is the name of +/// the pass emitting the message. If -Rpass-analysis= is given and \p +/// PassName matches the regular expression in -Rpass, then the remark will be +/// emitted. \p Fn is the function triggering the remark, \p DLoc is the debug +/// location where the diagnostic is generated. \p Msg is the message string +/// to use. 
+void emitOptimizationRemarkAnalysis(LLVMContext &Ctx, const char *PassName, + const Function &Fn, const DebugLoc &DLoc, + const Twine &Msg); + } // End namespace llvm #endif diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h index f9644aca6b47..4d940d599b9a 100644 --- a/include/llvm/IR/LLVMContext.h +++ b/include/llvm/IR/LLVMContext.h @@ -157,15 +157,6 @@ class LLVMContext { void emitError(const Instruction *I, const Twine &ErrorStr); void emitError(const Twine &ErrorStr); - /// emitOptimizationRemark - Emit an optimization remark message. \p PassName - /// is the name of the pass emitting the message. If -Rpass= is given - /// and \p PassName matches the regular expression in -Rpass, then the - /// remark will be emitted. \p Fn is the function triggering the remark, - /// \p DLoc is the debug location where the diagnostic is generated. - /// \p Msg is the message string to use. - void emitOptimizationRemark(const char *PassName, const Function &Fn, - const DebugLoc &DLoc, const Twine &Msg); - private: LLVMContext(LLVMContext&) LLVM_DELETED_FUNCTION; void operator=(LLVMContext&) LLVM_DELETED_FUNCTION; diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index 3f8100f985d7..68a69183afff 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -12,6 +12,7 @@ // Diagnostics reporting is still done as part of the LLVMContext. //===----------------------------------------------------------------------===// +#include "LLVMContextImpl.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -67,20 +68,20 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const { DP << getMsg(); } -bool DiagnosticInfoOptimizationRemark::isLocationAvailable() const { +bool DiagnosticInfoOptimizationRemarkBase::isLocationAvailable() const { return getFunction().getParent()->getNamedMetadata("llvm.dbg.cu") != nullptr; } -void DiagnosticInfoOptimizationRemark::getLocation(StringRef *Filename, - unsigned *Line, - unsigned *Column) const { +void DiagnosticInfoOptimizationRemarkBase::getLocation(StringRef *Filename, + unsigned *Line, + unsigned *Column) const { DILocation DIL(getDebugLoc().getAsMDNode(getFunction().getContext())); *Filename = DIL.getFilename(); *Line = DIL.getLineNumber(); *Column = DIL.getColumnNumber(); } -const std::string DiagnosticInfoOptimizationRemark::getLocationStr() const { +const std::string DiagnosticInfoOptimizationRemarkBase::getLocationStr() const { StringRef Filename(""); unsigned Line = 0; unsigned Column = 0; @@ -89,6 +90,43 @@ const std::string DiagnosticInfoOptimizationRemark::getLocationStr() const { return Twine(Filename + ":" + Twine(Line) + ":" + Twine(Column)).str(); } -void DiagnosticInfoOptimizationRemark::print(DiagnosticPrinter &DP) const { +void DiagnosticInfoOptimizationRemarkBase::print(DiagnosticPrinter &DP) const { DP << getLocationStr() << ": " << getMsg(); } + +bool +DiagnosticInfoOptimizationRemark::isEnabled(LLVMContextImpl *pImpl) const { + return pImpl->optimizationRemarkEnabledFor(this); +} + +bool DiagnosticInfoOptimizationRemarkMissed::isEnabled( + LLVMContextImpl *pImpl) const { + return pImpl->optimizationRemarkEnabledFor(this); +} + +bool DiagnosticInfoOptimizationRemarkAnalysis::isEnabled( + LLVMContextImpl *pImpl) const { + return pImpl->optimizationRemarkEnabledFor(this); +} + +void llvm::emitOptimizationRemark(LLVMContext &Ctx, const char *PassName, + const Function &Fn, const DebugLoc &DLoc, + const Twine &Msg) { + 
Ctx.diagnose(DiagnosticInfoOptimizationRemark(PassName, Fn, DLoc, Msg)); +} + +void llvm::emitOptimizationRemarkMissed(LLVMContext &Ctx, const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) { + Ctx.diagnose(DiagnosticInfoOptimizationRemarkMissed(PassName, Fn, DLoc, Msg)); +} + +void llvm::emitOptimizationRemarkAnalysis(LLVMContext &Ctx, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) { + Ctx.diagnose( + DiagnosticInfoOptimizationRemarkAnalysis(PassName, Fn, DLoc, Msg)); +} diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 5f94dca1eb96..7b75d42b85e1 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -142,14 +142,26 @@ void LLVMContext::diagnose(const DiagnosticInfo &DI) { return; } - // Optimization remarks are selective. They need to check whether - // the regexp pattern, passed via -pass-remarks, matches the name - // of the pass that is emitting the diagnostic. If there is no match, - // ignore the diagnostic and return. - if (DI.getKind() == llvm::DK_OptimizationRemark && - !pImpl->optimizationRemarksEnabledFor( - cast(DI).getPassName())) - return; + // Optimization remarks are selective. They need to check whether the regexp + // pattern, passed via one of the -pass-remarks* flags, matches the name of + // the pass that is emitting the diagnostic. If there is no match, ignore the + // diagnostic and return. + switch (DI.getKind()) { + case llvm::DK_OptimizationRemark: + if (!cast(DI).isEnabled(pImpl)) + return; + break; + case llvm::DK_OptimizationRemarkMissed: + if (!cast(DI).isEnabled(pImpl)) + return; + break; + case llvm::DK_OptimizationRemarkAnalysis: + if (!cast(DI).isEnabled(pImpl)) + return; + break; + default: + break; + } // Otherwise, print the message with a prefix based on the severity. std::string MsgStorage; @@ -177,13 +189,6 @@ void LLVMContext::emitError(unsigned LocCookie, const Twine &ErrorStr) { diagnose(DiagnosticInfoInlineAsm(LocCookie, ErrorStr)); } -void LLVMContext::emitOptimizationRemark(const char *PassName, - const Function &Fn, - const DebugLoc &DLoc, - const Twine &Msg) { - diagnose(DiagnosticInfoOptimizationRemark(PassName, Fn, DLoc, Msg)); -} - //===----------------------------------------------------------------------===// // Metadata Kind Uniquing //===----------------------------------------------------------------------===// diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp index 2042374647dd..24d325246d25 100644 --- a/lib/IR/LLVMContextImpl.cpp +++ b/lib/IR/LLVMContextImpl.cpp @@ -14,6 +14,7 @@ #include "LLVMContextImpl.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Regex.h" @@ -48,20 +49,20 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C) namespace { -/// \brief Regular expression corresponding to the value given in the -/// command line flag -pass-remarks. Passes whose name matches this -/// regexp will emit a diagnostic when calling -/// LLVMContext::emitOptimizationRemark. -static Regex *OptimizationRemarkPattern = nullptr; - +/// \brief Regular expression corresponding to the value given in one of the +/// -pass-remarks* command line flags. Passes whose name matches this regexp +/// will emit a diagnostic when calling the associated diagnostic function +/// (emitOptimizationRemark, emitOptimizationRemarkMissed or +/// emitOptimizationRemarkAnalysis). 
struct PassRemarksOpt { - void operator=(const std::string &Val) const { + std::shared_ptr Pattern; + + void operator=(const std::string &Val) { // Create a regexp object to match pass names for emitOptimizationRemark. if (!Val.empty()) { - delete OptimizationRemarkPattern; - OptimizationRemarkPattern = new Regex(Val); + Pattern = std::make_shared(Val); std::string RegexError; - if (!OptimizationRemarkPattern->isValid(RegexError)) + if (!Pattern->isValid(RegexError)) report_fatal_error("Invalid regular expression '" + Val + "' in -pass-remarks: " + RegexError, false); @@ -70,31 +71,62 @@ struct PassRemarksOpt { }; static PassRemarksOpt PassRemarksOptLoc; +static PassRemarksOpt PassRemarksMissedOptLoc; +static PassRemarksOpt PassRemarksAnalysisOptLoc; // -pass-remarks -// Command line flag to enable LLVMContext::emitOptimizationRemark() -// and LLVMContext::emitOptimizationNote() calls. +// Command line flag to enable emitOptimizationRemark() static cl::opt> PassRemarks("pass-remarks", cl::value_desc("pattern"), cl::desc("Enable optimization remarks from passes whose name match " "the given regular expression"), cl::Hidden, cl::location(PassRemarksOptLoc), cl::ValueRequired, cl::ZeroOrMore); + +// -pass-remarks-missed +// Command line flag to enable emitOptimizationRemarkMissed() +static cl::opt> PassRemarksMissed( + "pass-remarks-missed", cl::value_desc("pattern"), + cl::desc("Enable missed optimization remarks from passes whose name match " + "the given regular expression"), + cl::Hidden, cl::location(PassRemarksMissedOptLoc), cl::ValueRequired, + cl::ZeroOrMore); + +// -pass-remarks-analysis +// Command line flag to enable emitOptimizationRemarkAnalysis() +static cl::opt> +PassRemarksAnalysis( + "pass-remarks-analysis", cl::value_desc("pattern"), + cl::desc( + "Enable optimization analysis remarks from passes whose name match " + "the given regular expression"), + cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired, + cl::ZeroOrMore); } -bool -LLVMContextImpl::optimizationRemarksEnabledFor(const char *PassName) const { - return OptimizationRemarkPattern && - OptimizationRemarkPattern->match(PassName); +bool LLVMContextImpl::optimizationRemarkEnabledFor( + const DiagnosticInfoOptimizationRemark *DI) const { + return PassRemarksOptLoc.Pattern && + PassRemarksOptLoc.Pattern->match(DI->getPassName()); } +bool LLVMContextImpl::optimizationRemarkEnabledFor( + const DiagnosticInfoOptimizationRemarkMissed *DI) const { + return PassRemarksMissedOptLoc.Pattern && + PassRemarksMissedOptLoc.Pattern->match(DI->getPassName()); +} + +bool LLVMContextImpl::optimizationRemarkEnabledFor( + const DiagnosticInfoOptimizationRemarkAnalysis *DI) const { + return PassRemarksAnalysisOptLoc.Pattern && + PassRemarksAnalysisOptLoc.Pattern->match(DI->getPassName()); +} namespace { struct DropReferences { // Takes the value_type of a ConstantUniqueMap's internal map, whose 'second' // is a Constant*. 
- template - void operator()(const PairT &P) { + template void operator()(const PairT &P) { P.second->dropAllReferences(); } }; diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index b1ad9ff4a5a8..6ad9b8a70afb 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -37,6 +37,9 @@ namespace llvm { class ConstantInt; class ConstantFP; +class DiagnosticInfoOptimizationRemark; +class DiagnosticInfoOptimizationRemarkMissed; +class DiagnosticInfoOptimizationRemarkAnalysis; class LLVMContext; class Type; class Value; @@ -373,7 +376,12 @@ class LLVMContextImpl { /// \brief Return true if the given pass name should emit optimization /// remarks. - bool optimizationRemarksEnabledFor(const char *PassName) const; + bool optimizationRemarkEnabledFor( + const DiagnosticInfoOptimizationRemark *DI) const; + bool optimizationRemarkEnabledFor( + const DiagnosticInfoOptimizationRemarkMissed *DI) const; + bool optimizationRemarkEnabledFor( + const DiagnosticInfoOptimizationRemarkAnalysis *DI) const; int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx); int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx); diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 10b20cfc917f..9087ab23bb70 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -301,6 +301,13 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { return thres; } +static void emitAnalysis(CallSite CS, const Twine &Msg) { + Function *Caller = CS.getCaller(); + LLVMContext &Ctx = Caller->getContext(); + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg); +} + /// shouldInline - Return true if the inliner should attempt to inline /// at the given CallSite. bool Inliner::shouldInline(CallSite CS) { @@ -309,12 +316,16 @@ bool Inliner::shouldInline(CallSite CS) { if (IC.isAlways()) { DEBUG(dbgs() << " Inlining: cost=always" << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) + + " should always be inlined (cost=always)"); return true; } if (IC.isNever()) { DEBUG(dbgs() << " NOT Inlining: cost=never" << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + + " should never be inlined (cost=never)")); return false; } @@ -323,6 +334,10 @@ bool Inliner::shouldInline(CallSite CS) { DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost() << ", thres=" << (IC.getCostDelta() + IC.getCost()) << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + + " too costly to inline (cost=") + + Twine(IC.getCost()) + ", threshold=" + + Twine(IC.getCostDelta() + IC.getCost()) + ")"); return false; } @@ -390,6 +405,11 @@ bool Inliner::shouldInline(CallSite CS) { DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << " Cost = " << IC.getCost() << ", outer Cost = " << TotalSecondaryCost << '\n'); + emitAnalysis( + CS, Twine("Not inlining. 
Cost of inlining " + + CS.getCalledFunction()->getName() + + " increases the cost of inlining " + + CS.getCaller()->getName() + " in other contexts")); return false; } } @@ -397,6 +417,10 @@ bool Inliner::shouldInline(CallSite CS) { DEBUG(dbgs() << " Inlining: cost=" << IC.getCost() << ", thres=" << (IC.getCostDelta() + IC.getCost()) << ", Call: " << *CS.getInstruction() << '\n'); + emitAnalysis( + CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") + + CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) + + " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")"); return true; } @@ -518,24 +542,35 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) continue; - - // If the policy determines that we should inline this function, - // try to do so. - if (!shouldInline(CS)) - continue; + LLVMContext &CallerCtx = Caller->getContext(); // Get DebugLoc to report. CS will be invalid after Inliner. DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + // If the policy determines that we should inline this function, + // try to do so. + if (!shouldInline(CS)) { + emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, + Twine(Callee->getName() + + " will not be inlined into " + + Caller->getName())); + continue; + } + // Attempt to inline the function. if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, - InlineHistoryID, InsertLifetime, DL)) + InlineHistoryID, InsertLifetime, DL)) { + emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, + Twine(Callee->getName() + + " will not be inlined into " + + Caller->getName())); continue; + } ++NumInlined; // Report the inline decision. - Caller->getContext().emitOptimizationRemark( - DEBUG_TYPE, *Caller, DLoc, + emitOptimizationRemark( + CallerCtx, DEBUG_TYPE, *Caller, DLoc, Twine(Callee->getName() + " inlined into " + Caller->getName())); // If inlining this function gave us any new call sites, throw them diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 946af800776f..05b9892470bb 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -64,6 +64,7 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -318,8 +319,8 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) { break; } if (SafeToTail) { - F.getContext().emitOptimizationRemark( - "tailcallelim", F, CI->getDebugLoc(), + emitOptimizationRemark( + F.getContext(), "tailcallelim", F, CI->getDebugLoc(), "marked this readnone call a tail call candidate"); CI->setTailCall(); Modified = true; @@ -365,9 +366,9 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) { if (Visited[CI->getParent()] != ESCAPED) { // If the escape point was part way through the block, calls after the // escape point wouldn't have been put into DeferredTails. 
- F.getContext().emitOptimizationRemark( - "tailcallelim", F, CI->getDebugLoc(), - "marked this call a tail call candidate"); + emitOptimizationRemark(F.getContext(), "tailcallelim", F, + CI->getDebugLoc(), + "marked this call a tail call candidate"); CI->setTailCall(); Modified = true; } else { @@ -678,9 +679,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *BB = Ret->getParent(); Function *F = BB->getParent(); - F->getContext().emitOptimizationRemark( - "tailcallelim", *F, CI->getDebugLoc(), - "transforming tail recursion to loop"); + emitOptimizationRemark(F->getContext(), "tailcallelim", *F, CI->getDebugLoc(), + "transforming tail recursion to loop"); // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index faaab5c12e4c..d953e3073109 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -237,9 +238,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (CompletelyUnroll) { DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); - Ctx.emitOptimizationRemark(DEBUG_TYPE, *F, LoopLoc, - Twine("completely unrolled loop with ") + - Twine(TripCount) + " iterations"); + emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, + Twine("completely unrolled loop with ") + + Twine(TripCount) + " iterations"); } else { DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); @@ -255,7 +256,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, DiagMsg.concat(" with run-time trip count"); } DEBUG(dbgs() << "!\n"); - Ctx.emitOptimizationRemark(DEBUG_TYPE, *F, LoopLoc, DiagMsg); + emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, DiagMsg); } bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 0fd185816fd5..3b61bb575a8d 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" @@ -789,9 +790,9 @@ struct StrLenOpt : public LibCallOptimization { uint64_t LenTrue = GetStringLength(SI->getTrueValue()); uint64_t LenFalse = GetStringLength(SI->getFalseValue()); if (LenTrue && LenFalse) { - Context->emitOptimizationRemark( - "simplify-libcalls", *Caller, SI->getDebugLoc(), - "folded strlen(select) to select of constants"); + emitOptimizationRemark(*Context, "simplify-libcalls", *Caller, + SI->getDebugLoc(), + "folded strlen(select) to select of constants"); return B.CreateSelect(SI->getCondition(), ConstantInt::get(CI->getType(), LenTrue-1), ConstantInt::get(CI->getType(), LenFalse-1)); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index d57fae34dd61..34d8a1053fa1 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp 
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -67,6 +67,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -1213,10 +1214,10 @@ struct LoopVectorize : public FunctionPass { DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n"); // Report the unrolling decision. - F->getContext().emitOptimizationRemark( - DEBUG_TYPE, *F, L->getStartLoc(), - Twine("unrolled with interleaving factor " + Twine(UF) + - " (vectorization not beneficial)")); + emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + Twine("unrolled with interleaving factor " + + Twine(UF) + + " (vectorization not beneficial)")); // We decided not to vectorize, but we may want to unroll. InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF); @@ -1228,8 +1229,8 @@ struct LoopVectorize : public FunctionPass { ++LoopsVectorized; // Report the vectorization decision. - F->getContext().emitOptimizationRemark( - DEBUG_TYPE, *F, L->getStartLoc(), + emitOptimizationRemark( + F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), Twine("vectorized loop (vectorization factor: ") + Twine(VF.Width) + ", unrolling interleave factor: " + Twine(UF) + ")"); } diff --git a/test/Transforms/Inline/optimization-remarks.ll b/test/Transforms/Inline/optimization-remarks.ll new file mode 100644 index 000000000000..9108f3ab14d6 --- /dev/null +++ b/test/Transforms/Inline/optimization-remarks.ll @@ -0,0 +1,60 @@ +; RUN: opt < %s -inline -pass-remarks=inline -pass-remarks-missed=inline -pass-remarks-analysis=inline -S 2>&1 | FileCheck %s + +; CHECK: foo should always be inlined (cost=always) +; CHECK: foo inlined into bar +; CHECK: foz should never be inlined (cost=never) +; CHECK: foz will not be inlined into bar + +; Function Attrs: alwaysinline nounwind uwtable +define i32 @foo(i32 %x, i32 %y) #0 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %0 = load i32* %x.addr, align 4 + %1 = load i32* %y.addr, align 4 + %add = add nsw i32 %0, %1 + ret i32 %add +} + +; Function Attrs: noinline nounwind uwtable +define float @foz(i32 %x, i32 %y) #1 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %0 = load i32* %x.addr, align 4 + %1 = load i32* %y.addr, align 4 + %mul = mul nsw i32 %0, %1 + %conv = sitofp i32 %mul to float + ret float %conv +} + +; Function Attrs: nounwind uwtable +define i32 @bar(i32 %j) #2 { +entry: + %j.addr = alloca i32, align 4 + store i32 %j, i32* %j.addr, align 4 + %0 = load i32* %j.addr, align 4 + %1 = load i32* %j.addr, align 4 + %sub = sub nsw i32 %1, 2 + %call = call i32 @foo(i32 %0, i32 %sub) + %conv = sitofp i32 %call to float + %2 = load i32* %j.addr, align 4 + %sub1 = sub nsw i32 %2, 2 + %3 = load i32* %j.addr, align 4 + %call2 = call float @foz(i32 %sub1, i32 %3) + %mul = fmul float %conv, %call2 + %conv3 = fptosi float %mul to i32 + ret i32 %conv3 +} + +attributes #0 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind uwtable "less-precise-fpmad"="false" 
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"clang version 3.5.0 "} From 634d64e3bbe1b6f793cbcf0b55e40d4dbb2be329 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 22 May 2014 14:20:05 +0000 Subject: [PATCH 078/906] ARM64: remove '#' from annotation of add/sub immediate The full string used to be "// =#12" for example, which looks too busy. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209443 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp index 48fba37146e5..529b450352eb 100644 --- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp +++ b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp @@ -948,7 +948,7 @@ void ARM64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, printShifter(MI, OpNum + 1, O); if (CommentStream) - *CommentStream << "=#" << (Val << Shift) << '\n'; + *CommentStream << '=' << (Val << Shift) << '\n'; } else { assert(MO.isExpr() && "Unexpected operand type!"); O << *MO.getExpr(); From 3957d4245ffd0ce8c9e610b7fbc8430e910ddf84 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Thu, 22 May 2014 16:21:39 +0000 Subject: [PATCH 079/906] [X86] Improve the lowering of BITCAST from MVT::f64 to MVT::v4i16/MVT::v8i8. This patch teaches the x86 backend how to efficiently lower ISD::BITCAST dag nodes from MVT::f64 to MVT::v4i16 (and vice versa), and from MVT::f64 to MVT::v8i8 (and vice versa). This patch extends the logic from revision 208107 to also handle MVT::v4i16 and MVT::v8i8. Also, this patch correctly propagates Undef values when performing the widening of a vector (example: when widening from v2i32 to v4i32, the upper 64bits of the resulting vector are 'undef'). 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209451 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 56 ++++++--- test/CodeGen/X86/lower-bitcast-v2i32.ll | 80 ------------ test/CodeGen/X86/lower-bitcast.ll | 155 ++++++++++++++++++++++++ test/CodeGen/X86/ret-mmx.ll | 4 +- 4 files changed, 195 insertions(+), 100 deletions(-) delete mode 100644 test/CodeGen/X86/lower-bitcast-v2i32.ll create mode 100644 test/CodeGen/X86/lower-bitcast.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 61828759fc2c..c3006377a6c4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1040,6 +1040,8 @@ void X86TargetLowering::resetOperationActions() { setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal); setOperationAction(ISD::BITCAST, MVT::v2i32, Custom); + setOperationAction(ISD::BITCAST, MVT::v4i16, Custom); + setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); } if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) { @@ -14276,19 +14278,31 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, MVT SrcVT = Op.getOperand(0).getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); - if (SrcVT == MVT::v2i32) { + if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) { assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); if (DstVT != MVT::f64) // This conversion needs to be expanded. return SDValue(); + SDValue InVec = Op->getOperand(0); SDLoc dl(Op); - SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - Op->getOperand(0), DAG.getIntPtrConstant(0)); - SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - Op->getOperand(0), DAG.getIntPtrConstant(1)); - SDValue Elts[] = {Elt0, Elt1, Elt0, Elt0}; - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Elts); + unsigned NumElts = SrcVT.getVectorNumElements(); + EVT SVT = SrcVT.getVectorElementType(); + + // Widen the vector in input in the case of MVT::v2i32. + // Example: from MVT::v2i32 to MVT::v4i32. + SmallVector Elts; + for (unsigned i = 0, e = NumElts; i != e; ++i) + Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec, + DAG.getIntPtrConstant(i))); + + // Explicitly mark the extra elements as Undef. 
+ SDValue Undef = DAG.getUNDEF(SVT); + for (unsigned i = NumElts, e = NumElts * 2; i != e; ++i) + Elts.push_back(Undef); + + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Elts); SDValue ToV2F64 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, BV); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64, DAG.getIntPtrConstant(0)); @@ -14758,17 +14772,23 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, EVT DstVT = N->getValueType(0); EVT SrcVT = N->getOperand(0)->getValueType(0); - if (SrcVT == MVT::f64 && DstVT == MVT::v2i32) { - SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, - MVT::v2f64, N->getOperand(0)); - SDValue ToV4I32 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Expanded); - SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - ToV4I32, DAG.getIntPtrConstant(0)); - SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - ToV4I32, DAG.getIntPtrConstant(1)); - SDValue Elts[] = {Elt0, Elt1}; - Results.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Elts)); - } + if (SrcVT != MVT::f64 || + (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8)) + return; + + unsigned NumElts = DstVT.getVectorNumElements(); + EVT SVT = DstVT.getVectorElementType(); + EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2); + SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, + MVT::v2f64, N->getOperand(0)); + SDValue ToVecInt = DAG.getNode(ISD::BITCAST, dl, WiderVT, Expanded); + + SmallVector Elts; + for (unsigned i = 0, e = NumElts; i != e; ++i) + Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, + ToVecInt, DAG.getIntPtrConstant(i))); + + Results.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, DstVT, Elts)); } } } diff --git a/test/CodeGen/X86/lower-bitcast-v2i32.ll b/test/CodeGen/X86/lower-bitcast-v2i32.ll deleted file mode 100644 index 1c0de630ef8c..000000000000 --- a/test/CodeGen/X86/lower-bitcast-v2i32.ll +++ /dev/null @@ -1,80 +0,0 @@ -; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s - - -define double @test1(double %A) { - %1 = bitcast double %A to <2 x i32> - %add = add <2 x i32> %1, - %2 = bitcast <2 x i32> %add to double - ret double %2 -} -; FIXME: Ideally we should be able to fold the entire body of @test1 into a -; single paddd instruction. At the moment we produce the sequence -; pshufd+paddq+pshufd. - -; CHECK-LABEL: test1 -; CHECK-NOT: movsd -; CHECK: pshufd -; CHECK-NEXT: paddq -; CHECK-NEXT: pshufd -; CHECK-NEXT: ret - - -define double @test2(double %A, double %B) { - %1 = bitcast double %A to <2 x i32> - %2 = bitcast double %B to <2 x i32> - %add = add <2 x i32> %1, %2 - %3 = bitcast <2 x i32> %add to double - ret double %3 -} -; FIXME: Ideally we should be able to fold the entire body of @test2 into a -; single 'paddd %xmm1, %xmm0' instruction. At the moment we produce the -; sequence pshufd+pshufd+paddq+pshufd. 
- -; CHECK-LABEL: test2 -; CHECK-NOT: movsd -; CHECK: pshufd -; CHECK-NEXT: pshufd -; CHECK-NEXT: paddq -; CHECK-NEXT: pshufd -; CHECK-NEXT: ret - - -define i64 @test3(i64 %A) { - %1 = bitcast i64 %A to <2 x float> - %add = fadd <2 x float> %1, - %2 = bitcast <2 x float> %add to i64 - ret i64 %2 -} -; CHECK-LABEL: test3 -; CHECK-NOT: pshufd -; CHECK: addps -; CHECK-NOT: pshufd -; CHECK: ret - - -define i64 @test4(i64 %A) { - %1 = bitcast i64 %A to <2 x i32> - %add = add <2 x i32> %1, - %2 = bitcast <2 x i32> %add to i64 - ret i64 %2 -} -; FIXME: At the moment we still produce the sequence pshufd+paddq+pshufd. -; Ideally, we should fold that sequence into a single paddd. - -; CHECK-LABEL: test4 -; CHECK: pshufd -; CHECK-NEXT: paddq -; CHECK-NEXT: pshufd -; CHECK: ret - - -define double @test5(double %A) { - %1 = bitcast double %A to <2 x float> - %add = fadd <2 x float> %1, - %2 = bitcast <2 x float> %add to double - ret double %2 -} -; CHECK-LABEL: test5 -; CHECK: addps -; CHECK-NEXT: ret - diff --git a/test/CodeGen/X86/lower-bitcast.ll b/test/CodeGen/X86/lower-bitcast.ll new file mode 100644 index 000000000000..b9b29a558e26 --- /dev/null +++ b/test/CodeGen/X86/lower-bitcast.ll @@ -0,0 +1,155 @@ +; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s + + +define double @test1(double %A) { + %1 = bitcast double %A to <2 x i32> + %add = add <2 x i32> %1, + %2 = bitcast <2 x i32> %add to double + ret double %2 +} +; FIXME: Ideally we should be able to fold the entire body of @test1 into a +; single paddd instruction. At the moment we produce the sequence +; pshufd+paddq+pshufd. + +; CHECK-LABEL: test1 +; CHECK-NOT: movsd +; CHECK: pshufd +; CHECK-NEXT: paddq +; CHECK-NEXT: pshufd +; CHECK-NEXT: ret + + +define double @test2(double %A, double %B) { + %1 = bitcast double %A to <2 x i32> + %2 = bitcast double %B to <2 x i32> + %add = add <2 x i32> %1, %2 + %3 = bitcast <2 x i32> %add to double + ret double %3 +} +; FIXME: Ideally we should be able to fold the entire body of @test2 into a +; single 'paddd %xmm1, %xmm0' instruction. At the moment we produce the +; sequence pshufd+pshufd+paddq+pshufd. + +; CHECK-LABEL: test2 +; CHECK-NOT: movsd +; CHECK: pshufd +; CHECK-NEXT: pshufd +; CHECK-NEXT: paddq +; CHECK-NEXT: pshufd +; CHECK-NEXT: ret + + +define i64 @test3(i64 %A) { + %1 = bitcast i64 %A to <2 x float> + %add = fadd <2 x float> %1, + %2 = bitcast <2 x float> %add to i64 + ret i64 %2 +} +; CHECK-LABEL: test3 +; CHECK-NOT: pshufd +; CHECK: addps +; CHECK-NOT: pshufd +; CHECK: ret + + +define i64 @test4(i64 %A) { + %1 = bitcast i64 %A to <2 x i32> + %add = add <2 x i32> %1, + %2 = bitcast <2 x i32> %add to i64 + ret i64 %2 +} +; FIXME: At the moment we still produce the sequence pshufd+paddq+pshufd. +; Ideally, we should fold that sequence into a single paddd. + +; CHECK-LABEL: test4 +; CHECK: pshufd +; CHECK-NEXT: paddq +; CHECK-NEXT: pshufd +; CHECK: ret + + +define double @test5(double %A) { + %1 = bitcast double %A to <2 x float> + %add = fadd <2 x float> %1, + %2 = bitcast <2 x float> %add to double + ret double %2 +} +; CHECK-LABEL: test5 +; CHECK: addps +; CHECK-NEXT: ret + + +define double @test6(double %A) { + %1 = bitcast double %A to <4 x i16> + %add = add <4 x i16> %1, + %2 = bitcast <4 x i16> %add to double + ret double %2 +} +; FIXME: Ideally we should be able to fold the entire body of @test6 into a +; single paddw instruction. 
+ +; CHECK-LABEL: test6 +; CHECK-NOT: movsd +; CHECK: punpcklwd +; CHECK-NEXT: paddd +; CHECK-NEXT: pshufb +; CHECK-NEXT: ret + + +define double @test7(double %A, double %B) { + %1 = bitcast double %A to <4 x i16> + %2 = bitcast double %B to <4 x i16> + %add = add <4 x i16> %1, %2 + %3 = bitcast <4 x i16> %add to double + ret double %3 +} +; FIXME: Ideally we should be able to fold the entire body of @test7 into a +; single 'paddw %xmm1, %xmm0' instruction. At the moment we produce the +; sequence pshufd+pshufd+paddd+pshufd. + +; CHECK-LABEL: test7 +; CHECK-NOT: movsd +; CHECK: punpcklwd +; CHECK-NEXT: punpcklwd +; CHECK-NEXT: paddd +; CHECK-NEXT: pshufb +; CHECK-NEXT: ret + + +define double @test8(double %A) { + %1 = bitcast double %A to <8 x i8> + %add = add <8 x i8> %1, + %2 = bitcast <8 x i8> %add to double + ret double %2 +} +; FIXME: Ideally we should be able to fold the entire body of @test8 into a +; single paddb instruction. At the moment we produce the sequence +; pshufd+paddw+pshufd. + +; CHECK-LABEL: test8 +; CHECK-NOT: movsd +; CHECK: punpcklbw +; CHECK-NEXT: paddw +; CHECK-NEXT: pshufb +; CHECK-NEXT: ret + + +define double @test9(double %A, double %B) { + %1 = bitcast double %A to <8 x i8> + %2 = bitcast double %B to <8 x i8> + %add = add <8 x i8> %1, %2 + %3 = bitcast <8 x i8> %add to double + ret double %3 +} +; FIXME: Ideally we should be able to fold the entire body of @test9 into a +; single 'paddb %xmm1, %xmm0' instruction. At the moment we produce the +; sequence pshufd+pshufd+paddw+pshufd. + +; CHECK-LABEL: test9 +; CHECK-NOT: movsd +; CHECK: punpcklbw +; CHECK-NEXT: punpcklbw +; CHECK-NEXT: paddw +; CHECK-NEXT: pshufb +; CHECK-NEXT: ret + diff --git a/test/CodeGen/X86/ret-mmx.ll b/test/CodeGen/X86/ret-mmx.ll index 7adf307ce0fb..fc9c78d1bbeb 100644 --- a/test/CodeGen/X86/ret-mmx.ll +++ b/test/CodeGen/X86/ret-mmx.ll @@ -33,8 +33,8 @@ define <2 x i32> @t3() nounwind { define double @t4() nounwind { ret double bitcast (<2 x i32> to double) ; CHECK-LABEL: t4: -; CHECK-NOT: movl $1 +; CHECK: movl $1 ; CHECK-NOT: pshufd -; CHECK: movsd {{.*}}, %xmm0 +; CHECK: movd {{.*}}, %xmm0 } From c64771062bcf54f51887df75e8b06f80bfca9497 Mon Sep 17 00:00:00 2001 From: Diego Novillo Date: Thu, 22 May 2014 17:19:01 +0000 Subject: [PATCH 080/906] Remove LLVMContextImpl::optimizationRemarkEnabledFor. Summary: This patch moves the handling of -pass-remarks* over to lib/DiagnosticInfo.cpp. This allows the removal of the optimizationRemarkEnabledFor functions from LLVMContextImpl, as they're not needed anymore. Reviewers: qcolombet Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D3878 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209453 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/DiagnosticInfo.h | 11 ++--- lib/IR/DiagnosticInfo.cpp | 77 ++++++++++++++++++++++++++++---- lib/IR/LLVMContext.cpp | 6 +-- lib/IR/LLVMContextImpl.cpp | 77 -------------------------------- lib/IR/LLVMContextImpl.h | 9 ---- 5 files changed, 75 insertions(+), 105 deletions(-) diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h index 8e334c996d95..e78a42b99779 100644 --- a/include/llvm/IR/DiagnosticInfo.h +++ b/include/llvm/IR/DiagnosticInfo.h @@ -271,10 +271,7 @@ class DiagnosticInfoOptimizationRemarkBase : public DiagnosticInfo { /// or -pass-remarks-analysis). Note that this only handles the LLVM /// flags. We cannot access Clang flags from here (they are handled /// in BackendConsumer::OptimizationRemarkHandler). 
- /// - /// \p pImpl points to the current LLVM context. It is needed to query the - /// value of the command line flag associated with this remark. - virtual bool isEnabled(LLVMContextImpl *pImpl) const = 0; + virtual bool isEnabled() const = 0; /// Return true if location information is available for this diagnostic. bool isLocationAvailable() const; @@ -332,7 +329,7 @@ class DiagnosticInfoOptimizationRemark } /// \see DiagnosticInfoOptimizationRemarkBase::isEnabled. - virtual bool isEnabled(LLVMContextImpl *pImpl) const override; + virtual bool isEnabled() const override; }; /// Diagnostic information for missed-optimization remarks. @@ -359,7 +356,7 @@ class DiagnosticInfoOptimizationRemarkMissed } /// \see DiagnosticInfoOptimizationRemarkBase::isEnabled. - virtual bool isEnabled(LLVMContextImpl *pImpl) const override; + virtual bool isEnabled() const override; }; /// Diagnostic information for optimization analysis remarks. @@ -387,7 +384,7 @@ class DiagnosticInfoOptimizationRemarkAnalysis } /// \see DiagnosticInfoOptimizationRemarkBase::isEnabled. - virtual bool isEnabled(LLVMContextImpl *pImpl) const override; + virtual bool isEnabled() const override; }; // Create wrappers for C Binding types (see CBindingWrapping.h). diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index 68a69183afff..6eeb16220eac 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -23,10 +23,69 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Support/Atomic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Regex.h" #include using namespace llvm; +namespace { + +/// \brief Regular expression corresponding to the value given in one of the +/// -pass-remarks* command line flags. Passes whose name matches this regexp +/// will emit a diagnostic when calling the associated diagnostic function +/// (emitOptimizationRemark, emitOptimizationRemarkMissed or +/// emitOptimizationRemarkAnalysis). +struct PassRemarksOpt { + std::shared_ptr Pattern; + + void operator=(const std::string &Val) { + // Create a regexp object to match pass names for emitOptimizationRemark. 
+ if (!Val.empty()) { + Pattern = std::make_shared(Val); + std::string RegexError; + if (!Pattern->isValid(RegexError)) + report_fatal_error("Invalid regular expression '" + Val + + "' in -pass-remarks: " + RegexError, + false); + } + }; +}; + +static PassRemarksOpt PassRemarksOptLoc; +static PassRemarksOpt PassRemarksMissedOptLoc; +static PassRemarksOpt PassRemarksAnalysisOptLoc; + +// -pass-remarks +// Command line flag to enable emitOptimizationRemark() +static cl::opt> +PassRemarks("pass-remarks", cl::value_desc("pattern"), + cl::desc("Enable optimization remarks from passes whose name match " + "the given regular expression"), + cl::Hidden, cl::location(PassRemarksOptLoc), cl::ValueRequired, + cl::ZeroOrMore); + +// -pass-remarks-missed +// Command line flag to enable emitOptimizationRemarkMissed() +static cl::opt> PassRemarksMissed( + "pass-remarks-missed", cl::value_desc("pattern"), + cl::desc("Enable missed optimization remarks from passes whose name match " + "the given regular expression"), + cl::Hidden, cl::location(PassRemarksMissedOptLoc), cl::ValueRequired, + cl::ZeroOrMore); + +// -pass-remarks-analysis +// Command line flag to enable emitOptimizationRemarkAnalysis() +static cl::opt> +PassRemarksAnalysis( + "pass-remarks-analysis", cl::value_desc("pattern"), + cl::desc( + "Enable optimization analysis remarks from passes whose name match " + "the given regular expression"), + cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired, + cl::ZeroOrMore); +} + int llvm::getNextAvailablePluginDiagnosticKind() { static sys::cas_flag PluginKindID = DK_FirstPluginKind; return (int)sys::AtomicIncrement(&PluginKindID); @@ -94,19 +153,19 @@ void DiagnosticInfoOptimizationRemarkBase::print(DiagnosticPrinter &DP) const { DP << getLocationStr() << ": " << getMsg(); } -bool -DiagnosticInfoOptimizationRemark::isEnabled(LLVMContextImpl *pImpl) const { - return pImpl->optimizationRemarkEnabledFor(this); +bool DiagnosticInfoOptimizationRemark::isEnabled() const { + return PassRemarksOptLoc.Pattern && + PassRemarksOptLoc.Pattern->match(getPassName()); } -bool DiagnosticInfoOptimizationRemarkMissed::isEnabled( - LLVMContextImpl *pImpl) const { - return pImpl->optimizationRemarkEnabledFor(this); +bool DiagnosticInfoOptimizationRemarkMissed::isEnabled() const { + return PassRemarksMissedOptLoc.Pattern && + PassRemarksMissedOptLoc.Pattern->match(getPassName()); } -bool DiagnosticInfoOptimizationRemarkAnalysis::isEnabled( - LLVMContextImpl *pImpl) const { - return pImpl->optimizationRemarkEnabledFor(this); +bool DiagnosticInfoOptimizationRemarkAnalysis::isEnabled() const { + return PassRemarksAnalysisOptLoc.Pattern && + PassRemarksAnalysisOptLoc.Pattern->match(getPassName()); } void llvm::emitOptimizationRemark(LLVMContext &Ctx, const char *PassName, diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 7b75d42b85e1..de825f00b207 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -148,15 +148,15 @@ void LLVMContext::diagnose(const DiagnosticInfo &DI) { // diagnostic and return. 
switch (DI.getKind()) { case llvm::DK_OptimizationRemark: - if (!cast(DI).isEnabled(pImpl)) + if (!cast(DI).isEnabled()) return; break; case llvm::DK_OptimizationRemarkMissed: - if (!cast(DI).isEnabled(pImpl)) + if (!cast(DI).isEnabled()) return; break; case llvm::DK_OptimizationRemarkAnalysis: - if (!cast(DI).isEnabled(pImpl)) + if (!cast(DI).isEnabled()) return; break; default: diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp index 24d325246d25..4c2791f0a8d5 100644 --- a/lib/IR/LLVMContextImpl.cpp +++ b/lib/IR/LLVMContextImpl.cpp @@ -16,8 +16,6 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Module.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Regex.h" #include using namespace llvm; @@ -47,81 +45,6 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C) NamedStructTypesUniqueID = 0; } -namespace { - -/// \brief Regular expression corresponding to the value given in one of the -/// -pass-remarks* command line flags. Passes whose name matches this regexp -/// will emit a diagnostic when calling the associated diagnostic function -/// (emitOptimizationRemark, emitOptimizationRemarkMissed or -/// emitOptimizationRemarkAnalysis). -struct PassRemarksOpt { - std::shared_ptr Pattern; - - void operator=(const std::string &Val) { - // Create a regexp object to match pass names for emitOptimizationRemark. - if (!Val.empty()) { - Pattern = std::make_shared(Val); - std::string RegexError; - if (!Pattern->isValid(RegexError)) - report_fatal_error("Invalid regular expression '" + Val + - "' in -pass-remarks: " + RegexError, - false); - } - }; -}; - -static PassRemarksOpt PassRemarksOptLoc; -static PassRemarksOpt PassRemarksMissedOptLoc; -static PassRemarksOpt PassRemarksAnalysisOptLoc; - -// -pass-remarks -// Command line flag to enable emitOptimizationRemark() -static cl::opt> -PassRemarks("pass-remarks", cl::value_desc("pattern"), - cl::desc("Enable optimization remarks from passes whose name match " - "the given regular expression"), - cl::Hidden, cl::location(PassRemarksOptLoc), cl::ValueRequired, - cl::ZeroOrMore); - -// -pass-remarks-missed -// Command line flag to enable emitOptimizationRemarkMissed() -static cl::opt> PassRemarksMissed( - "pass-remarks-missed", cl::value_desc("pattern"), - cl::desc("Enable missed optimization remarks from passes whose name match " - "the given regular expression"), - cl::Hidden, cl::location(PassRemarksMissedOptLoc), cl::ValueRequired, - cl::ZeroOrMore); - -// -pass-remarks-analysis -// Command line flag to enable emitOptimizationRemarkAnalysis() -static cl::opt> -PassRemarksAnalysis( - "pass-remarks-analysis", cl::value_desc("pattern"), - cl::desc( - "Enable optimization analysis remarks from passes whose name match " - "the given regular expression"), - cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired, - cl::ZeroOrMore); -} - -bool LLVMContextImpl::optimizationRemarkEnabledFor( - const DiagnosticInfoOptimizationRemark *DI) const { - return PassRemarksOptLoc.Pattern && - PassRemarksOptLoc.Pattern->match(DI->getPassName()); -} - -bool LLVMContextImpl::optimizationRemarkEnabledFor( - const DiagnosticInfoOptimizationRemarkMissed *DI) const { - return PassRemarksMissedOptLoc.Pattern && - PassRemarksMissedOptLoc.Pattern->match(DI->getPassName()); -} - -bool LLVMContextImpl::optimizationRemarkEnabledFor( - const DiagnosticInfoOptimizationRemarkAnalysis *DI) const { - return PassRemarksAnalysisOptLoc.Pattern && - 
PassRemarksAnalysisOptLoc.Pattern->match(DI->getPassName()); -} - namespace { struct DropReferences { // Takes the value_type of a ConstantUniqueMap's internal map, whose 'second' diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index 6ad9b8a70afb..808c239bff5e 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -374,15 +374,6 @@ class LLVMContextImpl { typedef DenseMap PrefixDataMapTy; PrefixDataMapTy PrefixDataMap; - /// \brief Return true if the given pass name should emit optimization - /// remarks. - bool optimizationRemarkEnabledFor( - const DiagnosticInfoOptimizationRemark *DI) const; - bool optimizationRemarkEnabledFor( - const DiagnosticInfoOptimizationRemarkMissed *DI) const; - bool optimizationRemarkEnabledFor( - const DiagnosticInfoOptimizationRemarkAnalysis *DI) const; - int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx); int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx); From 351c658fc228e92d887bbad470e20ce9bb662049 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 22 May 2014 17:45:20 +0000 Subject: [PATCH 081/906] R600/SI: Move instruction pattern to instruction definition git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209454 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 76906f86929e..439815104234 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1252,7 +1252,9 @@ defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32", let neverHasSideEffects = 1 in { defm V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>; -defm V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>; +defm V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", + [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))] +>; defm V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", [(set i32:$dst, (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2))] >; @@ -2075,11 +2077,6 @@ def : Pat < // VOP3 Patterns //===----------------------------------------------------------------------===// -def : Pat < - (f32 (fadd (fmul f32:$src0, f32:$src1), f32:$src2)), - (V_MAD_F32 $src0, $src1, $src2) ->; - def : Pat < (fadd f64:$src0, f64:$src1), (V_ADD_F64 $src0, $src1, (i64 0)) From bd0283a5f29ad439ca6ffec2f44ffb3f503e8ba0 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 22 May 2014 17:49:33 +0000 Subject: [PATCH 082/906] Return false if we're not going to do anything. 
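For context (this note and the sketch are not part of the original commit message): runOnMachineFunction's boolean return value tells the pass manager whether the pass modified the function, so an early bail-out must answer false. A minimal sketch of the convention, using a hypothetical pass:

    // "MyIfConverter" and shouldRun() are hypothetical stand-ins used only to
    // illustrate the convention; they are not LLVM classes or helpers.
    bool MyIfConverter::runOnMachineFunction(MachineFunction &MF) {
      if (!shouldRun(MF))
        return false;    // bailed out early: the function was not touched
      bool Changed = false;
      // ... perform the actual if-conversion, setting Changed on each rewrite ...
      return Changed;    // true only when something was really modified
    }

The early if-converter's disabled path was claiming a modification it never made; the one-line change below restores the accurate answer.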
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209455 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/EarlyIfConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 2d47be71bea0..c4706328ea52 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -780,7 +780,7 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { if (!MF.getTarget() .getSubtarget() .enableEarlyIfConversion()) - return true; + return false; TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); From f49da4338a71b7a6053ce8bfcb070a9f145b4d69 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 22 May 2014 18:00:15 +0000 Subject: [PATCH 083/906] R600: Add intrinsics for mad24 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209456 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 10 ++++++++++ lib/Target/R600/AMDGPUISelLowering.h | 2 ++ lib/Target/R600/AMDGPUInstrInfo.td | 7 +++++++ lib/Target/R600/AMDGPUInstructions.td | 11 +++++++++++ lib/Target/R600/AMDGPUIntrinsics.td | 2 ++ lib/Target/R600/CaymanInstructions.td | 4 +++- lib/Target/R600/SIInstructions.td | 8 ++++++-- test/CodeGen/R600/llvm.AMDGPU.imad24.ll | 14 ++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.umad24.ll | 12 ++++++++++++ 9 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/R600/llvm.AMDGPU.imad24.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.umad24.ll diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index d63cd1d281c5..30a9ed7ea744 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -740,6 +740,14 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT, Op.getOperand(1), Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_umad24: + return DAG.getNode(AMDGPUISD::MAD_U24, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case AMDGPUIntrinsic::AMDGPU_imad24: + return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case AMDGPUIntrinsic::AMDGPU_bfe_i32: return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT, Op.getOperand(1), @@ -1432,6 +1440,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BFM) NODE_NAME_CASE(MUL_U24) NODE_NAME_CASE(MUL_I24) + NODE_NAME_CASE(MAD_U24) + NODE_NAME_CASE(MAD_I24) NODE_NAME_CASE(URECIP) NODE_NAME_CASE(DOT4) NODE_NAME_CASE(EXPORT) diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index bf6916259aff..b876c521d322 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -186,6 +186,8 @@ enum { BFM, // Insert a range of bits into a 32-bit word. 
MUL_U24, MUL_I24, + MAD_U24, + MAD_I24, TEXTURE_FETCH, EXPORT, CONST_ADDRESS, diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 258d5a60ef37..f96dbb4d8a1b 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -100,3 +100,10 @@ def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp, def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp, [SDNPCommutative] >; + +def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp, + [] +>; +def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp, + [] +>; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index ba7cac476190..127b74a0edbb 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -423,6 +423,17 @@ class UMUL24Pattern : Pat < >; */ +class IMad24Pat : Pat < + (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2), + (Inst $src0, $src1, $src2) +>; + +class UMad24Pat : Pat < + (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2), + (Inst $src0, $src1, $src2) +>; + + include "R600Instructions.td" include "R700Instructions.td" include "EvergreenInstructions.td" diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index 9f30bd8f1c98..9ad5e72d3f0c 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -51,6 +51,8 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_umul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_imul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_imad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_umad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_AMDGPU_bfi : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/lib/Target/R600/CaymanInstructions.td b/lib/Target/R600/CaymanInstructions.td index 837d6025339f..6dded24c6fef 100644 --- a/lib/Target/R600/CaymanInstructions.td +++ b/lib/Target/R600/CaymanInstructions.td @@ -21,12 +21,14 @@ def isCayman : Predicate<"Subtarget.hasCaymanISA()">; let Predicates = [isCayman] in { def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24", - [(set i32:$dst, (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2))], VecALU + [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))], VecALU >; def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24", [(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))], VecALU >; +def : IMad24Pat; + let isVector = 1 in { def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 439815104234..500fa7894c29 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1256,13 +1256,14 @@ defm V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))] >; defm V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", - [(set i32:$dst, (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2))] + [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))] 
>; defm V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", - [(set i32:$dst, (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2))] + [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))] >; } // End neverHasSideEffects + defm V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>; defm V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>; defm V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>; @@ -2077,6 +2078,9 @@ def : Pat < // VOP3 Patterns //===----------------------------------------------------------------------===// +def : IMad24Pat; +def : UMad24Pat; + def : Pat < (fadd f64:$src0, f64:$src1), (V_ADD_F64 $src0, $src1, (i64 0)) diff --git a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll new file mode 100644 index 000000000000..c7a3660b0b9a --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) nounwind readnone + +; FUNC-LABEL: @test_imad24 +; SI: V_MAD_I32_I24 +; CM: MULADD_INT24 +define void @test_imad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind { + %mad = call i32 @llvm.AMDGPU.imad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone + store i32 %mad, i32 addrspace(1)* %out, align 4 + ret void +} + diff --git a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll new file mode 100644 index 000000000000..08daac54424a --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone + +; FUNC-LABEL: @test_umad24 +; SI: V_MAD_U32_U24 +define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind { + %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone + store i32 %mad, i32 addrspace(1)* %out, align 4 + ret void +} + From 21851f9adb7d538df6d350c7adcd22ff44b31cf7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 22 May 2014 18:00:20 +0000 Subject: [PATCH 084/906] R600: Expand mad24 for GPUs without it git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209457 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUInstructions.td | 10 ++++++++++ lib/Target/R600/CaymanInstructions.td | 1 + lib/Target/R600/EvergreenInstructions.td | 8 +++++++- lib/Target/R600/R600Instructions.td | 6 ++++++ test/CodeGen/R600/llvm.AMDGPU.imad24.ll | 7 +++++++ test/CodeGen/R600/llvm.AMDGPU.umad24.ll | 7 +++++++ 6 files changed, 38 insertions(+), 1 deletion(-) diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 127b74a0edbb..8a9ab54657ff 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -433,6 +433,16 @@ class UMad24Pat : Pat < (Inst $src0, $src1, $src2) >; +class IMad24ExpandPat : Pat < + (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2), + (AddInst (MulInst $src0, $src1), $src2) +>; + +class UMad24ExpandPat : Pat < + (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2), + (AddInst (MulInst $src0, $src1), $src2) +>; + include "R600Instructions.td" include "R700Instructions.td" diff --git a/lib/Target/R600/CaymanInstructions.td 
b/lib/Target/R600/CaymanInstructions.td index 6dded24c6fef..58424a61722d 100644 --- a/lib/Target/R600/CaymanInstructions.td +++ b/lib/Target/R600/CaymanInstructions.td @@ -49,6 +49,7 @@ def COS_cm : COS_Common<0x8E>; def : POW_Common ; defm DIV_cm : DIV_Common; +def : UMad24ExpandPat; // RECIP_UINT emulation for Cayman // The multiplication scales from [0,1] to the unsigned integer range diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td index d9931c81d625..77416671a903 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -75,6 +75,9 @@ def COS_eg : COS_Common<0x8E>; def : POW_Common ; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; +def : IMad24ExpandPat; +def : UMad24ExpandPat; + //===----------------------------------------------------------------------===// // Memory read/write instructions //===----------------------------------------------------------------------===// @@ -301,8 +304,11 @@ def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT", >; def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24", - [(set i32:$dst, (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2))], VecALU + [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))], VecALU >; + +def : UMad24Pat; + def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>; def : ROTRPattern ; def MULADD_eg : MULADD_Common<0x14>; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index d2075c0577b0..0c804ffe5d4b 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1625,6 +1625,12 @@ def : DwordAddrPat ; } // End isR600toCayman Predicate +let Predicates = [isR600] in { +// Intrinsic patterns +def : IMad24ExpandPat; +def : UMad24ExpandPat; +} // End isR600 + def getLDSNoRetOp : InstrMapping { let FilterClass = "R600_LDS_1A1D"; let RowFields = ["BaseOp"]; diff --git a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll index c7a3660b0b9a..95795ea63b93 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll @@ -1,11 +1,18 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=r770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s + +; FIXME: Store of i32 seems to be broken pre-EG somehow? 
declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) nounwind readnone ; FUNC-LABEL: @test_imad24 ; SI: V_MAD_I32_I24 ; CM: MULADD_INT24 +; R600: MULLO_INT +; R600: ADD_INT define void @test_imad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind { %mad = call i32 @llvm.AMDGPU.imad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone store i32 %mad, i32 addrspace(1)* %out, align 4 diff --git a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll index 08daac54424a..afdfb18a563b 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll @@ -1,9 +1,16 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone ; FUNC-LABEL: @test_umad24 ; SI: V_MAD_U32_U24 +; EG: MULADD_UINT24 +; R600: MULLO_UINT +; R600: ADD_INT define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind { %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone store i32 %mad, i32 addrspace(1)* %out, align 4 From cb0402e9a4361c747aacfc5b9afc5946055bce3a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 22 May 2014 18:00:24 +0000 Subject: [PATCH 085/906] R600: Expand mul24 for GPUs without it git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209458 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUInstructions.td | 29 +++++++++++++++++------- lib/Target/R600/CaymanInstructions.td | 2 +- lib/Target/R600/EvergreenInstructions.td | 3 +-- lib/Target/R600/R600Instructions.td | 4 ++-- test/CodeGen/R600/llvm.AMDGPU.imul24.ll | 3 ++- test/CodeGen/R600/llvm.AMDGPU.umul24.ll | 10 ++++++-- 6 files changed, 35 insertions(+), 16 deletions(-) diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 8a9ab54657ff..80bdf5b86c48 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -433,16 +433,29 @@ class UMad24Pat : Pat < (Inst $src0, $src1, $src2) >; -class IMad24ExpandPat : Pat < - (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2), - (AddInst (MulInst $src0, $src1), $src2) ->; +multiclass Expand24IBitOps { + def _expand_imad24 : Pat < + (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2), + (AddInst (MulInst $src0, $src1), $src2) + >; -class UMad24ExpandPat : Pat < - (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2), - (AddInst (MulInst $src0, $src1), $src2) ->; + def _expand_imul24 : Pat < + (AMDGPUmul_i24 i32:$src0, i32:$src1), + (MulInst $src0, $src1) + >; +} +multiclass Expand24UBitOps { + def _expand_umad24 : Pat < + (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2), + (AddInst (MulInst $src0, $src1), $src2) + >; + + def _expand_umul24 : Pat < + (AMDGPUmul_u24 i32:$src0, i32:$src1), + (MulInst $src0, $src1) + >; +} include "R600Instructions.td" include "R700Instructions.td" diff --git a/lib/Target/R600/CaymanInstructions.td b/lib/Target/R600/CaymanInstructions.td index 58424a61722d..26303452c101 100644 --- 
a/lib/Target/R600/CaymanInstructions.td +++ b/lib/Target/R600/CaymanInstructions.td @@ -49,7 +49,7 @@ def COS_cm : COS_Common<0x8E>; def : POW_Common ; defm DIV_cm : DIV_Common; -def : UMad24ExpandPat; +defm : Expand24UBitOps; // RECIP_UINT emulation for Cayman // The multiplication scales from [0,1] to the unsigned integer range diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td index 77416671a903..20654419a8f4 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -75,8 +75,7 @@ def COS_eg : COS_Common<0x8E>; def : POW_Common ; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; -def : IMad24ExpandPat; -def : UMad24ExpandPat; +defm : Expand24IBitOps; //===----------------------------------------------------------------------===// // Memory read/write instructions diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 0c804ffe5d4b..590fde20293e 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1627,8 +1627,8 @@ def : DwordAddrPat ; let Predicates = [isR600] in { // Intrinsic patterns -def : IMad24ExpandPat; -def : UMad24ExpandPat; +defm : Expand24IBitOps; +defm : Expand24UBitOps; } // End isR600 def getLDSNoRetOp : InstrMapping { diff --git a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll index 33a1b8204e23..8ee3520daeae 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll @@ -1,14 +1,15 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone ; FUNC-LABEL: @test_imul24 ; SI: V_MUL_I32_I24 ; CM: MUL_INT24 +; R600: MULLO_INT define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { %mul = call i32 @llvm.AMDGPU.imul24(i32 %src0, i32 %src1) nounwind readnone store i32 %mul, i32 addrspace(1)* %out, align 4 ret void } - diff --git a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll index 21f824a65fc7..72a36029fb31 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll @@ -1,11 +1,17 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=r770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.umul24(i32, i32) nounwind readnone -; SI-LABEL: @test_umul24 +; FUNC-LABEL: @test_umul24 +; SI: V_MUL_U32_U24 +; R600: MUL_UINT24 +; R600: MULLO_UINT define void @test_umul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { %mul = call i32 @llvm.AMDGPU.umul24(i32 %src0, i32 %src1) nounwind readnone store i32 %mul, i32 addrspace(1)* %out, align 4 ret void } - From 9859540b065992fe3975ef2e4c9ad79069a59d7b Mon Sep 17 00:00:00 2001 
From: Matt Arsenault Date: Thu, 22 May 2014 18:09:00 +0000 Subject: [PATCH 086/906] R600: Implement computeMaskedBitsForTargetNode for BFE git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209459 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 30 +++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 30a9ed7ea744..870b39eca75c 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1482,8 +1482,14 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( unsigned Depth) const { KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything. + + APInt KnownZero2; + APInt KnownOne2; unsigned Opc = Op.getOpcode(); + switch (Opc) { + default: + break; case ISD::INTRINSIC_WO_CHAIN: { // FIXME: The intrinsic should just use the node. switch (cast(Op.getOperand(0))->getZExtValue()) { @@ -1507,7 +1513,29 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( computeKnownBitsForMinMax(Op.getOperand(0), Op.getOperand(1), KnownZero, KnownOne, DAG, Depth); break; - default: + + case AMDGPUISD::BFE_I32: + case AMDGPUISD::BFE_U32: { + ConstantSDNode *CWidth = dyn_cast(Op.getOperand(2)); + if (!CWidth) + return; + + unsigned BitWidth = 32; + uint32_t Width = CWidth->getZExtValue() & 0x1f; + if (Width == 0) { + KnownZero = APInt::getAllOnesValue(BitWidth); + KnownOne = APInt::getNullValue(BitWidth); + return; + } + + // FIXME: This could do a lot more. If offset is 0, should be the same as + // sign_extend_inreg implementation, but that involves duplicating it. + if (Opc == AMDGPUISD::BFE_I32) + KnownOne = APInt::getHighBitsSet(BitWidth, BitWidth - Width); + else + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width); + break; } + } } From 7e12b8262512c6ac28bdaab08b0c560fb8b6eef1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 22 May 2014 18:09:03 +0000 Subject: [PATCH 087/906] R600: Implement ComputeNumSignBitsForTargetNode for BFE git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209460 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 25 +++++++++++++++++++++++++ lib/Target/R600/AMDGPUISelLowering.h | 5 +++++ test/CodeGen/R600/sext-in-reg.ll | 15 +++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 870b39eca75c..78e79bc98a66 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1539,3 +1539,28 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( } } } + +unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode( + SDValue Op, + const SelectionDAG &DAG, + unsigned Depth) const { + switch (Op.getOpcode()) { + case AMDGPUISD::BFE_I32: { + ConstantSDNode *Width = dyn_cast(Op.getOperand(2)); + if (!Width) + return 1; + + unsigned SignBits = 32 - Width->getZExtValue() + 1; + ConstantSDNode *Offset = dyn_cast(Op.getOperand(1)); + if (!Offset || !Offset->isNullValue()) + return SignBits; + + // TODO: Could probably figure something out with non-0 offsets. 
+ unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); + return std::max(SignBits, Op0SignBits); + } + + default: + return 1; + } +} diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index b876c521d322..d5d821d9364b 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -124,6 +124,11 @@ class AMDGPUTargetLowering : public TargetLowering { const SelectionDAG &DAG, unsigned Depth = 0) const override; + virtual unsigned ComputeNumSignBitsForTargetNode( + SDValue Op, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; + // Functions defined in AMDILISelLowering.cpp public: bool getTgtMemIntrinsic(IntrinsicInfo &Info, diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll index 90c4ba6af8f2..d2ab0b928798 100644 --- a/test/CodeGen/R600/sext-in-reg.ll +++ b/test/CodeGen/R600/sext-in-reg.ll @@ -379,3 +379,18 @@ define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 ad store i16 %tmp6, i16 addrspace(1)* %out, align 2 ret void } + +declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone + +; Make sure there isn't a redundant BFE +; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe +; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000 +; SI-NOT: BFE +define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %c = add i32 %a, %b ; add to prevent folding into extload + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone + %shl = shl i32 %bfe, 24 + %ashr = ashr i32 %shl, 24 + store i32 %ashr, i32 addrspace(1)* %out, align 4 + ret void +} From e3ed4046729f083071ac0efcf2986c14a21207ae Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 22 May 2014 18:09:07 +0000 Subject: [PATCH 088/906] R600: Add dag combine for BFE git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209461 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 74 ++++ lib/Target/R600/R600ISelLowering.cpp | 3 +- lib/Target/R600/SIISelLowering.cpp | 3 +- test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll | 248 ++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll | 419 +++++++++++++++++++++++ test/CodeGen/R600/sext-in-reg.ll | 84 +++++ 6 files changed, 829 insertions(+), 2 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 78e79bc98a66..a4cb4f5c405d 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1292,6 +1292,17 @@ static void simplifyI24(SDValue Op, TargetLowering::DAGCombinerInfo &DCI) { DCI.CommitTargetLoweringOpt(TLO); } +template +static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, + uint32_t Offset, uint32_t Width) { + if (Width + Offset < 32) { + IntTy Result = (Src0 << (32 - Offset - Width)) >> (32 - Width); + return DAG.getConstant(Result, MVT::i32); + } + + return DAG.getConstant(Src0 >> Offset, MVT::i32); +} + SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -1338,6 +1349,64 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SELECT_CC: { return CombineMinMax(N, DAG); } + case AMDGPUISD::BFE_I32: + case AMDGPUISD::BFE_U32: { + assert(!N->getValueType(0).isVector() && + "Vector handling of BFE not implemented"); + ConstantSDNode *Width = dyn_cast(N->getOperand(2)); + if (!Width) + break; + + uint32_t WidthVal = Width->getZExtValue() & 0x1f; + if (WidthVal == 0) + return 
DAG.getConstant(0, MVT::i32); + + ConstantSDNode *Offset = dyn_cast(N->getOperand(1)); + if (!Offset) + break; + + SDValue BitsFrom = N->getOperand(0); + uint32_t OffsetVal = Offset->getZExtValue() & 0x1f; + + bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32; + + if (OffsetVal == 0) { + // This is already sign / zero extended, so try to fold away extra BFEs. + unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal); + + unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom); + if (OpSignBits >= SignBits) + return BitsFrom; + } + + if (ConstantSDNode *Val = dyn_cast(N->getOperand(0))) { + if (Signed) { + return constantFoldBFE(DAG, + Val->getSExtValue(), + OffsetVal, + WidthVal); + } + + return constantFoldBFE(DAG, + Val->getZExtValue(), + OffsetVal, + WidthVal); + } + + APInt Demanded = APInt::getBitsSet(32, + OffsetVal, + OffsetVal + WidthVal); + APInt KnownZero, KnownOne; + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) || + TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) { + DCI.CommitTargetLoweringOpt(TLO); + } + + break; + } } return SDValue(); } @@ -1560,6 +1629,11 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode( return std::max(SignBits, Op0SignBits); } + case AMDGPUISD::BFE_U32: { + ConstantSDNode *Width = dyn_cast(Op.getOperand(2)); + return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1; + } + default: return 1; } diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 489565e65795..d6c68305da76 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1762,7 +1762,8 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, NewArgs); } } - return SDValue(); + + return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); } static bool diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index b51c46c59e6e..c9e247c16656 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -1075,7 +1075,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, break; } } - return SDValue(); + + return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); } /// \brief Test if RegClass is one of the VSrc classes diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll index b3fec06f18d9..71d2b6e7c2ee 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll @@ -48,3 +48,251 @@ define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) no store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 ret void } + +; FUNC-LABEL: @bfe_i32_arg_0_width_reg_offset +; SI-NOT: BFE +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_arg_0_width_imm_offset +; SI-NOT: BFE +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_0 +; 
SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_1 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_2 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_3 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_4 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_5 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_6 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0xffffff80 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_7 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_8 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 
+ ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_9 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_10 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_11 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -6 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_12 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_13 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_14 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_15 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_16 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_17 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 
255, i32 1, i32 31) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_18 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll index 0d478638219f..6ed1ad5d2e65 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll @@ -38,3 +38,422 @@ define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 ret void } + +; FUNC-LABEL: @bfe_u32_arg_0_width_reg_offset +; SI-NOT: BFE +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_arg_0_width_imm_offset +; SI-NOT: BFE +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zextload_i8 +; SI: BUFFER_LOAD_UBYTE +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { + %load = load i8 addrspace(1)* %in + %ext = zext i8 %load to i32 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i8 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: V_AND_B32_e32 +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 255 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i16 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: V_AND_B32_e32 +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 65535 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_1 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 255 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_3 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0xf8 +; SI-NEXT: BFE +; SI: S_ENDPGM +define void 
@bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 255 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_7 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0x80 +; SI-NEXT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 255 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i16_offset_8 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 65535 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_1 +; SI: BUFFER_LOAD_DWORD +; SI: V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1 +; SI: S_ENDPGM +; EG: BFE_UINT +define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_4 +; SI-NOT: LSHL +; SI-NOT: SHR +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %shr = lshr i32 %shl, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_5 +; SI: BUFFER_LOAD_DWORD +; SI-NOT: LSHL +; SI-NOT: SHR +; SI: V_BFE_I32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1 +; SI: S_ENDPGM +define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %shr = ashr i32 %shl, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_6 +; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} +; SI: V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 1, 31 +; SI: S_ENDPGM +define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31) + store 
i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_7 +; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_8 +; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} +; SI: V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 31, 1 +; SI: S_ENDPGM +define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_0 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_1 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_2 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_3 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_4 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_5 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_6 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x80 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind { + 
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_7 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_8 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_9 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_10 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_11 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_12 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_13 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_14 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_15 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; 
EG-NOT: BFE +define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_16 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll index d2ab0b928798..404c9b8b8123 100644 --- a/test/CodeGen/R600/sext-in-reg.ll +++ b/test/CodeGen/R600/sext-in-reg.ll @@ -382,10 +382,57 @@ define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 ad declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone +; FUNC-LABEL: @bfe_0_width +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %load = load i32 addrspace(1)* %ptr, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_8_bfe_8 +; SI: V_BFE_I32 +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %load = load i32 addrspace(1)* %ptr, align 4 + %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone + %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone + store i32 %bfe1, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_8_bfe_16 +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8 +; SI: S_ENDPGM +define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %load = load i32 addrspace(1)* %ptr, align 4 + %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone + %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone + store i32 %bfe1, i32 addrspace(1)* %out, align 4 + ret void +} + +; This really should be folded into 1 +; FUNC-LABEL: @bfe_16_bfe_8 +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16 +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8 +; SI: S_ENDPGM +define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %load = load i32 addrspace(1)* %ptr, align 4 + %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone + %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone + store i32 %bfe1, i32 addrspace(1)* %out, align 4 + ret void +} + ; Make sure there isn't a redundant BFE ; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe ; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000 ; SI-NOT: BFE +; SI: S_ENDPGM define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %c = add i32 %a, %b ; add to prevent folding into extload %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone @@ -394,3 +441,40 @@ define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) n store i32 %ashr, i32 addrspace(1)* %out, align 4 ret void } + +; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe_wrong +define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %c = add i32 %a, %b ; add to prevent folding 
into extload + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone + %shl = shl i32 %bfe, 24 + %ashr = ashr i32 %shl, 24 + store i32 %ashr, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @sextload_i8_to_i32_bfe +; SI: BUFFER_LOAD_SBYTE +; SI-NOT: BFE +; SI: S_ENDPGM +define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind { + %load = load i8 addrspace(1)* %ptr, align 1 + %sext = sext i8 %load to i32 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone + %shl = shl i32 %bfe, 24 + %ashr = ashr i32 %shl, 24 + store i32 %ashr, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @sextload_i8_to_i32_bfe_0: +; SI-NOT: BFE +; SI: S_ENDPGM +define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind { + %load = load i8 addrspace(1)* %ptr, align 1 + %sext = sext i8 %load to i32 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone + %shl = shl i32 %bfe, 24 + %ashr = ashr i32 %shl, 24 + store i32 %ashr, i32 addrspace(1)* %out, align 4 + ret void +} From 3c698f35e0ce7e8ecb761dab6a261b98996543f7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 22 May 2014 18:09:12 +0000 Subject: [PATCH 089/906] R600: Try to convert BFE back to standard bit ops when possible. This allows existing DAG combines to work on them, and then we can re-match to BFE if necessary during instruction selection. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209462 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 21 ++++ test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll | 128 +++++++++++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll | 105 ++++++++++++++++++- test/CodeGen/R600/sext-in-reg.ll | 48 ++++++++- test/CodeGen/R600/udivrem64.ll | 6 +- 5 files changed, 297 insertions(+), 11 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index a4cb4f5c405d..6c443ea828b7 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1377,6 +1377,20 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom); if (OpSignBits >= SignBits) return BitsFrom; + + EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal); + if (Signed) { + // This is a sign_extend_inreg. Replace it to take advantage of existing + // DAG Combines. If not eliminated, we will match back to BFE during + // selection. + + // TODO: The sext_inreg of extended types ends, although we can could + // handle them in a single BFE. + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom, + DAG.getValueType(SmallVT)); + } + + return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT); } if (ConstantSDNode *Val = dyn_cast(N->getOperand(0))) { @@ -1396,6 +1410,13 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, APInt Demanded = APInt::getBitsSet(32, OffsetVal, OffsetVal + WidthVal); + + if ((OffsetVal + WidthVal) >= 32) { + SDValue ShiftVal = DAG.getConstant(OffsetVal, MVT::i32); + return DAG.getNode(Signed ? 
ISD::SRA : ISD::SRL, DL, MVT::i32, + BitsFrom, ShiftVal); + } + APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll index 71d2b6e7c2ee..eb5094232825 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll @@ -69,6 +69,115 @@ define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i ret void } +; FUNC-LABEL: @bfe_i32_test_6 +; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} +; SI: S_ENDPGM +define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_test_7 +; SI-NOT: SHL +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FIXME: The shifts should be 1 BFE +; FUNC-LABEL: @bfe_i32_test_8 +; SI: BUFFER_LOAD_DWORD +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1 +; SI: S_ENDPGM +define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_test_9 +; SI-NOT: BFE +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_test_10 +; SI-NOT: BFE +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_test_11 +; SI-NOT: BFE +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_test_12 +; SI-NOT: BFE +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_test_13 +; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void 
@bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = ashr i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void +} + +; FUNC-LABEL: @bfe_i32_test_14 +; SI-NOT: LSHR +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = lshr i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void +} + ; FUNC-LABEL: @bfe_i32_constant_fold_test_0 ; SI-NOT: BFE ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 @@ -296,3 +405,22 @@ define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind { store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 ret void } + +; XXX - This should really be a single BFE, but the sext_inreg of the +; extended type i24 is never custom lowered. +; FUNC-LABEL: @bfe_sext_in_reg_i24 +; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]], +; SI: V_LSHLREV_B32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}} +; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}} +; XSI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8 +; XSI-NOT: SHL +; XSI-NOT: SHR +; XSI: BUFFER_STORE_DWORD [[BFE]], +define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24) + %shl = shl i32 %bfe, 8 + %ashr = ashr i32 %shl, 8 + store i32 %ashr, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll index 6ed1ad5d2e65..1a62253eeb74 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll @@ -161,9 +161,9 @@ define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrsp ; FUNC-LABEL: @bfe_u32_test_1 ; SI: BUFFER_LOAD_DWORD -; SI: V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1 +; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}} ; SI: S_ENDPGM -; EG: BFE_UINT +; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1, define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { %x = load i32 addrspace(1)* %in, align 4 %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1) @@ -220,7 +220,7 @@ define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw ; FUNC-LABEL: @bfe_u32_test_6 ; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} -; SI: V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 1, 31 +; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} ; SI: S_ENDPGM define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { %x = load i32 addrspace(1)* %in, align 4 @@ -243,8 +243,9 @@ define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw } ; FUNC-LABEL: @bfe_u32_test_8 -; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} -; SI: V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 31, 1 +; SI-NOT: BFE +; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}} +; SI-NOT: BFE ; SI: S_ENDPGM define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { %x = load i32 addrspace(1)* %in, align 4 @@ -254,6 +255,76 @@ define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw ret void } +; FUNC-LABEL: @bfe_u32_test_9 +; SI-NOT: BFE +; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_9(i32 
addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_10 +; SI-NOT: BFE +; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_11 +; SI-NOT: BFE +; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_12 +; SI-NOT: BFE +; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_13 +; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = ashr i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void +} + +; FUNC-LABEL: @bfe_u32_test_14 +; SI-NOT: LSHR +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = lshr i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void +} + ; FUNC-LABEL: @bfe_u32_constant_fold_test_0 ; SI-NOT: BFE ; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 @@ -457,3 +528,27 @@ define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind { store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 ret void } + +; FUNC-LABEL: @bfe_u32_constant_fold_test_17 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_18 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll index 404c9b8b8123..1b02e4bf8015 100644 --- a/test/CodeGen/R600/sext-in-reg.ll +++ b/test/CodeGen/R600/sext-in-reg.ll @@ -417,8 +417,8 @@ define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwi ; This really should be 
folded into 1 ; FUNC-LABEL: @bfe_16_bfe_8 -; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16 ; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8 +; SI-NOT: BFE ; SI: S_ENDPGM define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { %load = load i32 addrspace(1)* %ptr, align 4 @@ -430,7 +430,7 @@ define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwi ; Make sure there isn't a redundant BFE ; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe -; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000 +; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}} ; SI-NOT: BFE ; SI: S_ENDPGM define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { @@ -478,3 +478,47 @@ define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* % store i32 %ashr, i32 addrspace(1)* %out, align 4 ret void } + +; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_0: +; SI-NOT: SHR +; SI-NOT: SHL +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1 +; SI: S_ENDPGM +define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %shr = ashr i32 %shl, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_1 +; SI: BUFFER_LOAD_DWORD +; SI-NOT: SHL +; SI-NOT: SHR +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1 +; SI: S_ENDPGM +define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 30 + %shr = ashr i32 %shl, 30 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @sext_in_reg_i2_bfe_offset_1: +; SI: BUFFER_LOAD_DWORD +; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}} +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}} +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2 +; SI: S_ENDPGM +define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 30 + %shr = ashr i32 %shl, 30 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/udivrem64.ll b/test/CodeGen/R600/udivrem64.ll index b3caebf123d7..a71315a12d80 100644 --- a/test/CodeGen/R600/udivrem64.ll +++ b/test/CodeGen/R600/udivrem64.ll @@ -3,8 +3,7 @@ ;FUNC-LABEL: @test_udiv ;EG: RECIP_UINT -;EG: BFE_UINT -;EG: BFE_UINT +;EG: LSHL {{.*}}, 1, ;EG: BFE_UINT ;EG: BFE_UINT ;EG: BFE_UINT @@ -74,8 +73,7 @@ define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { ;EG: BFE_UINT ;EG: BFE_UINT ;EG: BFE_UINT -;EG: BFE_UINT -;EG: BFE_UINT +;EG: AND_INT {{.*}}, 1, ;SI: S_ENDPGM define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) { %result = urem i64 %x, %y From fed4bab1480554965abfd2beaee9ddd38e65c84d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 22 May 2014 18:27:07 +0000 Subject: [PATCH 090/906] R600: Add definition for flat address space ID. Use 4 since that's probably what it will be for spir. Move ADDRESS_NONE to the end to keep the constant_buffer_* values unchanged, since apparently a bunch of r600 tests use those directly. 
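To make the value-preservation argument concrete, a minimal C++ sketch of the renumbering pattern follows (abbreviated, invented enumerator names; the real change is the AMDGPUAS enum in the lib/Target/R600/AMDGPU.h hunk below): FLAT takes over value 4, REGION slides into the slot vacated by ADDRESS_NONE, and ADDRESS_NONE is appended at the end, so everything from PARAM_D_ADDRESS = 6 through the CONSTANT_BUFFER_* entries keeps its old numeric value.

// sketch.cpp -- illustrative only, not the real AMDGPU.h enum; compile with -std=c++11.
enum Before { LOCAL_B = 3, REGION_B = 4, NONE_B = 5, PARAM_D_B = 6, BUF15_B = 23, LAST_B = 24 };
enum After  { LOCAL_A = 3, FLAT_A = 4, REGION_A = 5, PARAM_D_A = 6, BUF15_A = 23, NONE_A = 24, LAST_A = NONE_A };
// The values the r600 tests depend on are unchanged by the reordering.
static_assert(PARAM_D_B == PARAM_D_A, "parameter address spaces keep their numbers");
static_assert(BUF15_B == BUF15_A, "constant buffer address spaces keep their numbers");
int main() { return 0; }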
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209463 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.h | 7 ++++--- lib/Target/R600/AMDGPUTargetMachine.cpp | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 5d0cf81c4025..949fdfb217ab 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -80,8 +80,8 @@ enum AddressSpaces { GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). CONSTANT_ADDRESS = 2, ///< Address space for constant memory LOCAL_ADDRESS = 3, ///< Address space for local memory. - REGION_ADDRESS = 4, ///< Address space for region memory. - ADDRESS_NONE = 5, ///< Address space for unknown memory. + FLAT_ADDRESS = 4, ///< Address space for flat memory. + REGION_ADDRESS = 5, ///< Address space for region memory. PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0) PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1) @@ -106,7 +106,8 @@ enum AddressSpaces { CONSTANT_BUFFER_13 = 21, CONSTANT_BUFFER_14 = 22, CONSTANT_BUFFER_15 = 23, - LAST_ADDRESS = 24 + ADDRESS_NONE = 24, ///< Address space for unknown memory. + LAST_ADDRESS = ADDRESS_NONE }; } // namespace AMDGPUAS diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 6b68c2abe367..174fdca3bd77 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -54,7 +54,7 @@ static std::string computeDataLayout(const AMDGPUSubtarget &ST) { if (ST.is64bit()) { // 32-bit private, local, and region pointers. 64-bit global and constant. - Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64"; + Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64"; } Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256" From ff93350aa69cc7fa4ade346e550eef51fff46ceb Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 22 May 2014 19:38:25 +0000 Subject: [PATCH 091/906] Update some AliasAnalysis pass docs for getAdjustedAnalysisPointer. Patch by George Burgess. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209467 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/AliasAnalysis.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/AliasAnalysis.rst b/docs/AliasAnalysis.rst index 3bfb0953aff2..1cbaee703f30 100644 --- a/docs/AliasAnalysis.rst +++ b/docs/AliasAnalysis.rst @@ -246,6 +246,20 @@ analysis run method (``run`` for a ``Pass``, ``runOnFunction`` for a return false; } +Required methods to override +---------------------------- + +You must override the ``getAdjustedAnalysisPointer`` method on all subclasses +of ``AliasAnalysis``. An example implementation of this method would look like: + +.. code-block:: c++ + + void *getAdjustedAnalysisPointer(const void* ID) override { + if (ID == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + Interfaces which may be specified --------------------------------- From 8f33e4c5e430764ffd95d4d0cf86ef8816e745b6 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 22 May 2014 22:30:13 +0000 Subject: [PATCH 092/906] [RuntimeDyld] Teach RuntimeDyldMachO how to handle scattered VANILLA relocs on i386. 
This fixes two more MCJIT regression tests on i386: ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll The implementation of processScatteredVANILLA is tasteless (*ba-dum-ching*), but I'm working on a substantial tidy-up of RuntimeDyldMachO that should improve things. This patch also fixes a type-o in RuntimeDyldMachO::processSECTDIFFRelocation, and teaches that method to skip over the PAIR reloc following the SECTDIFF. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209478 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../RuntimeDyld/RuntimeDyldMachO.cpp | 64 ++++++++++++++++--- .../RuntimeDyld/RuntimeDyldMachO.h | 6 ++ 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 30529808d0d5..a70b03d95cf8 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -460,7 +460,7 @@ relocation_iterator RuntimeDyldMachO::processSECTDIFFRelocation( uint32_t AddrB = MachO->getScatteredRelocationValue(RE2); section_iterator SBI = getSectionByAddress(*MachO, AddrB); - assert(SBI != MachO->section_end() && "Can't find seciton for address B"); + assert(SBI != MachO->section_end() && "Can't find section for address B"); uint64_t SectionBBase; SBI->getAddress(SectionBBase); uint64_t SectionBOffset = AddrB - SectionBBase; @@ -483,7 +483,48 @@ relocation_iterator RuntimeDyldMachO::processSECTDIFFRelocation( addRelocationForSection(R, SectionAID); addRelocationForSection(R, SectionBID); - return RelI; + return ++RelI; +} + +relocation_iterator RuntimeDyldMachO::processI386ScatteredVANILLA( + unsigned SectionID, + relocation_iterator RelI, + ObjectImage &Obj, + ObjSectionToIDMap &ObjSectionToID) { + const MachOObjectFile *MachO = + static_cast(Obj.getObjectFile()); + MachO::any_relocation_info RE = + MachO->getRelocation(RelI->getRawDataRefImpl()); + + SectionEntry &Section = Sections[SectionID]; + uint32_t RelocType = MachO->getAnyRelocationType(RE); + bool IsPCRel = MachO->getAnyRelocationPCRel(RE); + unsigned Size = MachO->getAnyRelocationLength(RE); + uint64_t Offset; + RelI->getOffset(Offset); + uint8_t *LocalAddress = Section.Address + Offset; + unsigned NumBytes = 1 << Size; + int64_t Addend = 0; + memcpy(&Addend, LocalAddress, NumBytes); + + unsigned SymbolBaseAddr = MachO->getScatteredRelocationValue(RE); + section_iterator TargetSI = getSectionByAddress(*MachO, SymbolBaseAddr); + assert(TargetSI != MachO->section_end() && "Can't find section for symbol"); + uint64_t SectionBaseAddr; + TargetSI->getAddress(SectionBaseAddr); + SectionRef TargetSection = *TargetSI; + bool IsCode; + TargetSection.isText(IsCode); + uint32_t TargetSectionID = findOrEmitSection(Obj, TargetSection, IsCode, + ObjSectionToID); + + Addend -= SectionBaseAddr; + RelocationEntry R(SectionID, Offset, RelocType, Addend, + IsPCRel, Size); + + addRelocationForSection(R, TargetSectionID); + + return ++RelI; } relocation_iterator RuntimeDyldMachO::processRelocationRef( @@ -498,17 +539,22 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef( uint32_t RelType = MachO->getAnyRelocationType(RE); // FIXME: Properly handle scattered relocations. - // For now, optimistically skip these: they can often be ignored, as - // the static linker will already have applied the relocation, and it - // only needs to be reapplied if symbols move relative to one another. 
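Concretely, the restriction in question is a lit gate that only runs a test in an assertions-enabled build. The sketch below is hypothetical (the actual tests and RUN lines touched by this patch are not reproduced here) and only shows the shape of the change: deleting the REQUIRES line lets the same invalid-input test run in Release builds as well, where it still verifies that the tool fails gracefully instead of crashing.

; REQUIRES: asserts
;   ^ the unnecessary restriction: deleting this line runs the test in all build modes
; RUN: not llvm-mc -triple i386-unknown-unknown %s 2>&1 | FileCheck %s
; CHECK: error:
this_is_not_a_valid_instruction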
- // Note: This will fail horribly where the relocations *do* need to be - // applied, but that was already the case. + // Special case the couple of scattered relocations that we know how + // to handle: SECTDIFF relocations, and scattered VANILLA relocations + // on I386. + // For all other scattered relocations, just bail out and hope for the + // best, since the offsets computed by scattered relocations have often + // been optimisticaly filled in by the compiler. This will fail + // horribly where the relocations *do* need to be applied, but that was + // already the case. if (MachO->isRelocationScattered(RE)) { if (RelType == MachO::GENERIC_RELOC_SECTDIFF || RelType == MachO::GENERIC_RELOC_LOCAL_SECTDIFF) return processSECTDIFFRelocation(SectionID, RelI, Obj, ObjSectionToID); - - return ++RelI; + else if (Arch == Triple::x86 && RelType == MachO::GENERIC_RELOC_VANILLA) + return processI386ScatteredVANILLA(SectionID, RelI, Obj, ObjSectionToID); + else + return ++RelI; } RelocationValueRef Value; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 138c59b95cea..6911f2f07aaa 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -71,6 +71,12 @@ class RuntimeDyldMachO : public RuntimeDyldImpl { ObjectImage &ObjImg, ObjSectionToIDMap &ObjSectionToID); + relocation_iterator processI386ScatteredVANILLA( + unsigned SectionID, + relocation_iterator RelI, + ObjectImage &ObjImg, + ObjSectionToIDMap &ObjSectionToID); + struct EHFrameRelatedSections { EHFrameRelatedSections() : EHFrameSID(RTDYLD_INVALID_SECTION_ID), From ef518f1cbb7311808398b008e6628bcc995790b3 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 22 May 2014 23:09:57 +0000 Subject: [PATCH 093/906] Make these bool bitfields. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209481 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCTargetOptions.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/llvm/MC/MCTargetOptions.h b/include/llvm/MC/MCTargetOptions.h index b4f5a979720d..80cc8befb7a7 100644 --- a/include/llvm/MC/MCTargetOptions.h +++ b/include/llvm/MC/MCTargetOptions.h @@ -22,13 +22,13 @@ class MCTargetOptions { /// Enables AddressSanitizer instrumentation at machine level. bool SanitizeAddress : 1; - unsigned MCRelaxAll : 1; - unsigned MCNoExecStack : 1; - unsigned MCSaveTempLabels : 1; - unsigned MCUseDwarfDirectory : 1; - unsigned ShowMCEncoding : 1; - unsigned ShowMCInst : 1; - unsigned AsmVerbose : 1; + bool MCRelaxAll : 1; + bool MCNoExecStack : 1; + bool MCSaveTempLabels : 1; + bool MCUseDwarfDirectory : 1; + bool ShowMCEncoding : 1; + bool ShowMCInst : 1; + bool AsmVerbose : 1; MCTargetOptions(); }; From c86ebbd01eba02ff04ffb57537e24f2aa7b8795d Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Thu, 22 May 2014 23:32:18 +0000 Subject: [PATCH 094/906] [Graph Writer] Limit the length of the graph name because Windows can't handle it. Windows can't handle paths longer than 260 code points without \\?\. Even with \\?\ it can't handle path components longer than 255 code points. So limit graph names to the arbitrary length of 140. Random characters are still added to the end, so it's ok if graph names collide. 
Differential Revision: http://reviews.llvm.org/D3883 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209483 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/GraphWriter.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h index 660c519ed64c..539673a8beee 100644 --- a/include/llvm/Support/GraphWriter.h +++ b/include/llvm/Support/GraphWriter.h @@ -325,7 +325,10 @@ template std::string WriteGraph(const GraphType &G, const Twine &Name, bool ShortNames = false, const Twine &Title = "") { int FD; - std::string Filename = createGraphFilename(Name, FD); + // Windows can't always handle long paths, so limit the length of the name. + std::string N = Name.str(); + N = N.substr(0, std::min(N.size(), 140)); + std::string Filename = createGraphFilename(N, FD); raw_fd_ostream O(FD, /*shouldClose=*/ true); if (FD == -1) { From e318ce611f265b0a1e0bfd6e51cd3cad28324278 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Fri, 23 May 2014 00:06:56 +0000 Subject: [PATCH 095/906] ScalarEvolution: Fix handling of AddRecs in isKnownPredicate ScalarEvolution::isKnownPredicate() can wrongly reduce a comparison when both the LHS and RHS are SCEVAddRecExprs. This checks that both LHS and RHS are guarded in the case when both are SCEVAddRecExprs. The test case is against indvars because I could not find a way to directly test SCEV. Patch by Sanjay Patel! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209487 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 36 +++++++++++++++-------- test/Transforms/IndVarSimplify/pr18223.ll | 30 +++++++++++++++++++ 2 files changed, 54 insertions(+), 12 deletions(-) create mode 100644 test/Transforms/IndVarSimplify/pr18223.ll diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index dad8e07dadb1..d27afb09cf3a 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -6135,18 +6135,30 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, // If LHS or RHS is an addrec, check to see if the condition is true in // every iteration of the loop. - if (const SCEVAddRecExpr *AR = dyn_cast(LHS)) - if (isLoopEntryGuardedByCond( - AR->getLoop(), Pred, AR->getStart(), RHS) && - isLoopBackedgeGuardedByCond( - AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS)) - return true; - if (const SCEVAddRecExpr *AR = dyn_cast(RHS)) - if (isLoopEntryGuardedByCond( - AR->getLoop(), Pred, LHS, AR->getStart()) && - isLoopBackedgeGuardedByCond( - AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this))) - return true; + // If LHS and RHS are both addrec, both conditions must be true in + // every iteration of the loop. + const SCEVAddRecExpr *LAR = dyn_cast(LHS); + const SCEVAddRecExpr *RAR = dyn_cast(RHS); + bool LeftGuarded = false; + bool RightGuarded = false; + if (LAR) { + const Loop *L = LAR->getLoop(); + if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) && + isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) { + if (!RAR) return true; + LeftGuarded = true; + } + } + if (RAR) { + const Loop *L = RAR->getLoop(); + if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) && + isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) { + if (!LAR) return true; + RightGuarded = true; + } + } + if (LeftGuarded && RightGuarded) + return true; // Otherwise see what can be done with known constant ranges. 
return isKnownPredicateWithRanges(Pred, LHS, RHS); diff --git a/test/Transforms/IndVarSimplify/pr18223.ll b/test/Transforms/IndVarSimplify/pr18223.ll new file mode 100644 index 000000000000..738f75c0fe03 --- /dev/null +++ b/test/Transforms/IndVarSimplify/pr18223.ll @@ -0,0 +1,30 @@ +; RUN: opt -indvars -S < %s | FileCheck %s + +; indvars should transform the phi node pair from the for-loop +; CHECK-LABEL: @main( +; CHECK: ret = phi i32 [ 0, %entry ], [ 0, {{.*}} ] + +@c = common global i32 0, align 4 + +define i32 @main() #0 { +entry: + %0 = load i32* @c, align 4 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %for.body, label %exit + +for.body: + %inc2 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %sub = add i32 %inc2, -1 + %cmp1 = icmp uge i32 %sub, %inc2 + %conv = zext i1 %cmp1 to i32 + br label %for.inc + +for.inc: + %inc = add nsw i32 %inc2, 1 + %cmp = icmp slt i32 %inc, 5 + br i1 %cmp, label %for.body, label %exit + +exit: + %ret = phi i32 [ 0, %entry ], [ %conv, %for.inc ] + ret i32 %ret +} From 0e93fa9d16b44dd93fbb7f40715ad344812185b0 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Fri, 23 May 2014 01:22:46 +0000 Subject: [PATCH 096/906] Attempt to placate compilers that warn on casts between pointer-to-object and pointer-to-function types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209490 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/STLExtras.h | 40 ++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index 5b7b88b90085..1cef3933b5d6 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -67,11 +67,11 @@ template class function_ref; template class function_ref { - Ret (*callback)(void *callable, Params ...params); - void *callable; + Ret (*callback)(intptr_t callable, Params ...params); + intptr_t callable; template - static Ret callback_fn(void *callable, Params ...params) { + static Ret callback_fn(intptr_t callable, Params ...params) { return (*reinterpret_cast(callable))( std::forward(params)...); } @@ -80,7 +80,7 @@ class function_ref { template function_ref(Callable &&callable) : callback(callback_fn::type>), - callable(reinterpret_cast(&callable)) {} + callable(reinterpret_cast(&callable)) {} Ret operator()(Params ...params) const { return callback(callable, std::forward(params)...); } @@ -90,11 +90,11 @@ class function_ref { template class function_ref { - Ret (*callback)(void *callable); - void *callable; + Ret (*callback)(intptr_t callable); + intptr_t callable; template - static Ret callback_fn(void *callable) { + static Ret callback_fn(intptr_t callable) { return (*reinterpret_cast(callable))(); } @@ -102,17 +102,17 @@ class function_ref { template function_ref(Callable &&callable) : callback(callback_fn::type>), - callable(reinterpret_cast(&callable)) {} + callable(reinterpret_cast(&callable)) {} Ret operator()() const { return callback(callable); } }; template class function_ref { - Ret (*callback)(void *callable, Param1 param1); - void *callable; + Ret (*callback)(intptr_t callable, Param1 param1); + intptr_t callable; template - static Ret callback_fn(void *callable, Param1 param1) { + static Ret callback_fn(intptr_t callable, Param1 param1) { return (*reinterpret_cast(callable))( std::forward(param1)); } @@ -121,7 +121,7 @@ class function_ref { template function_ref(Callable &&callable) : callback(callback_fn::type>), - callable(reinterpret_cast(&callable)) {} + callable(reinterpret_cast(&callable)) 
{} Ret operator()(Param1 param1) { return callback(callable, std::forward(param1)); } @@ -129,11 +129,11 @@ class function_ref { template class function_ref { - Ret (*callback)(void *callable, Param1 param1, Param2 param2); - void *callable; + Ret (*callback)(intptr_t callable, Param1 param1, Param2 param2); + intptr_t callable; template - static Ret callback_fn(void *callable, Param1 param1, Param2 param2) { + static Ret callback_fn(intptr_t callable, Param1 param1, Param2 param2) { return (*reinterpret_cast(callable))( std::forward(param1), std::forward(param2)); @@ -143,7 +143,7 @@ class function_ref { template function_ref(Callable &&callable) : callback(callback_fn::type>), - callable(reinterpret_cast(&callable)) {} + callable(reinterpret_cast(&callable)) {} Ret operator()(Param1 param1, Param2 param2) { return callback(callable, std::forward(param1), @@ -153,11 +153,11 @@ class function_ref { template class function_ref { - Ret (*callback)(void *callable, Param1 param1, Param2 param2, Param3 param3); - void *callable; + Ret (*callback)(intptr_t callable, Param1 param1, Param2 param2, Param3 param3); + intptr_t callable; template - static Ret callback_fn(void *callable, Param1 param1, Param2 param2, + static Ret callback_fn(intptr_t callable, Param1 param1, Param2 param2, Param3 param3) { return (*reinterpret_cast(callable))( std::forward(param1), @@ -169,7 +169,7 @@ class function_ref { template function_ref(Callable &&callable) : callback(callback_fn::type>), - callable(reinterpret_cast(&callable)) {} + callable(reinterpret_cast(&callable)) {} Ret operator()(Param1 param1, Param2 param2, Param3 param3) { return callback(callable, std::forward(param1), From d7689b6ff6faa91f8c27b632a9e51ea55d638644 Mon Sep 17 00:00:00 2001 From: Jiangning Liu Date: Fri, 23 May 2014 02:54:50 +0000 Subject: [PATCH 097/906] [ARM64] Fix a bug in shuffle vector lowering to generate corect vext ISD with swapped input vectors. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209495 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/ARM64ISelLowering.cpp | 29 ++--- test/CodeGen/ARM64/vext_reverse.ll | 172 +++++++++++++++++++++++++ 2 files changed, 186 insertions(+), 15 deletions(-) create mode 100644 test/CodeGen/ARM64/vext_reverse.ll diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp index 385373116de8..c24b7deea94a 100644 --- a/lib/Target/ARM64/ARM64ISelLowering.cpp +++ b/lib/Target/ARM64/ARM64ISelLowering.cpp @@ -4270,23 +4270,22 @@ static bool isEXTMask(ArrayRef M, EVT VT, bool &ReverseEXT, // The index of an EXT is the first element if it is not UNDEF. // Watch out for the beginning UNDEFs. The EXT index should be the expected - // value of the first element. - // E.g. <-1, -1, 3, ...> is treated as <1, 2, 3, ...>. - // <-1, -1, 0, 1, ...> is treated as . IDX is - // equal to the ExpectedElt. - Imm = (M[0] >= 0) ? static_cast(M[0]) : ExpectedElt.getZExtValue(); - - // If no beginning UNDEFs, do swap when M[0] >= NumElts. - if (M[0] >= 0 && Imm >= NumElts) { + // value of the first element. E.g. + // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>. + // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>. + // ExpectedElt is the last mask index plus 1. + Imm = ExpectedElt.getZExtValue(); + + // There are two difference cases requiring to reverse input vectors. 
+ // For example, for vector <4 x i32> we have the following cases, + // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>) + // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>) + // For both cases, we finally use mask <5, 6, 7, 0>, which requires + // to reverse two input vectors. + if (Imm < NumElts) ReverseEXT = true; + else Imm -= NumElts; - } else if (M[0] < 0) { - // Only do swap when beginning UNDEFs more than the first real element, - if (*FirstRealElt < FirstRealElt - M.begin()) - ReverseEXT = true; - if (Imm >= NumElts) - Imm -= NumElts; - } return true; } diff --git a/test/CodeGen/ARM64/vext_reverse.ll b/test/CodeGen/ARM64/vext_reverse.ll new file mode 100644 index 000000000000..c45e55edeca5 --- /dev/null +++ b/test/CodeGen/ARM64/vext_reverse.ll @@ -0,0 +1,172 @@ +; RUN: llc -mtriple=arm64-linux-gnuabi < %s | FileCheck %s + +; The following tests is to check the correctness of reversing input operand +; of vext by enumerating all cases of using two undefs in shuffle masks. + +define <4 x i16> @vext_6701_0(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_6701_0: +; CHECK: ext v0.8b, v1.8b, v0.8b, #4 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_6701_12(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_6701_12: +; CHECK: ext v0.8b, v0.8b, v0.8b, #4 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_6701_13(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_6701_13: +; CHECK: ext v0.8b, v1.8b, v0.8b, #4 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_6701_14(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_6701_14: +; CHECK: ext v0.8b, v1.8b, v0.8b, #4 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_6701_23(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_6701_23: +; CHECK: ext v0.8b, v1.8b, v0.8b, #4 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_6701_24(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_6701_24: +; CHECK: ext v0.8b, v1.8b, v0.8b, #4 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_6701_34(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_6701_34: +; CHECK: ext v0.8b, v1.8b, v0.8b, #4 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_5670_0(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_5670_0: +; CHECK: ext v0.8b, v1.8b, v0.8b, #2 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_5670_12(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_5670_12: +; CHECK: ext v0.8b, v1.8b, v0.8b, #2 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_5670_13(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_5670_13: +; CHECK: ext v0.8b, v1.8b, v0.8b, #2 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_5670_14(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_5670_14: +; CHECK: ext v0.8b, v1.8b, v0.8b, #2 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_5670_23(<4 x i16> %a1, <4 x i16> %a2) { 
+entry: +; CHECK-LABEL: vext_5670_23: +; CHECK: ext v0.8b, v1.8b, v0.8b, #2 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_5670_24(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_5670_24: +; CHECK: rev32 v0.4h, v1.4h + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_5670_34(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_5670_34: +; CHECK: ext v0.8b, v1.8b, v0.8b, #2 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_7012_0(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_7012_0: +; CHECK: ext v0.8b, v1.8b, v0.8b, #6 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_7012_12(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_7012_12: +; CHECK: ext v0.8b, v0.8b, v0.8b, #6 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_7012_13(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_7012_13: +; CHECK: rev32 v0.4h, v0.4h + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_7012_14(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_7012_14: +; CHECK: ext v0.8b, v0.8b, v0.8b, #6 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_7012_23(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_7012_23: +; CHECK: ext v0.8b, v1.8b, v0.8b, #6 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_7012_24(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_7012_24: +; CHECK: ext v0.8b, v1.8b, v0.8b, #6 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} + +define <4 x i16> @vext_7012_34(<4 x i16> %a1, <4 x i16> %a2) { +entry: +; CHECK-LABEL: vext_7012_34: +; CHECK: ext v0.8b, v1.8b, v0.8b, #6 + %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> + ret <4 x i16> %x +} From df9a78247a13daf6de441784d16772e8b21f6d82 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Fri, 23 May 2014 02:56:51 +0000 Subject: [PATCH 098/906] MC: remove unnecessary restriction on tests Rafael correctly pointed out that the restriction is unnecessary. Although the tests are intended to ensure that we dont abort due to an assertion, running the tests in all modes is better since it also ensures that we dont crash without assertions. Always run these tests to ensure that we can handle invalid input correctly. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209496 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/AsmParser/invalid-input-assertion.s | 1 - test/MC/COFF/invalid-def.s | 1 - test/MC/COFF/invalid-endef.s | 1 - test/MC/COFF/invalid-scl-range.s | 1 - test/MC/COFF/invalid-scl.s | 1 - test/MC/COFF/invalid-type-range.s | 1 - test/MC/COFF/invalid-type.s | 1 - 7 files changed, 7 deletions(-) diff --git a/test/MC/AsmParser/invalid-input-assertion.s b/test/MC/AsmParser/invalid-input-assertion.s index 68846fe6a81a..2557f6e4aa6f 100644 --- a/test/MC/AsmParser/invalid-input-assertion.s +++ b/test/MC/AsmParser/invalid-input-assertion.s @@ -1,5 +1,4 @@ // RUN: not llvm-mc -triple i686-linux -o /dev/null %s -// REQUIRES: asserts .macro macro parameter=0 .if \parameter diff --git a/test/MC/COFF/invalid-def.s b/test/MC/COFF/invalid-def.s index bfa1a54cbd75..42821c22cf71 100644 --- a/test/MC/COFF/invalid-def.s +++ b/test/MC/COFF/invalid-def.s @@ -1,5 +1,4 @@ # RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s -# REQUIRES: asserts .def first .def second diff --git a/test/MC/COFF/invalid-endef.s b/test/MC/COFF/invalid-endef.s index 543685a66c6b..c6fd8f596268 100644 --- a/test/MC/COFF/invalid-endef.s +++ b/test/MC/COFF/invalid-endef.s @@ -1,5 +1,4 @@ # RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s -# REQUIRES: asserts .endef diff --git a/test/MC/COFF/invalid-scl-range.s b/test/MC/COFF/invalid-scl-range.s index ec0c2bb19252..57225059821e 100644 --- a/test/MC/COFF/invalid-scl-range.s +++ b/test/MC/COFF/invalid-scl-range.s @@ -1,5 +1,4 @@ # RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s -# REQUIRES: asserts .def storage_class_range .scl 1337 diff --git a/test/MC/COFF/invalid-scl.s b/test/MC/COFF/invalid-scl.s index 0d62497e96d9..8565a5afe0e9 100644 --- a/test/MC/COFF/invalid-scl.s +++ b/test/MC/COFF/invalid-scl.s @@ -1,5 +1,4 @@ # RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s -# REQUIRES: asserts .scl 1337 diff --git a/test/MC/COFF/invalid-type-range.s b/test/MC/COFF/invalid-type-range.s index 9397cc50c35b..92874cc4586b 100644 --- a/test/MC/COFF/invalid-type-range.s +++ b/test/MC/COFF/invalid-type-range.s @@ -1,5 +1,4 @@ # RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s -# REQUIRES: asserts .def invalid_type_range .type 65536 diff --git a/test/MC/COFF/invalid-type.s b/test/MC/COFF/invalid-type.s index a5c61f4aba8b..a1e131e99e55 100644 --- a/test/MC/COFF/invalid-type.s +++ b/test/MC/COFF/invalid-type.s @@ -1,5 +1,4 @@ # RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s -# REQUIRES: asserts .type 65536 From e4dfe5d46fb263d0a826529041e31d15e5e8be96 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 23 May 2014 04:23:06 +0000 Subject: [PATCH 099/906] DebugInfo: Fix cross-CU references for scopes (and variables within those scopes) in abstract definitions of cross-CU inlined functions Found by Adrian Prantl during post-commit review of r209335. 
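For context, the cross-CU scenario this fixes can be pictured with a hypothetical pair of translation units (illustrative names only, not the source the cross-cu-inlining.ll test below was generated from), where a callee defined in one compile unit is inlined into a caller from another compile unit, e.g. under LTO:

    // callee.cpp -- the callee's abstract subprogram DIE lives in this CU.
    int twice(int x) { return 2 * x; }

    // caller.cpp -- a separate CU.  With cross-module inlining the call below
    // can be inlined, so the DW_TAG_inlined_subroutine emitted for the call
    // site must reference the abstract definition (and the scopes/variables
    // underneath it) in callee.cpp's CU rather than in the caller's unit.
    int twice(int x);
    int main(int argc, char **argv) { return twice(argc); }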
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209498 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 7 ++++--- test/DebugInfo/cross-cu-inlining.ll | 6 ++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 049e9e28c119..44aa529cfa46 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -528,11 +528,12 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. - DIE *ScopeDIE = SPMap[Sub]->getDIE(Sub); + DwarfCompileUnit &SPCU = *SPMap[Sub]; + DIE *ScopeDIE = SPCU.getDIE(Sub); assert(ScopeDIE); AbstractSPDies.insert(std::make_pair(Sub, ScopeDIE)); - TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); - createAndAddScopeChildren(TheCU, Scope, *ScopeDIE); + SPCU.addUInt(*ScopeDIE, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); + createAndAddScopeChildren(SPCU, Scope, *ScopeDIE); } DIE &DwarfDebug::constructSubprogramScopeDIE(DwarfCompileUnit &TheCU, diff --git a/test/DebugInfo/cross-cu-inlining.ll b/test/DebugInfo/cross-cu-inlining.ll index ae684ad507c4..44a1a5850693 100644 --- a/test/DebugInfo/cross-cu-inlining.ll +++ b/test/DebugInfo/cross-cu-inlining.ll @@ -27,6 +27,9 @@ ; CHECK-NEXT: DW_AT_abstract_origin {{.*}}[[ABS_FUNC:........]]) ; CHECK: DW_TAG_formal_parameter ; CHECK-NEXT: DW_AT_abstract_origin {{.*}}[[ABS_VAR:........]]) +; CHECK: 0x[[INT:.*]]: DW_TAG_base_type +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}} "int" ; Check the abstract definition is in the 'b.cpp' CU and doesn't contain any ; concrete information (address range or variable location) @@ -35,6 +38,9 @@ ; CHECK: 0x[[ABS_FUNC]]: DW_TAG_subprogram ; CHECK-NOT: DW_AT_low_pc ; CHECK: 0x[[ABS_VAR]]: DW_TAG_formal_parameter +; CHECK-NOT: DW_TAG +; CHECK-NOT: DW_AT_location +; CHECK: DW_AT_type [DW_FORM_ref_addr] (0x00000000[[INT]]) ; CHECK-NOT: DW_AT_location ; Check the concrete out of line definition references the abstract and From 1ed412cb72c3e0a6d5cfeaf756ab1fd014f1844b Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 23 May 2014 05:03:23 +0000 Subject: [PATCH 100/906] Rename a couple of variables to be more accurate. It's not really a "ScopeDIE", as such - it's the abstract function definition's DIE. And we usually use "SP" for subprograms, rather than "Sub". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209499 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 44aa529cfa46..06331e44d913 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -521,19 +521,19 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, assert(Scope->isAbstractScope()); assert(!Scope->getInlinedAt()); - DISubprogram Sub(Scope->getScopeNode()); + DISubprogram SP(Scope->getScopeNode()); - if (!ProcessedSPNodes.insert(Sub)) + if (!ProcessedSPNodes.insert(SP)) return; // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. 
- DwarfCompileUnit &SPCU = *SPMap[Sub]; - DIE *ScopeDIE = SPCU.getDIE(Sub); - assert(ScopeDIE); - AbstractSPDies.insert(std::make_pair(Sub, ScopeDIE)); - SPCU.addUInt(*ScopeDIE, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); - createAndAddScopeChildren(SPCU, Scope, *ScopeDIE); + DwarfCompileUnit &SPCU = *SPMap[SP]; + DIE *AbsDef = SPCU.getDIE(SP); + assert(AbsDef); + AbstractSPDies.insert(std::make_pair(SP, AbsDef)); + SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); + createAndAddScopeChildren(SPCU, Scope, *AbsDef); } DIE &DwarfDebug::constructSubprogramScopeDIE(DwarfCompileUnit &TheCU, From f97e200a98052d0f61711580b4f5142a59d9dc46 Mon Sep 17 00:00:00 2001 From: Filipe Cabecinhas Date: Fri, 23 May 2014 05:52:12 +0000 Subject: [PATCH 101/906] llvm-ar: Output the file we errored on. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209500 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Object/ar-error.test | 5 +++++ tools/llvm-ar/llvm-ar.cpp | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 test/Object/ar-error.test diff --git a/test/Object/ar-error.test b/test/Object/ar-error.test new file mode 100644 index 000000000000..9b8ddbd78fb1 --- /dev/null +++ b/test/Object/ar-error.test @@ -0,0 +1,5 @@ +Test if we get a proper error with a filename that doesn't exist + +RUN: not llvm-ar r %t.out.a sparkle.o %t 2>&1 | FileCheck %s + +CHECK: llvm-ar{{(.exe|.EXE)?}}: sparkle.o: No such file or directory diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp index db95674d36ad..ed7291ea0c91 100644 --- a/tools/llvm-ar/llvm-ar.cpp +++ b/tools/llvm-ar/llvm-ar.cpp @@ -516,7 +516,7 @@ computeInsertAction(ArchiveOperation Operation, // We could try to optimize this to a fstat, but it is not a common // operation. sys::fs::file_status Status; - failIfError(sys::fs::status(*MI, Status)); + failIfError(sys::fs::status(*MI, Status), *MI); if (Status.getLastModificationTime() < I->getLastModified()) { if (PosName.empty()) return IA_AddOldMember; From cfce940eae2c991cc8caf2f4882bedefe928d83a Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Fri, 23 May 2014 06:30:12 +0000 Subject: [PATCH 102/906] Test commit. The keyword "virtual" is not necessary. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209501 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp index c3d9f120e996..f3a095d829e4 100644 --- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -63,7 +63,7 @@ class NVPTXFavorNonGenericAddrSpaces : public FunctionPass { static char ID; NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {} - virtual bool runOnFunction(Function &F) override; + bool runOnFunction(Function &F) override; /// Optimizes load/store instructions. Idx is the index of the pointer operand /// (0 for load, and 1 for store). Returns true if it changes anything. From 7a0a087f28153b62e856ba5fad4650f4ac5073ed Mon Sep 17 00:00:00 2001 From: Yaron Keren Date: Fri, 23 May 2014 07:31:25 +0000 Subject: [PATCH 103/906] Typedef NumeredTypesMapTy is not used anywhere. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209502 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/Module.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h index 132d2da1a611..0c309e89c902 100644 --- a/include/llvm/IR/Module.h +++ b/include/llvm/IR/Module.h @@ -298,10 +298,6 @@ class Module { /// registered in this LLVMContext. void getMDKindNames(SmallVectorImpl &Result) const; - - typedef DenseMap > - NumeredTypesMapTy; - /// Return the type with the specified name, or null if there is none by that /// name. StructType *getTypeByName(StringRef Name) const; From 84a0dc323d90f86e899519b31b789ed5c0a0ec9a Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Fri, 23 May 2014 08:07:09 +0000 Subject: [PATCH 104/906] [YAML] Add an optional argument `EnumMask` to the `yaml::IO::bitSetCase()`. Some bit-set fields used in ELF file headers in fact contain two parts. The first one is a regular bit-field. The second one is an enumeraion. For example ELF header `e_flags` for MIPS target might contain the following values: Bit-set values: EF_MIPS_NOREORDER = 0x00000001 EF_MIPS_PIC = 0x00000002 EF_MIPS_CPIC = 0x00000004 EF_MIPS_ABI2 = 0x00000020 Enumeration: EF_MIPS_ARCH_32 = 0x50000000 EF_MIPS_ARCH_64 = 0x60000000 EF_MIPS_ARCH_32R2 = 0x70000000 EF_MIPS_ARCH_64R2 = 0x80000000 For printing bit-sets we use the `yaml::IO::bitSetCase()`. It does not support bit-set/enumeration combinations and prints too many flags from an enumeration part. This patch fixes this problem. New method `yaml::IO::maskedBitSetCase()` handle "enumeration" part of bitset defined by provided mask. Patch reviewed by Nick Kledzik and Sean Silva. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209504 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/YamlIO.rst | 36 +++++++++++++++++++++++++++++++ include/llvm/Support/YAMLTraits.h | 13 +++++++++++ lib/Object/ELFYAML.cpp | 36 ++++++++++++++++--------------- test/Object/obj2yaml.test | 4 ++-- 4 files changed, 70 insertions(+), 19 deletions(-) diff --git a/docs/YamlIO.rst b/docs/YamlIO.rst index dfb348da2f47..76dd021f82f5 100644 --- a/docs/YamlIO.rst +++ b/docs/YamlIO.rst @@ -399,6 +399,42 @@ the above schema, a same valid YAML document is: name: Tom flags: [ pointy, flat ] +Sometimes a "flags" field might contains an enumeration part +defined by a bit-mask. + +.. code-block:: c++ + + enum { + flagsFeatureA = 1, + flagsFeatureB = 2, + flagsFeatureC = 4, + + flagsCPUMask = 24, + + flagsCPU1 = 8, + flagsCPU2 = 16 + }; + +To support reading and writing such fields, you need to use the maskedBitSet() +method and provide the bit values, their names and the enumeration mask. + +.. code-block:: c++ + + template <> + struct ScalarBitSetTraits { + static void bitset(IO &io, MyFlags &value) { + io.bitSetCase(value, "featureA", flagsFeatureA); + io.bitSetCase(value, "featureB", flagsFeatureB); + io.bitSetCase(value, "featureC", flagsFeatureC); + io.maskedBitSetCase(value, "CPU1", flagsCPU1, flagsCPUMask); + io.maskedBitSetCase(value, "CPU2", flagsCPU2, flagsCPUMask); + } + }; + +YAML I/O (when writing) will apply the enumeration mask to the flags field, +and compare the result and values from the bitset. As in case of a regular +bitset, each that matches will cause the corresponding string to be added +to the flow sequence. 
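As a concrete, hedged illustration of the write-time matching described above, the standalone sketch below only mimics the comparisons that bitSetCase() and maskedBitSetCase() perform; it does not use YAMLTraits.h, and the printed text only approximates the real flow-sequence output:

    #include <cstdint>
    #include <iostream>
    #include <string>

    enum : uint32_t {
      flagsFeatureA = 1, flagsFeatureB = 2, flagsFeatureC = 4,
      flagsCPUMask  = 24, flagsCPU1 = 8, flagsCPU2 = 16
    };

    int main() {
      uint32_t Val = flagsFeatureA | flagsCPU2;   // 1 | 16 == 17
      std::string Out;
      auto Emit = [&](const char *Name) {
        Out += Out.empty() ? std::string(Name) : std::string(", ") + Name;
      };

      // bitSetCase(): a name matches when its bit is set in the value.
      if (Val & flagsFeatureA) Emit("featureA");
      if (Val & flagsFeatureB) Emit("featureB");
      if (Val & flagsFeatureC) Emit("featureC");
      // maskedBitSetCase(): a name matches only when the masked part of the
      // value compares equal to the constant, so overlapping CPU encodings
      // cannot both match.
      if ((Val & flagsCPUMask) == flagsCPU1) Emit("CPU1");
      if ((Val & flagsCPUMask) == flagsCPU2) Emit("CPU2");

      std::cout << "flags: [ " << Out << " ]\n";  // flags: [ featureA, CPU2 ]
    }

This also mirrors why the obj2yaml.test expectations change later in this patch: with plain bitSetCase entries, every enumeration value whose bits are a subset of the stored field also matched (for example EF_MIPS_ARCH_1, EF_MIPS_ARCH_2 and EF_MIPS_ARCH_5 alongside EF_MIPS_ARCH_32).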
Custom Scalar ------------- diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h index 7f4a92293549..4ee05ed13359 100644 --- a/include/llvm/Support/YAMLTraits.h +++ b/include/llvm/Support/YAMLTraits.h @@ -487,6 +487,19 @@ class IO { } } + template + void maskedBitSetCase(T &Val, const char *Str, T ConstVal, T Mask) { + if (bitSetMatch(Str, outputting() && (Val & Mask) == ConstVal)) + Val = Val | ConstVal; + } + + template + void maskedBitSetCase(T &Val, const char *Str, uint32_t ConstVal, + uint32_t Mask) { + if (bitSetMatch(Str, outputting() && (Val & Mask) == ConstVal)) + Val = Val | ConstVal; + } + void *getContext(); void setContext(void *); diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp index 8329853340cd..7d50f23417b2 100644 --- a/lib/Object/ELFYAML.cpp +++ b/lib/Object/ELFYAML.cpp @@ -246,16 +246,17 @@ void ScalarBitSetTraits::bitset(IO &IO, const auto *Object = static_cast(IO.getContext()); assert(Object && "The IO context is not initialized"); #define BCase(X) IO.bitSetCase(Value, #X, ELF::X); +#define BCaseMask(X, M) IO.maskedBitSetCase(Value, #X, ELF::X, ELF::M); switch (Object->Header.Machine) { case ELF::EM_ARM: BCase(EF_ARM_SOFT_FLOAT) BCase(EF_ARM_VFP_FLOAT) - BCase(EF_ARM_EABI_UNKNOWN) - BCase(EF_ARM_EABI_VER1) - BCase(EF_ARM_EABI_VER2) - BCase(EF_ARM_EABI_VER3) - BCase(EF_ARM_EABI_VER4) - BCase(EF_ARM_EABI_VER5) + BCaseMask(EF_ARM_EABI_UNKNOWN, EF_ARM_EABIMASK) + BCaseMask(EF_ARM_EABI_VER1, EF_ARM_EABIMASK) + BCaseMask(EF_ARM_EABI_VER2, EF_ARM_EABIMASK) + BCaseMask(EF_ARM_EABI_VER3, EF_ARM_EABIMASK) + BCaseMask(EF_ARM_EABI_VER4, EF_ARM_EABIMASK) + BCaseMask(EF_ARM_EABI_VER5, EF_ARM_EABIMASK) break; case ELF::EM_MIPS: BCase(EF_MIPS_NOREORDER) @@ -266,17 +267,17 @@ void ScalarBitSetTraits::bitset(IO &IO, BCase(EF_MIPS_ABI_O32) BCase(EF_MIPS_MICROMIPS) BCase(EF_MIPS_ARCH_ASE_M16) - BCase(EF_MIPS_ARCH_1) - BCase(EF_MIPS_ARCH_2) - BCase(EF_MIPS_ARCH_3) - BCase(EF_MIPS_ARCH_4) - BCase(EF_MIPS_ARCH_5) - BCase(EF_MIPS_ARCH_32) - BCase(EF_MIPS_ARCH_64) - BCase(EF_MIPS_ARCH_32R2) - BCase(EF_MIPS_ARCH_64R2) - BCase(EF_MIPS_ARCH_32R6) - BCase(EF_MIPS_ARCH_64R6) + BCaseMask(EF_MIPS_ARCH_1, EF_MIPS_ARCH) + BCaseMask(EF_MIPS_ARCH_2, EF_MIPS_ARCH) + BCaseMask(EF_MIPS_ARCH_3, EF_MIPS_ARCH) + BCaseMask(EF_MIPS_ARCH_4, EF_MIPS_ARCH) + BCaseMask(EF_MIPS_ARCH_5, EF_MIPS_ARCH) + BCaseMask(EF_MIPS_ARCH_32, EF_MIPS_ARCH) + BCaseMask(EF_MIPS_ARCH_64, EF_MIPS_ARCH) + BCaseMask(EF_MIPS_ARCH_32R2, EF_MIPS_ARCH) + BCaseMask(EF_MIPS_ARCH_64R2, EF_MIPS_ARCH) + BCaseMask(EF_MIPS_ARCH_32R6, EF_MIPS_ARCH) + BCaseMask(EF_MIPS_ARCH_64R6, EF_MIPS_ARCH) break; case ELF::EM_HEXAGON: BCase(EF_HEXAGON_MACH_V2) @@ -292,6 +293,7 @@ void ScalarBitSetTraits::bitset(IO &IO, llvm_unreachable("Unsupported architecture"); } #undef BCase +#undef BCaseMask } void ScalarEnumerationTraits::enumeration( diff --git a/test/Object/obj2yaml.test b/test/Object/obj2yaml.test index db03f5afa4d7..1c1526349fd5 100644 --- a/test/Object/obj2yaml.test +++ b/test/Object/obj2yaml.test @@ -191,7 +191,7 @@ ELF-MIPSEL-NEXT: Class: ELFCLASS32 ELF-MIPSEL-NEXT: Data: ELFDATA2LSB ELF-MIPSEL-NEXT: Type: ET_REL ELF-MIPSEL-NEXT: Machine: EM_MIPS -ELF-MIPSEL-NEXT: Flags: [ EF_MIPS_NOREORDER, EF_MIPS_PIC, EF_MIPS_CPIC, EF_MIPS_ABI_O32, EF_MIPS_ARCH_1, EF_MIPS_ARCH_2, EF_MIPS_ARCH_5, EF_MIPS_ARCH_32 ] +ELF-MIPSEL-NEXT: Flags: [ EF_MIPS_NOREORDER, EF_MIPS_PIC, EF_MIPS_CPIC, EF_MIPS_ABI_O32, EF_MIPS_ARCH_32 ] ELF-MIPSEL-NEXT: Sections: ELF-MIPSEL-NEXT: - Name: .text ELF-MIPSEL-NEXT: Type: SHT_PROGBITS @@ -285,7 
+285,7 @@ ELF-MIPS64EL-NEXT: Class: ELFCLASS64 ELF-MIPS64EL-NEXT: Data: ELFDATA2LSB ELF-MIPS64EL-NEXT: Type: ET_REL ELF-MIPS64EL-NEXT: Machine: EM_MIPS -ELF-MIPS64EL-NEXT: Flags: [ EF_MIPS_ARCH_1, EF_MIPS_ARCH_3 ] +ELF-MIPS64EL-NEXT: Flags: [ EF_MIPS_ARCH_3 ] ELF-MIPS64EL-NEXT: Sections: ELF-MIPS64EL-NEXT: - Name: .text ELF-MIPS64EL-NEXT: Type: SHT_PROGBITS From fa16c880f2a4c579bc8e297846945fdefa8ce7ca Mon Sep 17 00:00:00 2001 From: Bradley Smith Date: Fri, 23 May 2014 10:14:13 +0000 Subject: [PATCH 105/906] Fixup sys::getHostCPUFeatures crypto names so it doesn't clash with kernel headers git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209506 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Host.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 7e86e2ffcf28..fd0472ee2f75 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -718,10 +718,10 @@ bool sys::getHostCPUFeatures(StringMap &Features) { #if defined(__aarch64__) // Keep track of which crypto features we have seen enum { - HWCAP_AES = 0x1, - HWCAP_PMULL = 0x2, - HWCAP_SHA1 = 0x4, - HWCAP_SHA2 = 0x8 + CAP_AES = 0x1, + CAP_PMULL = 0x2, + CAP_SHA1 = 0x4, + CAP_SHA2 = 0x8 }; uint32_t crypto = 0; #endif @@ -747,13 +747,13 @@ bool sys::getHostCPUFeatures(StringMap &Features) { // We need to check crypto seperately since we need all of the crypto // extensions to enable the subtarget feature if (CPUFeatures[I] == "aes") - crypto |= HWCAP_AES; + crypto |= CAP_AES; else if (CPUFeatures[I] == "pmull") - crypto |= HWCAP_PMULL; + crypto |= CAP_PMULL; else if (CPUFeatures[I] == "sha1") - crypto |= HWCAP_SHA1; + crypto |= CAP_SHA1; else if (CPUFeatures[I] == "sha2") - crypto |= HWCAP_SHA2; + crypto |= CAP_SHA2; #endif if (LLVMFeatureStr != "") @@ -762,7 +762,7 @@ bool sys::getHostCPUFeatures(StringMap &Features) { #if defined(__aarch64__) // If we have all crypto bits we can add the feature - if (crypto == (HWCAP_AES | HWCAP_PMULL | HWCAP_SHA1 | HWCAP_SHA2)) + if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) Features.GetOrCreateValue("crypto").setValue(true); #endif From 4f22c980f4cf9da0af984c4368ad0fbd42db6b83 Mon Sep 17 00:00:00 2001 From: Pekka Jaaskelainen Date: Fri, 23 May 2014 11:35:46 +0000 Subject: [PATCH 106/906] Updated the llvm.mem.parallel_loop_access semantics to include the possibility to have only some of the loop's memory instructions be annotated and still _help_ the loop carried dependence analysis. This was discussed in the llvmdev ML (topic: "parallel loop metadata question"). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209507 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/docs/LangRef.rst b/docs/LangRef.rst index ceec1bd5476d..fa8d3c0b75fe 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -2785,15 +2785,29 @@ for optimizations are prefixed with ``llvm.mem``. '``llvm.mem.parallel_loop_access``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -For a loop to be parallel, in addition to using -the ``llvm.loop`` metadata to mark the loop latch branch instruction, -also all of the memory accessing instructions in the loop body need to be -marked with the ``llvm.mem.parallel_loop_access`` metadata. If there -is at least one memory accessing instruction not marked with the metadata, -the loop must be considered a sequential loop. 
This causes parallel loops to be -converted to sequential loops due to optimization passes that are unaware of -the parallel semantics and that insert new memory instructions to the loop -body. +The ``llvm.mem.parallel_loop_access`` metadata refers to a loop identifier, +or metadata containing a list of loop identifiers for nested loops. +The metadata is attached to memory accessing instructions and denotes that +no loop carried memory dependence exist between it and other instructions denoted +with the same loop identifier. + +Precisely, given two instructions ``m1`` and ``m2`` that both have the +``llvm.mem.parallel_loop_access`` metadata, with ``L1`` and ``L2`` being the +set of loops associated with that metadata, respectively, then there is no loop +carried dependence between ``m1`` and ``m2`` for loops ``L1`` or +``L2``. + +As a special case, if all memory accessing instructions in a loop have +``llvm.mem.parallel_loop_access`` metadata that refers to that loop, then the +loop has no loop carried memory dependences and is considered to be a parallel +loop. + +Note that if not all memory access instructions have such metadata referring to +the loop, then the loop is considered not being trivially parallel. Additional +memory dependence analysis is required to make that determination. As a fail +safe mechanism, this causes loops that were originally parallel to be considered +sequential (if optimization passes that are unaware of the parallel semantics +insert new memory instructions into the loop body). Example of a loop that is considered parallel due to its correct use of both ``llvm.loop`` and ``llvm.mem.parallel_loop_access`` From f2938bf8dae4dd7ef762e521c63d34767ffcd61c Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Fri, 23 May 2014 11:52:07 +0000 Subject: [PATCH 107/906] [asan] properly instrument memory accesses that have small alignment (smaller than min(8,size)) by making two checks instead of one. This may slowdown some cases, e.g. long long on 32-bit or wide loads produced after loop unrolling. The benefit is higher sencitivity. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209508 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 28 +++++++++++++------ .../Instrumentation/AddressSanitizer/basic.ll | 16 +++++++++-- .../instrumentation-with-call-threshold.ll | 8 +++--- .../AddressSanitizer/test64.ll | 10 +++++-- 4 files changed, 44 insertions(+), 18 deletions(-) diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 291ad2ed47e5..25acd2818935 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -623,26 +623,31 @@ void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { } // If I is an interesting memory access, return the PointerOperand -// and set IsWrite. Otherwise return NULL. -static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) { +// and set IsWrite/Alignment. Otherwise return NULL. 
+static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, + unsigned *Alignment) { if (LoadInst *LI = dyn_cast(I)) { if (!ClInstrumentReads) return nullptr; *IsWrite = false; + *Alignment = LI->getAlignment(); return LI->getPointerOperand(); } if (StoreInst *SI = dyn_cast(I)) { if (!ClInstrumentWrites) return nullptr; *IsWrite = true; + *Alignment = SI->getAlignment(); return SI->getPointerOperand(); } if (AtomicRMWInst *RMW = dyn_cast(I)) { if (!ClInstrumentAtomics) return nullptr; *IsWrite = true; + *Alignment = 0; return RMW->getPointerOperand(); } if (AtomicCmpXchgInst *XCHG = dyn_cast(I)) { if (!ClInstrumentAtomics) return nullptr; *IsWrite = true; + *Alignment = 0; return XCHG->getPointerOperand(); } return nullptr; @@ -692,7 +697,8 @@ AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) { void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) { bool IsWrite = false; - Value *Addr = isInterestingMemoryAccess(I, &IsWrite); + unsigned Alignment = 0; + Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &Alignment); assert(Addr); if (ClOpt && ClOptGlobals) { if (GlobalVariable *G = dyn_cast(Addr)) { @@ -727,11 +733,14 @@ void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) { else NumInstrumentedReads++; - // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check. - if (TypeSize == 8 || TypeSize == 16 || - TypeSize == 32 || TypeSize == 64 || TypeSize == 128) + unsigned Granularity = 1 << Mapping.Scale; + // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check + // if the data is properly aligned. + if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 || + TypeSize == 128) && + (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8)) return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls); - // Instrument unusual size (but still multiple of 8). + // Instrument unusual size or unusual alignment. // We can not do it with a single check, so we do 1-byte check for the first // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able // to report the actual access size. @@ -1328,6 +1337,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { SmallVector PointerComparisonsOrSubtracts; int NumAllocas = 0; bool IsWrite; + unsigned Alignment; // Fill the set of memory operations to instrument. for (Function::iterator FI = F.begin(), FE = F.end(); @@ -1338,7 +1348,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { if (LooksLikeCodeInBug11395(BI)) return false; - if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite)) { + if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite, &Alignment)) { if (ClOpt && ClOptSameTemp) { if (!TempsToInstrument.insert(Addr)) continue; // We've seen this temp in the current BB. 
@@ -1390,7 +1400,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { Instruction *Inst = ToInstrument[i]; if (ClDebugMin < 0 || ClDebugMax < 0 || (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { - if (isInterestingMemoryAccess(Inst, &IsWrite)) + if (isInterestingMemoryAccess(Inst, &IsWrite, &Alignment)) instrumentMop(Inst, UseCalls); else instrumentMemIntrinsic(cast(Inst)); diff --git a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll index 7a125a396433..7d1aa0b8530b 100644 --- a/test/Instrumentation/AddressSanitizer/basic.ll +++ b/test/Instrumentation/AddressSanitizer/basic.ll @@ -34,7 +34,7 @@ define i32 @test_load(i32* %a) sanitize_address { entry: - %tmp1 = load i32* %a + %tmp1 = load i32* %a, align 4 ret i32 %tmp1 } @@ -66,7 +66,7 @@ define void @test_store(i32* %a) sanitize_address { ; entry: - store i32 42, i32* %a + store i32 42, i32* %a, align 4 ret void } @@ -115,6 +115,18 @@ define void @i40test(i40* %a, i40* %b) nounwind uwtable sanitize_address { ; CHECK: __asan_report_store_n{{.*}}, i64 5) ; CHECK: ret void +define void @i64test_align1(i64* %b) nounwind uwtable sanitize_address { + entry: + store i64 0, i64* %b, align 1 + ret void +} + +; CHECK-LABEL: i64test_align1 +; CHECK: __asan_report_store_n{{.*}}, i64 8) +; CHECK: __asan_report_store_n{{.*}}, i64 8) +; CHECK: ret void + + define void @i80test(i80* %a, i80* %b) nounwind uwtable sanitize_address { entry: %t = load i80* %a diff --git a/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll b/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll index dd82444b17b3..adb434112cd2 100644 --- a/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll +++ b/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll @@ -20,10 +20,10 @@ entry: ; CHECK-CUSTOM-PREFIX: call void @__foo_load8 ; CHECK-CUSTOM-PREFIX: call void @__foo_loadN ; CHECK-INLINE-NOT: call void @__asan_load - %tmp1 = load i32* %a - %tmp2 = load i64* %b - %tmp3 = load i512* %c - %tmp4 = load i80* %d + %tmp1 = load i32* %a, align 4 + %tmp2 = load i64* %b, align 8 + %tmp3 = load i512* %c, align 32 + %tmp4 = load i80* %d, align 8 ret void } diff --git a/test/Instrumentation/AddressSanitizer/test64.ll b/test/Instrumentation/AddressSanitizer/test64.ll index 4f3ed5b478c9..fd93f4576ae4 100644 --- a/test/Instrumentation/AddressSanitizer/test64.ll +++ b/test/Instrumentation/AddressSanitizer/test64.ll @@ -6,7 +6,7 @@ entry: %tmp1 = load i32* %a, align 4 ret i32 %tmp1 } -; CHECK: @read_4_bytes +; CHECK-LABEL: @read_4_bytes ; CHECK-NOT: ret ; CHECK: lshr {{.*}} 3 ; Check for ASAN's Offset for 64-bit (7fff8000) @@ -19,8 +19,10 @@ entry: ret void } -; CHECK: @example_atomicrmw +; CHECK-LABEL: @example_atomicrmw ; CHECK: lshr {{.*}} 3 +; CHECK: __asan_report_store8 +; CHECK-NOT: __asan_report ; CHECK: atomicrmw ; CHECK: ret @@ -30,7 +32,9 @@ entry: ret void } -; CHECK: @example_cmpxchg +; CHECK-LABEL: @example_cmpxchg ; CHECK: lshr {{.*}} 3 +; CHECK: __asan_report_store8 +; CHECK-NOT: __asan_report ; CHECK: cmpxchg ; CHECK: ret From 36b0fd51de0c2883e5e715287691a2f3d9623c05 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 23 May 2014 13:18:02 +0000 Subject: [PATCH 108/906] [mips][mips64r6] [ls][dw][lr] are not available in MIPS32r6/MIPS64r6 Summary: Instead the system is required to provide some means of handling unaligned load/store without special instructions. 
Options include full hardware support, full trap-and-emulate, and hybrids such as hardware support within a cache line and trap-and-emulate for multi-line accesses. MipsSETargetLowering::allowsUnalignedMemoryAccesses() has been configured to assume that unaligned accesses are 'fast' on the basis that I expect few hardware implementations will opt for pure-software handling of unaligned accesses. The ones that do handle it purely in software can override this. mips64-load-store-left-right.ll has been merged into load-store-left-right.ll The stricter testing revealed a Bits!=Bytes bug in passByValArg(). This has been fixed and the variables renamed to clarify the units they hold. Reviewers: zoran.jovanovic, jkolek, vmedic Reviewed By: vmedic Differential Revision: http://reviews.llvm.org/D3872 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209512 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips32r6InstrInfo.td | 1 - lib/Target/Mips/Mips64InstrInfo.td | 8 +- lib/Target/Mips/Mips64r6InstrInfo.td | 1 - lib/Target/Mips/MipsISelDAGToDAG.cpp | 5 +- lib/Target/Mips/MipsISelLowering.cpp | 61 +-- lib/Target/Mips/MipsInstrInfo.td | 15 +- lib/Target/Mips/MipsSEISelLowering.cpp | 10 + lib/Target/Mips/MipsSubtarget.h | 7 +- test/CodeGen/Mips/load-store-left-right.ll | 434 +++++++++++++++++- .../Mips/mips64load-store-left-right.ll | 75 --- test/CodeGen/Mips/unalignedload.ll | 67 ++- .../Mips/mips32r6/invalid-mips1-wrong-error.s | 15 + .../Mips/mips64r6/invalid-mips1-wrong-error.s | 15 + .../Mips/mips64r6/invalid-mips3-wrong-error.s | 23 + test/MC/Mips/mips64r6/invalid-mips3.s | 8 + 15 files changed, 602 insertions(+), 143 deletions(-) delete mode 100644 test/CodeGen/Mips/mips64load-store-left-right.ll create mode 100644 test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s create mode 100644 test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s create mode 100644 test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s create mode 100644 test/MC/Mips/mips64r6/invalid-mips3.s diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index a1a3f6bc8cf3..9755159e62d4 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -35,7 +35,6 @@ include "Mips32r6InstrFormats.td" // Removed: jalx // Removed: ldxc1 // Removed: luxc1 -// Removed: lwl, lwr, lwle, lwre, swl, swr, swle, swre // Removed: lwxc1 // Removed: madd.[ds], nmadd.[ds], nmsub.[ds], sub.[ds] // Removed: mfhi, mflo, mthi, mtlo, madd, maddu, msub, msubu, mul diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index df49aa8e7802..43103e65375e 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -155,13 +155,13 @@ def SWR64 : StoreLeftRight<"swr", MipsSWR, GPR64Opnd, II_SWR>, LW_FM<0x2e>; } def LDL : LoadLeftRight<"ldl", MipsLDL, GPR64Opnd, II_LDL>, LW_FM<0x1a>, - ISA_MIPS3; + ISA_MIPS3_NOT_32R6_64R6; def LDR : LoadLeftRight<"ldr", MipsLDR, GPR64Opnd, II_LDR>, LW_FM<0x1b>, - ISA_MIPS3; + ISA_MIPS3_NOT_32R6_64R6; def SDL : StoreLeftRight<"sdl", MipsSDL, GPR64Opnd, II_SDL>, LW_FM<0x2c>, - ISA_MIPS3; + ISA_MIPS3_NOT_32R6_64R6; def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd, II_SDR>, LW_FM<0x2d>, - ISA_MIPS3; + ISA_MIPS3_NOT_32R6_64R6; /// Load-linked, Store-conditional def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>, ISA_MIPS3; diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td index 2e87a60a1e8f..f971218779d9 100644 --- a/lib/Target/Mips/Mips64r6InstrInfo.td +++ 
b/lib/Target/Mips/Mips64r6InstrInfo.td @@ -17,7 +17,6 @@ // Removed: daddi // Removed: ddiv, ddivu, dmult, dmultu // Removed: div, divu -// Removed: ldl, ldr, ldle, ldre, sdl, sdr, sdle, sdre //===----------------------------------------------------------------------===// // diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 4eb9d4356ef5..90cff631931f 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -202,8 +202,9 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { #ifndef NDEBUG case ISD::LOAD: case ISD::STORE: - assert(cast(Node)->getMemoryVT().getSizeInBits() / 8 <= - cast(Node)->getAlignment() && + assert((Subtarget.systemSupportsUnalignedAccess() || + cast(Node)->getMemoryVT().getSizeInBits() / 8 <= + cast(Node)->getAlignment()) && "Unexpected unaligned loads/stores."); break; #endif diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index ff20988159af..bfe5ea1846d7 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1941,6 +1941,9 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { LoadSDNode *LD = cast(Op); EVT MemVT = LD->getMemoryVT(); + if (Subtarget->systemSupportsUnalignedAccess()) + return Op; + // Return if load is aligned or if MemVT is neither i32 nor i64. if ((LD->getAlignment() >= MemVT.getSizeInBits() / 8) || ((MemVT != MVT::i32) && (MemVT != MVT::i64))) @@ -2064,7 +2067,8 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { EVT MemVT = SD->getMemoryVT(); // Lower unaligned integer stores. - if ((SD->getAlignment() < MemVT.getSizeInBits() / 8) && + if (!Subtarget->systemSupportsUnalignedAccess() && + (SD->getAlignment() < MemVT.getSizeInBits() / 8) && ((MemVT == MVT::i32) || (MemVT == MVT::i64))) return lowerUnalignedIntStore(SD, DAG, Subtarget->isLittle()); @@ -3485,21 +3489,22 @@ passByValArg(SDValue Chain, SDLoc DL, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const MipsCC &CC, const ByValArgInfo &ByVal, const ISD::ArgFlagsTy &Flags, bool isLittle) const { - unsigned ByValSize = Flags.getByValSize(); - unsigned Offset = 0; // Offset in # of bytes from the beginning of struct. - unsigned RegSize = CC.regSize(); - unsigned Alignment = std::min(Flags.getByValAlign(), RegSize); - EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSize * 8); + unsigned ByValSizeInBytes = Flags.getByValSize(); + unsigned OffsetInBytes = 0; // From beginning of struct + unsigned RegSizeInBytes = CC.regSize(); + unsigned Alignment = std::min(Flags.getByValAlign(), RegSizeInBytes); + EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); if (ByVal.NumRegs) { const MCPhysReg *ArgRegs = CC.intArgRegs(); - bool LeftoverBytes = (ByVal.NumRegs * RegSize > ByValSize); + bool LeftoverBytes = (ByVal.NumRegs * RegSizeInBytes > ByValSizeInBytes); unsigned I = 0; // Copy words to registers. - for (; I < ByVal.NumRegs - LeftoverBytes; ++I, Offset += RegSize) { + for (; I < ByVal.NumRegs - LeftoverBytes; + ++I, OffsetInBytes += RegSizeInBytes) { SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, - DAG.getConstant(Offset, PtrTy)); + DAG.getConstant(OffsetInBytes, PtrTy)); SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr, MachinePointerInfo(), false, false, false, Alignment); @@ -3509,38 +3514,38 @@ passByValArg(SDValue Chain, SDLoc DL, } // Return if the struct has been fully copied. 
- if (ByValSize == Offset) + if (ByValSizeInBytes == OffsetInBytes) return; // Copy the remainder of the byval argument with sub-word loads and shifts. if (LeftoverBytes) { - assert((ByValSize > Offset) && (ByValSize < Offset + RegSize) && - "Size of the remainder should be smaller than RegSize."); + assert((ByValSizeInBytes > OffsetInBytes) && + (ByValSizeInBytes < OffsetInBytes + RegSizeInBytes) && + "Size of the remainder should be smaller than RegSizeInBytes."); SDValue Val; - for (unsigned LoadSize = RegSize / 2, TotalSizeLoaded = 0; - Offset < ByValSize; LoadSize /= 2) { - unsigned RemSize = ByValSize - Offset; + for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0; + OffsetInBytes < ByValSizeInBytes; LoadSizeInBytes /= 2) { + unsigned RemainingSizeInBytes = ByValSizeInBytes - OffsetInBytes; - if (RemSize < LoadSize) + if (RemainingSizeInBytes < LoadSizeInBytes) continue; // Load subword. SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, - DAG.getConstant(Offset, PtrTy)); - SDValue LoadVal = - DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, - MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8), - false, false, Alignment); + DAG.getConstant(OffsetInBytes, PtrTy)); + SDValue LoadVal = DAG.getExtLoad( + ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(), + MVT::getIntegerVT(LoadSizeInBytes * 8), false, false, Alignment); MemOpChains.push_back(LoadVal.getValue(1)); // Shift the loaded value. unsigned Shamt; if (isLittle) - Shamt = TotalSizeLoaded; + Shamt = TotalBytesLoaded * 8; else - Shamt = (RegSize - (TotalSizeLoaded + LoadSize)) * 8; + Shamt = (RegSizeInBytes - (TotalBytesLoaded + LoadSizeInBytes)) * 8; SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal, DAG.getConstant(Shamt, MVT::i32)); @@ -3550,9 +3555,9 @@ passByValArg(SDValue Chain, SDLoc DL, else Val = Shift; - Offset += LoadSize; - TotalSizeLoaded += LoadSize; - Alignment = std::min(Alignment, LoadSize); + OffsetInBytes += LoadSizeInBytes; + TotalBytesLoaded += LoadSizeInBytes; + Alignment = std::min(Alignment, LoadSizeInBytes); } unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I]; @@ -3562,9 +3567,9 @@ passByValArg(SDValue Chain, SDLoc DL, } // Copy remainder of byval arg to it with memcpy. 
- unsigned MemCpySize = ByValSize - Offset; + unsigned MemCpySize = ByValSizeInBytes - OffsetInBytes; SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, - DAG.getConstant(Offset, PtrTy)); + DAG.getConstant(OffsetInBytes, PtrTy)); SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr, DAG.getIntPtrConstant(ByVal.Address)); Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy), diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index dbcd67436901..b66501966a43 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -225,6 +225,9 @@ class ISA_MIPS1_NOT_32R6_64R6 { } class ISA_MIPS2 { list InsnPredicates = [HasMips2]; } class ISA_MIPS3 { list InsnPredicates = [HasMips3]; } +class ISA_MIPS3_NOT_32R6_64R6 { + list InsnPredicates = [HasMips3, NotMips32r6, NotMips64r6]; +} class ISA_MIPS32 { list InsnPredicates = [HasMips32]; } class ISA_MIPS32R2 { list InsnPredicates = [HasMips32r2]; } class ISA_MIPS64 { list InsnPredicates = [HasMips64]; } @@ -1087,10 +1090,14 @@ def SW : Store<"sw", GPR32Opnd, store, II_SW>, MMRel, LW_FM<0x2b>; /// load/store left/right let EncodingPredicates = [], // FIXME: Lack of HasStdEnc is probably a bug AdditionalPredicates = [NotInMicroMips] in { -def LWL : LoadLeftRight<"lwl", MipsLWL, GPR32Opnd, II_LWL>, LW_FM<0x22>; -def LWR : LoadLeftRight<"lwr", MipsLWR, GPR32Opnd, II_LWR>, LW_FM<0x26>; -def SWL : StoreLeftRight<"swl", MipsSWL, GPR32Opnd, II_SWL>, LW_FM<0x2a>; -def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>; +def LWL : LoadLeftRight<"lwl", MipsLWL, GPR32Opnd, II_LWL>, LW_FM<0x22>, + ISA_MIPS1_NOT_32R6_64R6; +def LWR : LoadLeftRight<"lwr", MipsLWR, GPR32Opnd, II_LWR>, LW_FM<0x26>, + ISA_MIPS1_NOT_32R6_64R6; +def SWL : StoreLeftRight<"swl", MipsSWL, GPR32Opnd, II_SWL>, LW_FM<0x2a>, + ISA_MIPS1_NOT_32R6_64R6; +def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>, + ISA_MIPS1_NOT_32R6_64R6; } def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index eb9a819aa3cb..969d73018953 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -254,6 +254,16 @@ MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; + if (Subtarget->systemSupportsUnalignedAccess()) { + // MIPS32r6/MIPS64r6 is required to support unaligned access. It's + // implementation defined whether this is handled by hardware, software, or + // a hybrid of the two but it's expected that most implementations will + // handle the majority of cases in hardware. + if (Fast) + *Fast = true; + return true; + } + switch (SVT) { case MVT::i64: case MVT::i32: diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index d57e67831457..373f48136211 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -234,7 +234,12 @@ class MipsSubtarget : public MipsGenSubtargetInfo { /// \brief Reset the subtarget for the Mips target. void resetSubtarget(MachineFunction *MF); - + /// Does the system support unaligned memory access. + /// + /// MIPS32r6/MIPS64r6 require full unaligned access support but does not + /// specify which component of the system provides it. Hardware, software, and + /// hybrid implementations are all valid. 
+ bool systemSupportsUnalignedAccess() const { return hasMips32r6(); } }; } // End llvm namespace diff --git a/test/CodeGen/Mips/load-store-left-right.ll b/test/CodeGen/Mips/load-store-left-right.ll index d0928ee26613..a3f5ebfb5460 100644 --- a/test/CodeGen/Mips/load-store-left-right.ll +++ b/test/CodeGen/Mips/load-store-left-right.ll @@ -1,29 +1,439 @@ -; RUN: llc -march=mipsel < %s | FileCheck -check-prefix=EL %s -; RUN: llc -march=mips < %s | FileCheck -check-prefix=EB %s +; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EL %s +; RUN: llc -march=mips -mcpu=mips32 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EB %s +; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EL %s +; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EB %s +; RUN: llc -march=mipsel -mcpu=mips32r6 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32R6 -check-prefix=MIPS32R6-EL %s +; RUN: llc -march=mips -mcpu=mips32r6 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32R6 -check-prefix=MIPS32R6-EB %s +; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s +; RUN: llc -march=mips64 -mcpu=mips4 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s +; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s +; RUN: llc -march=mips64 -mcpu=mips64 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s +; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s +; RUN: llc -march=mips64 -mcpu=mips64r2 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s +; RUN: llc -march=mips64el -mcpu=mips64r6 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64R6 -check-prefix=MIPS64R6-EL %s +; RUN: llc -march=mips64 -mcpu=mips64r6 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64R6 -check-prefix=MIPS64R6-EB %s +%struct.SLL = type { i64 } %struct.SI = type { i32 } +%struct.SUI = type { i32 } +@sll = common global %struct.SLL zeroinitializer, align 1 @si = common global %struct.SI zeroinitializer, align 1 +@sui = common global %struct.SUI zeroinitializer, align 1 -define i32 @foo_load_i() nounwind readonly { +define i32 @load_SI() nounwind readonly { entry: -; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]]) -; EL: lwr $[[R0]], 0($[[R1]]) -; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) -; EB: lwr $[[R0]], 3($[[R1]]) +; ALL-LABEL: load_SI: + +; MIPS32-EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]]) +; MIPS32-EL: lwr $[[R0]], 0($[[R1]]) + +; MIPS32-EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) +; MIPS32-EB: lwr $[[R0]], 3($[[R1]]) + +; MIPS32R6: lw $[[PTR:[0-9]+]], %got(si)( +; MIPS32R6: lw $2, 0($[[PTR]]) + +; MIPS64-EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]]) +; MIPS64-EL: lwr $[[R0]], 0($[[R1]]) + +; MIPS64-EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) +; MIPS64-EB: lwr $[[R0]], 3($[[R1]]) + +; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(si)( +; MIPS64R6: lw $2, 0($[[PTR]]) %0 = load i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1 ret i32 %0 } -define void @foo_store_i(i32 %a) nounwind { +define void @store_SI(i32 %a) nounwind { entry: -; EL: 
swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]]) -; EL: swr $[[R0]], 0($[[R1]]) -; EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) -; EB: swr $[[R0]], 3($[[R1]]) +; ALL-LABEL: store_SI: + +; MIPS32-EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]]) +; MIPS32-EL: swr $[[R0]], 0($[[R1]]) + +; MIPS32-EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) +; MIPS32-EB: swr $[[R0]], 3($[[R1]]) + +; MIPS32R6: lw $[[PTR:[0-9]+]], %got(si)( +; MIPS32R6: sw $4, 0($[[PTR]]) + +; MIPS64-EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]]) +; MIPS64-EL: swr $[[R0]], 0($[[R1]]) + +; MIPS64-EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) +; MIPS64-EB: swr $[[R0]], 3($[[R1]]) + +; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(si)( +; MIPS64R6: sw $4, 0($[[PTR]]) store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1 ret void } +define i64 @load_SLL() nounwind readonly { +entry: +; ALL-LABEL: load_SLL: + +; MIPS32-EL: lwl $2, 3($[[R1:[0-9]+]]) +; MIPS32-EL: lwr $2, 0($[[R1]]) +; MIPS32-EL: lwl $3, 7($[[R1:[0-9]+]]) +; MIPS32-EL: lwr $3, 4($[[R1]]) + +; MIPS32-EB: lwl $2, 0($[[R1:[0-9]+]]) +; MIPS32-EB: lwr $2, 3($[[R1]]) +; MIPS32-EB: lwl $3, 4($[[R1:[0-9]+]]) +; MIPS32-EB: lwr $3, 7($[[R1]]) + +; MIPS32R6: lw $[[PTR:[0-9]+]], %got(sll)( +; MIPS32R6-DAG: lw $2, 0($[[PTR]]) +; MIPS32R6-DAG: lw $3, 4($[[PTR]]) + +; MIPS64-EL: ldl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]]) +; MIPS64-EL: ldr $[[R0]], 0($[[R1]]) + +; MIPS64-EB: ldl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) +; MIPS64-EB: ldr $[[R0]], 7($[[R1]]) + +; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(sll)( +; MIPS64R6: ld $2, 0($[[PTR]]) + + %0 = load i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1 + ret i64 %0 +} + +define i64 @load_SI_sext_to_i64() nounwind readonly { +entry: +; ALL-LABEL: load_SI_sext_to_i64: + +; MIPS32-EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]]) +; MIPS32-EL: lwr $[[R0]], 0($[[R1]]) + +; MIPS32-EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) +; MIPS32-EB: lwr $[[R0]], 3($[[R1]]) + +; MIPS32R6: lw $[[PTR:[0-9]+]], %got(si)( +; MIPS32R6-EL: lw $2, 0($[[PTR]]) +; MIPS32R6-EL: sra $3, $2, 31 +; MIPS32R6-EB: lw $3, 0($[[PTR]]) +; MIPS32R6-EB: sra $2, $3, 31 + +; MIPS64-EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]]) +; MIPS64-EL: lwr $[[R0]], 0($[[R1]]) + +; MIPS64-EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) +; MIPS64-EB: lwr $[[R0]], 3($[[R1]]) + +; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(si)( +; MIPS64R6: lw $2, 0($[[PTR]]) + + %0 = load i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1 + %conv = sext i32 %0 to i64 + ret i64 %conv +} + +define i64 @load_UI() nounwind readonly { +entry: +; ALL-LABEL: load_UI: + +; MIPS32-EL-DAG: lwl $[[R2:2]], 3($[[R1:[0-9]+]]) +; MIPS32-EL-DAG: lwr $[[R2]], 0($[[R1]]) +; MIPS32-EL-DAG: addiu $3, $zero, 0 + +; MIPS32-EB-DAG: lwl $[[R2:3]], 0($[[R1:[0-9]+]]) +; MIPS32-EB-DAG: lwr $[[R2]], 3($[[R1]]) +; MIPS32-EB-DAG: addiu $2, $zero, 0 + +; MIPS32R6: lw $[[PTR:[0-9]+]], %got(sui)( +; MIPS32R6-EL-DAG: lw $2, 0($[[PTR]]) +; MIPS32R6-EL-DAG: addiu $3, $zero, 0 +; MIPS32R6-EB-DAG: lw $3, 0($[[PTR]]) +; MIPS32R6-EB-DAG: addiu $2, $zero, 0 + +; MIPS64-EL-DAG: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]]) +; MIPS64-EL-DAG: lwr $[[R0]], 0($[[R1]]) +; MIPS64-EL-DAG: daddiu $[[R2:[0-9]+]], $zero, 1 +; MIPS64-EL-DAG: dsll $[[R3:[0-9]+]], $[[R2]], 32 +; MIPS64-EL-DAG: daddiu $[[R4:[0-9]+]], $[[R3]], -1 +; MIPS64-EL-DAG: and ${{[0-9]+}}, $[[R0]], $[[R4]] + +; MIPS64-EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) +; MIPS64-EB: lwr $[[R0]], 3($[[R1]]) + +; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(sui)( +; MIPS64R6: lwu $2, 0($[[PTR]]) + + %0 = load i32* 
getelementptr inbounds (%struct.SUI* @sui, i64 0, i32 0), align 1 + %conv = zext i32 %0 to i64 + ret i64 %conv +} + +define void @store_SLL(i64 %a) nounwind { +entry: +; ALL-LABEL: store_SLL: + +; MIPS32-EL-DAG: swl $[[A1:4]], 3($[[R1:[0-9]+]]) +; MIPS32-EL-DAG: swr $[[A1]], 0($[[R1]]) +; MIPS32-EL-DAG: swl $[[A2:5]], 7($[[R1:[0-9]+]]) +; MIPS32-EL-DAG: swr $[[A2]], 4($[[R1]]) + +; MIPS32-EB-DAG: swl $[[A1:4]], 0($[[R1:[0-9]+]]) +; MIPS32-EB-DAG: swr $[[A1]], 3($[[R1]]) +; MIPS32-EB-DAG: swl $[[A1:5]], 4($[[R1:[0-9]+]]) +; MIPS32-EB-DAG: swr $[[A1]], 7($[[R1]]) + +; MIPS32R6-DAG: lw $[[PTR:[0-9]+]], %got(sll)( +; MIPS32R6-DAG: sw $4, 0($[[PTR]]) +; MIPS32R6-DAG: sw $5, 4($[[PTR]]) + +; MIPS64-EL: sdl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]]) +; MIPS64-EL: sdr $[[R0]], 0($[[R1]]) + +; MIPS64-EB: sdl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) +; MIPS64-EB: sdr $[[R0]], 7($[[R1]]) + +; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(sll)( +; MIPS64R6: sd $4, 0($[[PTR]]) + + store i64 %a, i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1 + ret void +} + +define void @store_SI_trunc_from_i64(i32 %a) nounwind { +entry: +; ALL-LABEL: store_SI_trunc_from_i64: + +; MIPS32-EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]]) +; MIPS32-EL: swr $[[R0]], 0($[[R1]]) + +; MIPS32-EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) +; MIPS32-EB: swr $[[R0]], 3($[[R1]]) + +; MIPS32R6: lw $[[PTR:[0-9]+]], %got(si)( +; MIPS32R6: sw $4, 0($[[PTR]]) + +; MIPS64-EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]]) +; MIPS64-EL: swr $[[R0]], 0($[[R1]]) + +; MIPS64-EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) +; MIPS64-EB: swr $[[R0]], 3($[[R1]]) + +; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(si)( +; MIPS64R6: sw $4, 0($[[PTR]]) + + store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1 + ret void +} + +; +; Structures are simply concatenations of the members. 
They are unaffected by +; endianness +; + +%struct.S0 = type { i8, i8 } +@struct_s0 = common global %struct.S0 zeroinitializer, align 1 +%struct.S1 = type { i16, i16 } +@struct_s1 = common global %struct.S1 zeroinitializer, align 1 +%struct.S2 = type { i32, i32 } +@struct_s2 = common global %struct.S2 zeroinitializer, align 1 + +define void @copy_struct_S0() nounwind { +entry: +; ALL-LABEL: copy_struct_S0: + +; MIPS32-EL: lw $[[PTR:[0-9]+]], %got(struct_s0)( +; MIPS32-EB: lw $[[PTR:[0-9]+]], %got(struct_s0)( +; MIPS32R6: lw $[[PTR:[0-9]+]], %got(struct_s0)( +; MIPS64-EL: ld $[[PTR:[0-9]+]], %got_disp(struct_s0)( +; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s0)( +; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s0)( + +; FIXME: We should be able to do better than this on MIPS32r6/MIPS64r6 since +; we have unaligned halfword load/store available +; ALL-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) +; ALL-DAG: sb $[[R1]], 2($[[PTR]]) +; ALL-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]]) +; ALL-DAG: sb $[[R1]], 3($[[PTR]]) + + %0 = load %struct.S0* getelementptr inbounds (%struct.S0* @struct_s0, i32 0), align 1 + store %struct.S0 %0, %struct.S0* getelementptr inbounds (%struct.S0* @struct_s0, i32 1), align 1 + ret void +} + +define void @copy_struct_S1() nounwind { +entry: +; ALL-LABEL: copy_struct_S1: + +; MIPS32-EL: lw $[[PTR:[0-9]+]], %got(struct_s1)( +; MIPS32-EB: lw $[[PTR:[0-9]+]], %got(struct_s1)( +; MIPS32-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS32-DAG: sb $[[R1]], 4($[[PTR]]) +; MIPS32-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]]) +; MIPS32-DAG: sb $[[R1]], 5($[[PTR]]) +; MIPS32-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]]) +; MIPS32-DAG: sb $[[R1]], 6($[[PTR]]) +; MIPS32-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]]) +; MIPS32-DAG: sb $[[R1]], 7($[[PTR]]) + +; MIPS32R6: lw $[[PTR:[0-9]+]], %got(struct_s1)( +; MIPS32R6-DAG: lhu $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS32R6-DAG: sh $[[R1]], 4($[[PTR]]) +; MIPS32R6-DAG: lhu $[[R1:[0-9]+]], 2($[[PTR]]) +; MIPS32R6-DAG: sh $[[R1]], 6($[[PTR]]) + +; MIPS64-EL: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)( +; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)( +; MIPS64-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64-DAG: sb $[[R1]], 4($[[PTR]]) +; MIPS64-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]]) +; MIPS64-DAG: sb $[[R1]], 5($[[PTR]]) +; MIPS64-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]]) +; MIPS64-DAG: sb $[[R1]], 6($[[PTR]]) +; MIPS64-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]]) +; MIPS64-DAG: sb $[[R1]], 7($[[PTR]]) + +; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)( +; MIPS64R6-DAG: lhu $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64R6-DAG: sh $[[R1]], 4($[[PTR]]) +; MIPS64R6-DAG: lhu $[[R1:[0-9]+]], 2($[[PTR]]) +; MIPS64R6-DAG: sh $[[R1]], 6($[[PTR]]) + + %0 = load %struct.S1* getelementptr inbounds (%struct.S1* @struct_s1, i32 0), align 1 + store %struct.S1 %0, %struct.S1* getelementptr inbounds (%struct.S1* @struct_s1, i32 1), align 1 + ret void +} + +define void @copy_struct_S2() nounwind { +entry: +; ALL-LABEL: copy_struct_S2: + +; MIPS32-EL: lw $[[PTR:[0-9]+]], %got(struct_s2)( +; MIPS32-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]]) +; MIPS32-EL-DAG: lwr $[[R1]], 0($[[PTR]]) +; MIPS32-EL-DAG: swl $[[R1]], 11($[[PTR]]) +; MIPS32-EL-DAG: swr $[[R1]], 8($[[PTR]]) +; MIPS32-EL-DAG: lwl $[[R1:[0-9]+]], 7($[[PTR]]) +; MIPS32-EL-DAG: lwr $[[R1]], 4($[[PTR]]) +; MIPS32-EL-DAG: swl $[[R1]], 15($[[PTR]]) +; MIPS32-EL-DAG: swr $[[R1]], 12($[[PTR]]) + +; MIPS32-EB: lw $[[PTR:[0-9]+]], %got(struct_s2)( +; MIPS32-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS32-EB-DAG: lwr $[[R1]], 3($[[PTR]]) +; MIPS32-EB-DAG: swl 
$[[R1]], 8($[[PTR]]) +; MIPS32-EB-DAG: swr $[[R1]], 11($[[PTR]]) +; MIPS32-EB-DAG: lwl $[[R1:[0-9]+]], 4($[[PTR]]) +; MIPS32-EB-DAG: lwr $[[R1]], 7($[[PTR]]) +; MIPS32-EB-DAG: swl $[[R1]], 12($[[PTR]]) +; MIPS32-EB-DAG: swr $[[R1]], 15($[[PTR]]) + +; MIPS32R6: lw $[[PTR:[0-9]+]], %got(struct_s2)( +; MIPS32R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS32R6-DAG: sw $[[R1]], 8($[[PTR]]) +; MIPS32R6-DAG: lw $[[R1:[0-9]+]], 4($[[PTR]]) +; MIPS32R6-DAG: sw $[[R1]], 12($[[PTR]]) + +; MIPS64-EL: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)( +; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]]) +; MIPS64-EL-DAG: lwr $[[R1]], 0($[[PTR]]) +; MIPS64-EL-DAG: swl $[[R1]], 11($[[PTR]]) +; MIPS64-EL-DAG: swr $[[R1]], 8($[[PTR]]) +; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 7($[[PTR]]) +; MIPS64-EL-DAG: lwr $[[R1]], 4($[[PTR]]) +; MIPS64-EL-DAG: swl $[[R1]], 15($[[PTR]]) +; MIPS64-EL-DAG: swr $[[R1]], 12($[[PTR]]) + +; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)( +; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]]) +; MIPS64-EB-DAG: swl $[[R1]], 8($[[PTR]]) +; MIPS64-EB-DAG: swr $[[R1]], 11($[[PTR]]) +; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 4($[[PTR]]) +; MIPS64-EB-DAG: lwr $[[R1]], 7($[[PTR]]) +; MIPS64-EB-DAG: swl $[[R1]], 12($[[PTR]]) +; MIPS64-EB-DAG: swr $[[R1]], 15($[[PTR]]) + +; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)( +; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64R6-DAG: sw $[[R1]], 8($[[PTR]]) +; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 4($[[PTR]]) +; MIPS64R6-DAG: sw $[[R1]], 12($[[PTR]]) + + %0 = load %struct.S2* getelementptr inbounds (%struct.S2* @struct_s2, i32 0), align 1 + store %struct.S2 %0, %struct.S2* getelementptr inbounds (%struct.S2* @struct_s2, i32 1), align 1 + ret void +} + +; +; Arrays are simply concatenations of the members. 
They are unaffected by +; endianness +; + +@arr = common global [7 x i8] zeroinitializer, align 1 + +define void @pass_array_byval() nounwind { +entry: +; ALL-LABEL: pass_array_byval: + +; MIPS32-EL: lw $[[SPTR:[0-9]+]], %got(arr)( +; MIPS32-EL-DAG: lwl $[[R1:4]], 3($[[PTR]]) +; MIPS32-EL-DAG: lwr $[[R1]], 0($[[PTR]]) +; MIPS32-EL-DAG: lbu $[[R2:[0-9]+]], 4($[[PTR]]) +; MIPS32-EL-DAG: lbu $[[R3:[0-9]+]], 5($[[PTR]]) +; MIPS32-EL-DAG: sll $[[T0:[0-9]+]], $[[R3]], 8 +; MIPS32-EL-DAG: or $[[T1:[0-9]+]], $[[T0]], $[[R2]] +; MIPS32-EL-DAG: lbu $[[R4:[0-9]+]], 6($[[PTR]]) +; MIPS32-EL-DAG: sll $[[T2:[0-9]+]], $[[R4]], 16 +; MIPS32-EL-DAG: or $5, $[[T1]], $[[T2]] + +; MIPS32-EB: lw $[[SPTR:[0-9]+]], %got(arr)( +; MIPS32-EB-DAG: lwl $[[R1:4]], 0($[[PTR]]) +; MIPS32-EB-DAG: lwr $[[R1]], 3($[[PTR]]) +; MIPS32-EB-DAG: lbu $[[R2:[0-9]+]], 5($[[PTR]]) +; MIPS32-EB-DAG: lbu $[[R3:[0-9]+]], 4($[[PTR]]) +; MIPS32-EB-DAG: sll $[[T0:[0-9]+]], $[[R3]], 8 +; MIPS32-EB-DAG: or $[[T1:[0-9]+]], $[[T0]], $[[R2]] +; MIPS32-EB-DAG: sll $[[T1]], $[[T1]], 16 +; MIPS32-EB-DAG: lbu $[[R4:[0-9]+]], 6($[[PTR]]) +; MIPS32-EB-DAG: sll $[[T2:[0-9]+]], $[[R4]], 8 +; MIPS32-EB-DAG: or $5, $[[T1]], $[[T2]] + +; MIPS32R6: lw $[[SPTR:[0-9]+]], %got(arr)( +; MIPS32R6-DAG: lw $4, 0($[[PTR]]) +; MIPS32R6-EL-DAG: lhu $[[R2:[0-9]+]], 4($[[PTR]]) +; MIPS32R6-EL-DAG: lbu $[[R3:[0-9]+]], 6($[[PTR]]) +; MIPS32R6-EL-DAG: sll $[[T0:[0-9]+]], $[[R3]], 16 +; MIPS32R6-EL-DAG: or $5, $[[R2]], $[[T0]] + +; MIPS32R6-EB-DAG: lhu $[[R2:[0-9]+]], 4($[[PTR]]) +; MIPS32R6-EB-DAG: lbu $[[R3:[0-9]+]], 6($[[PTR]]) +; MIPS32R6-EB-DAG: sll $[[T0:[0-9]+]], $[[R2]], 16 +; MIPS32R6-EB-DAG: or $5, $[[T0]], $[[R3]] + +; MIPS64-EL: ld $[[SPTR:[0-9]+]], %got_disp(arr)( +; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]]) +; MIPS64-EL-DAG: lwr $[[R1]], 0($[[PTR]]) + +; MIPS64-EB: ld $[[SPTR:[0-9]+]], %got_disp(arr)( +; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]]) +; MIPS64-EB-DAG: dsll $[[R1]], $[[R1]], 32 +; MIPS64-EB-DAG: lbu $[[R2:[0-9]+]], 5($[[PTR]]) +; MIPS64-EB-DAG: lbu $[[R3:[0-9]+]], 4($[[PTR]]) +; MIPS64-EB-DAG: dsll $[[T0:[0-9]+]], $[[R3]], 8 +; MIPS64-EB-DAG: or $[[T1:[0-9]+]], $[[T0]], $[[R2]] +; MIPS64-EB-DAG: dsll $[[T1]], $[[T1]], 16 +; MIPS64-EB-DAG: or $[[T3:[0-9]+]], $[[R1]], $[[T1]] +; MIPS64-EB-DAG: lbu $[[R4:[0-9]+]], 6($[[PTR]]) +; MIPS64-EB-DAG: dsll $[[T4:[0-9]+]], $[[R4]], 8 +; MIPS64-EB-DAG: or $4, $[[T3]], $[[T4]] + +; MIPS64R6: ld $[[SPTR:[0-9]+]], %got_disp(arr)( + + tail call void @extern_func([7 x i8]* byval @arr) nounwind + ret void +} + +declare void @extern_func([7 x i8]* byval) diff --git a/test/CodeGen/Mips/mips64load-store-left-right.ll b/test/CodeGen/Mips/mips64load-store-left-right.ll deleted file mode 100644 index c9ba467e6c1b..000000000000 --- a/test/CodeGen/Mips/mips64load-store-left-right.ll +++ /dev/null @@ -1,75 +0,0 @@ -; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 < %s | FileCheck -check-prefix=EL %s -; RUN: llc -march=mips64 -mcpu=mips4 -mattr=n64 < %s | FileCheck -check-prefix=EB %s -; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck -check-prefix=EL %s -; RUN: llc -march=mips64 -mcpu=mips64 -mattr=n64 < %s | FileCheck -check-prefix=EB %s - -%struct.SLL = type { i64 } -%struct.SI = type { i32 } -%struct.SUI = type { i32 } - -@sll = common global %struct.SLL zeroinitializer, align 1 -@si = common global %struct.SI zeroinitializer, align 1 -@sui = common global %struct.SUI zeroinitializer, align 1 - -define i64 @foo_load_ll() nounwind readonly { -entry: 
-; EL: ldl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]]) -; EL: ldr $[[R0]], 0($[[R1]]) -; EB: ldl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) -; EB: ldr $[[R0]], 7($[[R1]]) - - %0 = load i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1 - ret i64 %0 -} - -define i64 @foo_load_i() nounwind readonly { -entry: -; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]]) -; EL: lwr $[[R0]], 0($[[R1]]) -; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) -; EB: lwr $[[R0]], 3($[[R1]]) - - %0 = load i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1 - %conv = sext i32 %0 to i64 - ret i64 %conv -} - -define i64 @foo_load_ui() nounwind readonly { -entry: -; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]]) -; EL: lwr $[[R0]], 0($[[R1]]) -; EL: daddiu $[[R2:[0-9]+]], $zero, 1 -; EL: dsll $[[R3:[0-9]+]], $[[R2]], 32 -; EL: daddiu $[[R4:[0-9]+]], $[[R3]], -1 -; EL: and ${{[0-9]+}}, $[[R0]], $[[R4]] -; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) -; EB: lwr $[[R0]], 3($[[R1]]) - - - %0 = load i32* getelementptr inbounds (%struct.SUI* @sui, i64 0, i32 0), align 1 - %conv = zext i32 %0 to i64 - ret i64 %conv -} - -define void @foo_store_ll(i64 %a) nounwind { -entry: -; EL: sdl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]]) -; EL: sdr $[[R0]], 0($[[R1]]) -; EB: sdl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) -; EB: sdr $[[R0]], 7($[[R1]]) - - store i64 %a, i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1 - ret void -} - -define void @foo_store_i(i32 %a) nounwind { -entry: -; EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]]) -; EL: swr $[[R0]], 0($[[R1]]) -; EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]]) -; EB: swr $[[R0]], 3($[[R1]]) - - store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1 - ret void -} - diff --git a/test/CodeGen/Mips/unalignedload.ll b/test/CodeGen/Mips/unalignedload.ll index e86b1bae113d..2002b1c60abe 100644 --- a/test/CodeGen/Mips/unalignedload.ll +++ b/test/CodeGen/Mips/unalignedload.ll @@ -1,5 +1,9 @@ -; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=ALL -check-prefix=CHECK-EL -; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=ALL -check-prefix=CHECK-EB +; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EL -check-prefix=MIPS32-EL +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EB -check-prefix=MIPS32-EB +; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EL -check-prefix=MIPS32-EL +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EB -check-prefix=MIPS32-EB +; RUN: llc < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EL -check-prefix=MIPS32R6-EL +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EB -check-prefix=MIPS32R6-EB %struct.S2 = type { %struct.S1, %struct.S1 } %struct.S1 = type { i8, i8 } %struct.S4 = type { [7 x i8] } @@ -11,17 +15,20 @@ define void @bar1() nounwind { entry: ; ALL-LABEL: bar1: -; ALL-DAG: lw $[[R0:[0-9]+]], %got(s2)( +; ALL-DAG: lw $[[R0:[0-9]+]], %got(s2)( -; ALL-DAG: lbu $[[PART1:[0-9]+]], 2($[[R0]]) -; ALL-DAG: lbu $[[PART2:[0-9]+]], 3($[[R0]]) +; MIPS32-EL-DAG: lbu $[[PART1:[0-9]+]], 2($[[R0]]) +; MIPS32-EL-DAG: lbu $[[PART2:[0-9]+]], 3($[[R0]]) +; MIPS32-EL-DAG: sll $[[T0:[0-9]+]], $[[PART2]], 8 +; MIPS32-EL-DAG: or $4, $[[T0]], $[[PART1]] -; CHECK-EL-DAG: sll $[[T0:[0-9]+]], $[[PART2]], 8 -; CHECK-EL-DAG: or $4, $[[T0]], $[[PART1]] +; MIPS32-EB-DAG: lbu $[[PART1:[0-9]+]], 
2($[[R0]]) +; MIPS32-EB-DAG: lbu $[[PART2:[0-9]+]], 3($[[R0]]) +; MIPS32-EB-DAG: sll $[[T0:[0-9]+]], $[[PART1]], 8 +; MIPS32-EB-DAG: or $[[T1:[0-9]+]], $[[T0]], $[[PART2]] +; MIPS32-EB-DAG: sll $4, $[[T1]], 16 -; CHECK-EB-DAG: sll $[[T0:[0-9]+]], $[[PART1]], 8 -; CHECK-EB-DAG: or $[[T1:[0-9]+]], $[[T0]], $[[PART2]] -; CHECK-EB-DAG: sll $4, $[[T1]], 16 +; MIPS32R6-DAG: lhu $[[PART1:[0-9]+]], 2($[[R0]]) tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind ret void @@ -31,13 +38,43 @@ define void @bar2() nounwind { entry: ; ALL-LABEL: bar2: -; ALL-DAG: lw $[[R2:[0-9]+]], %got(s4)( +; ALL-DAG: lw $[[R2:[0-9]+]], %got(s4)( -; CHECK-EL-DAG: lwl $[[R1:4]], 3($[[R2]]) -; CHECK-EL-DAG: lwr $[[R1]], 0($[[R2]]) +; MIPS32-EL-DAG: lwl $[[R1:4]], 3($[[R2]]) +; MIPS32-EL-DAG: lwr $[[R1]], 0($[[R2]]) +; MIPS32-EL-DAG: lbu $[[T0:[0-9]+]], 4($[[R2]]) +; MIPS32-EL-DAG: lbu $[[T1:[0-9]+]], 5($[[R2]]) +; MIPS32-EL-DAG: lbu $[[T2:[0-9]+]], 6($[[R2]]) +; MIPS32-EL-DAG: sll $[[T3:[0-9]+]], $[[T1]], 8 +; MIPS32-EL-DAG: or $[[T4:[0-9]+]], $[[T3]], $[[T0]] +; MIPS32-EL-DAG: sll $[[T5:[0-9]+]], $[[T2]], 16 +; MIPS32-EL-DAG: or $5, $[[T4]], $[[T5]] -; CHECK-EB-DAG: lwl $[[R1:4]], 0($[[R2]]) -; CHECK-EB-DAG: lwr $[[R1]], 3($[[R2]]) +; MIPS32-EB-DAG: lwl $[[R1:4]], 0($[[R2]]) +; MIPS32-EB-DAG: lwr $[[R1]], 3($[[R2]]) +; MIPS32-EB-DAG: lbu $[[T0:[0-9]+]], 4($[[R2]]) +; MIPS32-EB-DAG: lbu $[[T1:[0-9]+]], 5($[[R2]]) +; MIPS32-EB-DAG: lbu $[[T2:[0-9]+]], 6($[[R2]]) +; MIPS32-EB-DAG: sll $[[T3:[0-9]+]], $[[T0]], 8 +; MIPS32-EB-DAG: or $[[T4:[0-9]+]], $[[T3]], $[[T1]] +; MIPS32-EB-DAG: sll $[[T5:[0-9]+]], $[[T4]], 16 +; MIPS32-EB-DAG: sll $[[T6:[0-9]+]], $[[T2]], 8 +; MIPS32-EB-DAG: or $5, $[[T5]], $[[T6]] + +; FIXME: We should be able to do better than this using lhu +; MIPS32R6-EL-DAG: lw $4, 0($[[R2]]) +; MIPS32R6-EL-DAG: lhu $[[T0:[0-9]+]], 4($[[R2]]) +; MIPS32R6-EL-DAG: lbu $[[T1:[0-9]+]], 6($[[R2]]) +; MIPS32R6-EL-DAG: sll $[[T2:[0-9]+]], $[[T1]], 16 +; MIPS32R6-EL-DAG: or $5, $[[T0]], $[[T2]] + +; FIXME: We should be able to do better than this using lhu +; MIPS32R6-EB-DAG: lw $4, 0($[[R2]]) +; MIPS32R6-EB-DAG: lhu $[[T0:[0-9]+]], 4($[[R2]]) +; MIPS32R6-EB-DAG: lbu $[[T1:[0-9]+]], 6($[[R2]]) +; MIPS32R6-EB-DAG: sll $[[T2:[0-9]+]], $[[T0]], 16 +; MIPS32R6-EB-DAG: sll $[[T3:[0-9]+]], $[[T1]], 8 +; MIPS32R6-EB-DAG: or $5, $[[T2]], $[[T3]] tail call void @foo4(%struct.S4* byval @s4) nounwind ret void diff --git a/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s new file mode 100644 index 000000000000..aee068a93a8f --- /dev/null +++ b/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s @@ -0,0 +1,15 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + lwl $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lwr $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + swl $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + swr $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lwle $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + lwre $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + swle $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + swre $s1,-26590($14) # CHECK: 
:[[@LINE]]:{{[0-9]+}}: error: Unknown instruction diff --git a/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s new file mode 100644 index 000000000000..f7949bb00282 --- /dev/null +++ b/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s @@ -0,0 +1,15 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + lwl $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lwr $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + swl $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + swr $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lwle $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + lwre $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + swle $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + swre $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction diff --git a/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s new file mode 100644 index 000000000000..7424f493bf56 --- /dev/null +++ b/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s @@ -0,0 +1,23 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + ldl $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + ldr $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + sdl $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + sdr $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + ldle $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + ldre $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + sdle $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + sdre $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + lwl $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lwr $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + swl $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + swr $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lwle $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + lwre $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + swle $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction + swre $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction diff --git a/test/MC/Mips/mips64r6/invalid-mips3.s b/test/MC/Mips/mips64r6/invalid-mips3.s new file mode 100644 index 000000000000..1225005ec84e --- /dev/null +++ b/test/MC/Mips/mips64r6/invalid-mips3.s @@ -0,0 +1,8 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + addi $13,$9,26322 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled From 
ff87630a77a49b43cc4c91cd0ad70d2b4e2de63d Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 23 May 2014 13:24:08 +0000 Subject: [PATCH 109/906] [mips][mips64r6] t(eq|ge|lt|ne)i and t(ge|lt)iu are not available in MIPS32r6/MIPS64r6 Summary: Depends on D3872 Reviewers: jkolek, zoran.jovanovic, vmedic Reviewed By: vmedic Differential Revision: http://reviews.llvm.org/D3891 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209513 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips32r6InstrInfo.td | 1 - lib/Target/Mips/MipsInstrInfo.td | 21 +++++++++++++++------ test/MC/Mips/mips32r6/invalid-mips2.s | 14 ++++++++++++++ test/MC/Mips/mips64r6/invalid-mips2.s | 14 ++++++++++++++ test/MC/Mips/mips64r6/invalid-mips3.s | 6 ++++++ 5 files changed, 49 insertions(+), 7 deletions(-) create mode 100644 test/MC/Mips/mips32r6/invalid-mips2.s create mode 100644 test/MC/Mips/mips64r6/invalid-mips2.s diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 9755159e62d4..ffaf9657b6c7 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -46,7 +46,6 @@ include "Mips32r6InstrFormats.td" // Removed: sdxc1 // Removed: suxc1 // Removed: swxc1 -// Removed: teqi, tgei, tgeiu, tlti, tltiu, tnei // Rencoded: [ls][wd]c2 def brtarget21 : Operand { diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index b66501966a43..0d3cb7578e25 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -224,6 +224,9 @@ class ISA_MIPS1_NOT_32R6_64R6 { list InsnPredicates = [NotMips32r6, NotMips64r6]; } class ISA_MIPS2 { list InsnPredicates = [HasMips2]; } +class ISA_MIPS2_NOT_32R6_64R6 { + list InsnPredicates = [HasMips2, NotMips32r6, NotMips64r6]; +} class ISA_MIPS3 { list InsnPredicates = [HasMips3]; } class ISA_MIPS3_NOT_32R6_64R6 { list InsnPredicates = [HasMips3, NotMips32r6, NotMips64r6]; @@ -1108,12 +1111,18 @@ def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>; def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>; def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>; -def TEQI : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>, ISA_MIPS2; -def TGEI : MMRel, TEQI_FT<"tgei", GPR32Opnd>, TEQI_FM<0x8>, ISA_MIPS2; -def TGEIU : MMRel, TEQI_FT<"tgeiu", GPR32Opnd>, TEQI_FM<0x9>, ISA_MIPS2; -def TLTI : MMRel, TEQI_FT<"tlti", GPR32Opnd>, TEQI_FM<0xa>, ISA_MIPS2; -def TTLTIU : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM<0xb>, ISA_MIPS2; -def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>, ISA_MIPS2; +def TEQI : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>, + ISA_MIPS2_NOT_32R6_64R6; +def TGEI : MMRel, TEQI_FT<"tgei", GPR32Opnd>, TEQI_FM<0x8>, + ISA_MIPS2_NOT_32R6_64R6; +def TGEIU : MMRel, TEQI_FT<"tgeiu", GPR32Opnd>, TEQI_FM<0x9>, + ISA_MIPS2_NOT_32R6_64R6; +def TLTI : MMRel, TEQI_FT<"tlti", GPR32Opnd>, TEQI_FM<0xa>, + ISA_MIPS2_NOT_32R6_64R6; +def TTLTIU : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM<0xb>, + ISA_MIPS2_NOT_32R6_64R6; +def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>, + ISA_MIPS2_NOT_32R6_64R6; def BREAK : MMRel, BRK_FT<"break">, BRK_FM<0xd>; def SYSCALL : MMRel, SYS_FT<"syscall">, SYS_FM<0xc>; diff --git a/test/MC/Mips/mips32r6/invalid-mips2.s b/test/MC/Mips/mips32r6/invalid-mips2.s new file mode 100644 index 000000000000..0638e78ecb4c --- /dev/null +++ b/test/MC/Mips/mips32r6/invalid-mips2.s @@ -0,0 +1,14 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \ +# 
RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + addi $13,$9,26322 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + teqi $s5,-17504 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tgei $s1,5025 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tgeiu $sp,-28621 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tlti $14,-21059 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tltiu $ra,-5076 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tnei $12,-29647 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips64r6/invalid-mips2.s b/test/MC/Mips/mips64r6/invalid-mips2.s new file mode 100644 index 000000000000..0638e78ecb4c --- /dev/null +++ b/test/MC/Mips/mips64r6/invalid-mips2.s @@ -0,0 +1,14 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + addi $13,$9,26322 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + teqi $s5,-17504 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tgei $s1,5025 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tgeiu $sp,-28621 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tlti $14,-21059 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tltiu $ra,-5076 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tnei $12,-29647 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips64r6/invalid-mips3.s b/test/MC/Mips/mips64r6/invalid-mips3.s index 1225005ec84e..0638e78ecb4c 100644 --- a/test/MC/Mips/mips64r6/invalid-mips3.s +++ b/test/MC/Mips/mips64r6/invalid-mips3.s @@ -6,3 +6,9 @@ .set noat addi $13,$9,26322 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + teqi $s5,-17504 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tgei $s1,5025 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tgeiu $sp,-28621 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tlti $14,-21059 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tltiu $ra,-5076 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + tnei $12,-29647 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled From b3fa233048eb74109d0274bda43e3a829950d4e8 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 23 May 2014 13:35:24 +0000 Subject: [PATCH 110/906] [mips] Work around inconsistency in llvm-mc's placement of fixup markers Summary: Add a second fixup table to MipsAsmBackend::getFixupKindInfo() to correctly position llvm-mc's fixup placeholders for big-endian. See PR19836 for full details of the issue. 
To summarize, the fixup placeholders do not account for endianness properly and the implementations of getFixupKindInfo() for each target are measuring MCFixupKindInfo.TargetOffset from different ends of the instruction encoding to compensate. Reviewers: jkolek, zoran.jovanovic, vmedic Reviewed By: vmedic Differential Revision: http://reviews.llvm.org/D3889 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209514 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Mips/MCTargetDesc/MipsAsmBackend.cpp | 64 ++++++++++++++++++- test/MC/Mips/llvm-mc-fixup-endianness.s | 6 ++ test/MC/Mips/mips_directives.s | 2 +- test/MC/Mips/mips_gprel16.s | 3 + test/MC/Mips/msa/test_cbranch.s | 20 +++--- 5 files changed, 82 insertions(+), 13 deletions(-) create mode 100644 test/MC/Mips/llvm-mc-fixup-endianness.s diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 153974e470f9..332f7ea7a261 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -189,7 +189,7 @@ void MipsAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, const MCFixupKindInfo &MipsAsmBackend:: getFixupKindInfo(MCFixupKind Kind) const { - const static MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] = { + const static MCFixupKindInfo LittleEndianInfos[Mips::NumTargetFixupKinds] = { // This table *must* be in same the order of fixup_* kinds in // MipsFixupKinds.h. // @@ -246,12 +246,72 @@ getFixupKindInfo(MCFixupKind Kind) const { { "fixup_MICROMIPS_TLS_TPREL_LO16", 0, 16, 0 } }; + const static MCFixupKindInfo BigEndianInfos[Mips::NumTargetFixupKinds] = { + // This table *must* be in same the order of fixup_* kinds in + // MipsFixupKinds.h. + // + // name offset bits flags + { "fixup_Mips_16", 16, 16, 0 }, + { "fixup_Mips_32", 0, 32, 0 }, + { "fixup_Mips_REL32", 0, 32, 0 }, + { "fixup_Mips_26", 6, 26, 0 }, + { "fixup_Mips_HI16", 16, 16, 0 }, + { "fixup_Mips_LO16", 16, 16, 0 }, + { "fixup_Mips_GPREL16", 16, 16, 0 }, + { "fixup_Mips_LITERAL", 16, 16, 0 }, + { "fixup_Mips_GOT_Global", 16, 16, 0 }, + { "fixup_Mips_GOT_Local", 16, 16, 0 }, + { "fixup_Mips_PC16", 16, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Mips_CALL16", 16, 16, 0 }, + { "fixup_Mips_GPREL32", 0, 32, 0 }, + { "fixup_Mips_SHIFT5", 21, 5, 0 }, + { "fixup_Mips_SHIFT6", 21, 5, 0 }, + { "fixup_Mips_64", 0, 64, 0 }, + { "fixup_Mips_TLSGD", 16, 16, 0 }, + { "fixup_Mips_GOTTPREL", 16, 16, 0 }, + { "fixup_Mips_TPREL_HI", 16, 16, 0 }, + { "fixup_Mips_TPREL_LO", 16, 16, 0 }, + { "fixup_Mips_TLSLDM", 16, 16, 0 }, + { "fixup_Mips_DTPREL_HI", 16, 16, 0 }, + { "fixup_Mips_DTPREL_LO", 16, 16, 0 }, + { "fixup_Mips_Branch_PCRel",16, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Mips_GPOFF_HI", 16, 16, 0 }, + { "fixup_Mips_GPOFF_LO", 16, 16, 0 }, + { "fixup_Mips_GOT_PAGE", 16, 16, 0 }, + { "fixup_Mips_GOT_OFST", 16, 16, 0 }, + { "fixup_Mips_GOT_DISP", 16, 16, 0 }, + { "fixup_Mips_HIGHER", 16, 16, 0 }, + { "fixup_Mips_HIGHEST", 16, 16, 0 }, + { "fixup_Mips_GOT_HI16", 16, 16, 0 }, + { "fixup_Mips_GOT_LO16", 16, 16, 0 }, + { "fixup_Mips_CALL_HI16", 16, 16, 0 }, + { "fixup_Mips_CALL_LO16", 16, 16, 0 }, + { "fixup_MICROMIPS_26_S1", 6, 26, 0 }, + { "fixup_MICROMIPS_HI16", 16, 16, 0 }, + { "fixup_MICROMIPS_LO16", 16, 16, 0 }, + { "fixup_MICROMIPS_GOT16", 16, 16, 0 }, + { "fixup_MICROMIPS_PC16_S1",16, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MICROMIPS_CALL16", 16, 16, 0 }, + { "fixup_MICROMIPS_GOT_DISP", 16, 16, 0 }, + { "fixup_MICROMIPS_GOT_PAGE", 16, 16, 0 }, + { 
"fixup_MICROMIPS_GOT_OFST", 16, 16, 0 }, + { "fixup_MICROMIPS_TLS_GD", 16, 16, 0 }, + { "fixup_MICROMIPS_TLS_LDM", 16, 16, 0 }, + { "fixup_MICROMIPS_TLS_DTPREL_HI16", 16, 16, 0 }, + { "fixup_MICROMIPS_TLS_DTPREL_LO16", 16, 16, 0 }, + { "fixup_MICROMIPS_TLS_TPREL_HI16", 16, 16, 0 }, + { "fixup_MICROMIPS_TLS_TPREL_LO16", 16, 16, 0 } + }; + if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); - return Infos[Kind - FirstTargetFixupKind]; + + if (IsLittle) + return LittleEndianInfos[Kind - FirstTargetFixupKind]; + return BigEndianInfos[Kind - FirstTargetFixupKind]; } /// WriteNopData - Write an (optimal) nop sequence of Count bytes diff --git a/test/MC/Mips/llvm-mc-fixup-endianness.s b/test/MC/Mips/llvm-mc-fixup-endianness.s new file mode 100644 index 000000000000..bc6a5d96632c --- /dev/null +++ b/test/MC/Mips/llvm-mc-fixup-endianness.s @@ -0,0 +1,6 @@ +# RUN: llvm-mc -show-encoding -mcpu=mips32 -triple mips-unknown-unknown %s | FileCheck -check-prefix=BE %s +# RUN: llvm-mc -show-encoding -mcpu=mips32 -triple mipsel-unknown-unknown %s | FileCheck -check-prefix=LE %s +# + .text + b foo # BE: b foo # encoding: [0x10,0x00,A,A] + # LE: b foo # encoding: [A,A,0x00,0x10] diff --git a/test/MC/Mips/mips_directives.s b/test/MC/Mips/mips_directives.s index 6780dd0b3ca5..1a7d61f3ad4f 100644 --- a/test/MC/Mips/mips_directives.s +++ b/test/MC/Mips/mips_directives.s @@ -51,7 +51,7 @@ $BB0_4: .set $tmp7, $BB0_4-$BB0_2 .set f6,$f6 # CHECK: abs.s $f6, $f7 # encoding: [0x46,0x00,0x39,0x85] -# CHECK: lui $1, %hi($tmp7) # encoding: [0x3c'A',0x01'A',0x00,0x00] +# CHECK: lui $1, %hi($tmp7) # encoding: [0x3c,0x01,A,A] # CHECK: # fixup A - offset: 0, value: ($tmp7)@ABS_HI, kind: fixup_Mips_HI16 abs.s f6,FPU_MASK lui $1, %hi($tmp7) diff --git a/test/MC/Mips/mips_gprel16.s b/test/MC/Mips/mips_gprel16.s index 716c75ec88d4..9dd3fa3281c2 100644 --- a/test/MC/Mips/mips_gprel16.s +++ b/test/MC/Mips/mips_gprel16.s @@ -5,6 +5,9 @@ // RUN: llvm-mc -mcpu=mips32r2 -triple=mipsel-pc-linux -filetype=obj -relocation-model=static %s -o - \ // RUN: | llvm-objdump -disassemble -mattr +mips32r2 - \ +// RUN: | FileCheck %s +// RUN: llvm-mc -mcpu=mips32r2 -triple=mips-pc-linux -filetype=obj -relocation-model=static %s -o - \ +// RUN: | llvm-objdump -disassemble -mattr +mips32r2 - \ // RUN: | FileCheck %s .text diff --git a/test/MC/Mips/msa/test_cbranch.s b/test/MC/Mips/msa/test_cbranch.s index 37b887256029..aa6779b1b46e 100644 --- a/test/MC/Mips/msa/test_cbranch.s +++ b/test/MC/Mips/msa/test_cbranch.s @@ -7,22 +7,22 @@ #CHECK: bnz.w $w2, 128 # encoding: [0x47,0xc2,0x00,0x20] #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] #CHECK: bnz.d $w3, -128 # encoding: [0x47,0xe3,0xff,0xe0] -#CHECK: bnz.b $w0, SYMBOL0 # encoding: [0x47'A',0x80'A',0x00,0x00] +#CHECK: bnz.b $w0, SYMBOL0 # encoding: [0x47,0x80,A,A] # fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16 #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -#CHECK: bnz.h $w1, SYMBOL1 # encoding: [0x47'A',0xa1'A',0x00,0x00] +#CHECK: bnz.h $w1, SYMBOL1 # encoding: [0x47,0xa1,A,A] # fixup A - offset: 0, value: SYMBOL1, kind: fixup_Mips_PC16 #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -#CHECK: bnz.w $w2, SYMBOL2 # encoding: [0x47'A',0xc2'A',0x00,0x00] +#CHECK: bnz.w $w2, SYMBOL2 # encoding: [0x47,0xc2,A,A] # fixup A - offset: 0, value: SYMBOL2, kind: fixup_Mips_PC16 #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -#CHECK: bnz.d $w3, SYMBOL3 # encoding: [0x47'A',0xe3'A',0x00,0x00] 
+#CHECK: bnz.d $w3, SYMBOL3 # encoding: [0x47,0xe3,A,A] # fixup A - offset: 0, value: SYMBOL3, kind: fixup_Mips_PC16 #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] #CHECK: bnz.v $w0, 4 # encoding: [0x45,0xe0,0x00,0x01] #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -#CHECK: bnz.v $w0, SYMBOL0 # encoding: [0x45'A',0xe0'A',0x00,0x00] +#CHECK: bnz.v $w0, SYMBOL0 # encoding: [0x45,0xe0,A,A] # fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16 #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] @@ -34,22 +34,22 @@ #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] #CHECK: bz.d $w3, -1024 # encoding: [0x47,0x63,0xff,0x00] #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -#CHECK: bz.b $w0, SYMBOL0 # encoding: [0x47'A',A,0x00,0x00] +#CHECK: bz.b $w0, SYMBOL0 # encoding: [0x47,0x00,A,A] # fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16 #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -#CHECK: bz.h $w1, SYMBOL1 # encoding: [0x47'A',0x21'A',0x00,0x00] +#CHECK: bz.h $w1, SYMBOL1 # encoding: [0x47,0x21,A,A] # fixup A - offset: 0, value: SYMBOL1, kind: fixup_Mips_PC16 #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -#CHECK: bz.w $w2, SYMBOL2 # encoding: [0x47'A',0x42'A',0x00,0x00] +#CHECK: bz.w $w2, SYMBOL2 # encoding: [0x47,0x42,A,A] # fixup A - offset: 0, value: SYMBOL2, kind: fixup_Mips_PC16 #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -#CHECK: bz.d $w3, SYMBOL3 # encoding: [0x47'A',0x63'A',0x00,0x00] +#CHECK: bz.d $w3, SYMBOL3 # encoding: [0x47,0x63,A,A] # fixup A - offset: 0, value: SYMBOL3, kind: fixup_Mips_PC16 #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] #CHECK: bz.v $w0, 4 # encoding: [0x45,0x60,0x00,0x01] #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -#CHECK: bz.v $w0, SYMBOL0 # encoding: [0x45'A',0x60'A',0x00,0x00] +#CHECK: bz.v $w0, SYMBOL0 # encoding: [0x45,0x60,A,A] # fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16 #CHECK: nop # encoding: [0x00,0x00,0x00,0x00] From dc3ce836da72a2898779c3262701a5dde82f8f10 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 23 May 2014 15:07:51 +0000 Subject: [PATCH 111/906] Delete dead code. GV is never used past this point. This was probably a copy and paste error. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209518 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c3006377a6c4..cbaf44e35e12 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8945,10 +8945,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // Windows 64bit: gs:0x58 // Windows 32bit: fs:__tls_array - // If GV is an alias then use the aliasee for determining - // thread-localness. - if (const GlobalAlias *GA = dyn_cast(GV)) - GV = GA->getAliasee(); SDLoc dl(GA); SDValue Chain = DAG.getEntryNode(); From a4df7d473f1b638e1bef6417d5430488da4f0bf8 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 23 May 2014 15:18:06 +0000 Subject: [PATCH 112/906] Aliases are always definition, delete dead code. While at it, use a range loop. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209519 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/LTO/LTOModule.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index d73a7e342ae7..255951a70706 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -800,14 +800,8 @@ bool LTOModule::parseSymbols(std::string &errMsg) { return true; // add aliases - for (Module::alias_iterator a = _module->alias_begin(), - e = _module->alias_end(); a != e; ++a) { - if (isDeclaration(*a->getAliasee())) - // Is an alias to a declaration. - addPotentialUndefinedSymbol(a, false); - else - addDefinedDataSymbol(a); - } + for (const auto &Alias : _module->aliases()) + addDefinedDataSymbol(&Alias); // make symbols for all undefines for (StringMap::iterator u =_undefines.begin(), From 2bd2c560eeb5aeb29a2f33db267db16e62fb25ec Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Fri, 23 May 2014 15:33:39 +0000 Subject: [PATCH 113/906] Teach the table generated emitPseudoExpansionLowering function to not emit a switch statement containing only a default statement (and no cases). Updated some of the code to use range-based for loops as well. No functional changes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209521 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/TableGen/PseudoLoweringEmitter.cpp | 126 ++++++++++++----------- 1 file changed, 65 insertions(+), 61 deletions(-) diff --git a/utils/TableGen/PseudoLoweringEmitter.cpp b/utils/TableGen/PseudoLoweringEmitter.cpp index 70b677fd1b9f..3b74ac41e9b3 100644 --- a/utils/TableGen/PseudoLoweringEmitter.cpp +++ b/utils/TableGen/PseudoLoweringEmitter.cpp @@ -200,70 +200,74 @@ void PseudoLoweringEmitter::emitLoweringEmitter(raw_ostream &o) { o << "bool " << Target.getName() + "AsmPrinter" << "::\n" << "emitPseudoExpansionLowering(MCStreamer &OutStreamer,\n" - << " const MachineInstr *MI) {\n" - << " switch (MI->getOpcode()) {\n" - << " default: return false;\n"; - for (unsigned i = 0, e = Expansions.size(); i != e; ++i) { - PseudoExpansion &Expansion = Expansions[i]; - CodeGenInstruction &Source = Expansion.Source; - CodeGenInstruction &Dest = Expansion.Dest; - o << " case " << Source.Namespace << "::" - << Source.TheDef->getName() << ": {\n" - << " MCInst TmpInst;\n" - << " MCOperand MCOp;\n" - << " TmpInst.setOpcode(" << Dest.Namespace << "::" - << Dest.TheDef->getName() << ");\n"; - - // Copy the operands from the source instruction. - // FIXME: Instruction operands with defaults values (predicates and cc_out - // in ARM, for example shouldn't need explicit values in the - // expansion DAG. - unsigned MIOpNo = 0; - for (unsigned OpNo = 0, E = Dest.Operands.size(); OpNo != E; - ++OpNo) { - o << " // Operand: " << Dest.Operands[OpNo].Name << "\n"; - for (unsigned i = 0, e = Dest.Operands[OpNo].MINumOperands; - i != e; ++i) { - switch (Expansion.OperandMap[MIOpNo + i].Kind) { - case OpData::Operand: - o << " lowerOperand(MI->getOperand(" - << Source.Operands[Expansion.OperandMap[MIOpNo].Data - .Operand].MIOperandNo + i - << "), MCOp);\n" - << " TmpInst.addOperand(MCOp);\n"; - break; - case OpData::Imm: - o << " TmpInst.addOperand(MCOperand::CreateImm(" - << Expansion.OperandMap[MIOpNo + i].Data.Imm << "));\n"; - break; - case OpData::Reg: { - Record *Reg = Expansion.OperandMap[MIOpNo + i].Data.Reg; - o << " TmpInst.addOperand(MCOperand::CreateReg("; - // "zero_reg" is special. 
- if (Reg->getName() == "zero_reg") - o << "0"; - else - o << Reg->getValueAsString("Namespace") << "::" << Reg->getName(); - o << "));\n"; - break; - } + << " const MachineInstr *MI) {\n"; + + if (!Expansions.empty()) { + o << " switch (MI->getOpcode()) {\n" + << " default: return false;\n"; + for (auto &Expansion : Expansions) { + CodeGenInstruction &Source = Expansion.Source; + CodeGenInstruction &Dest = Expansion.Dest; + o << " case " << Source.Namespace << "::" + << Source.TheDef->getName() << ": {\n" + << " MCInst TmpInst;\n" + << " MCOperand MCOp;\n" + << " TmpInst.setOpcode(" << Dest.Namespace << "::" + << Dest.TheDef->getName() << ");\n"; + + // Copy the operands from the source instruction. + // FIXME: Instruction operands with defaults values (predicates and cc_out + // in ARM, for example shouldn't need explicit values in the + // expansion DAG. + unsigned MIOpNo = 0; + for (const auto &DestOperand : Dest.Operands) { + o << " // Operand: " << DestOperand.Name << "\n"; + for (unsigned i = 0, e = DestOperand.MINumOperands; i != e; ++i) { + switch (Expansion.OperandMap[MIOpNo + i].Kind) { + case OpData::Operand: + o << " lowerOperand(MI->getOperand(" + << Source.Operands[Expansion.OperandMap[MIOpNo].Data + .Operand].MIOperandNo + i + << "), MCOp);\n" + << " TmpInst.addOperand(MCOp);\n"; + break; + case OpData::Imm: + o << " TmpInst.addOperand(MCOperand::CreateImm(" + << Expansion.OperandMap[MIOpNo + i].Data.Imm << "));\n"; + break; + case OpData::Reg: { + Record *Reg = Expansion.OperandMap[MIOpNo + i].Data.Reg; + o << " TmpInst.addOperand(MCOperand::CreateReg("; + // "zero_reg" is special. + if (Reg->getName() == "zero_reg") + o << "0"; + else + o << Reg->getValueAsString("Namespace") << "::" + << Reg->getName(); + o << "));\n"; + break; + } + } } + MIOpNo += DestOperand.MINumOperands; } - MIOpNo += Dest.Operands[OpNo].MINumOperands; - } - if (Dest.Operands.isVariadic) { - MIOpNo = Source.Operands.size() + 1; - o << " // variable_ops\n"; - o << " for (unsigned i = " << MIOpNo - << ", e = MI->getNumOperands(); i != e; ++i)\n" - << " if (lowerOperand(MI->getOperand(i), MCOp))\n" - << " TmpInst.addOperand(MCOp);\n"; + if (Dest.Operands.isVariadic) { + MIOpNo = Source.Operands.size() + 1; + o << " // variable_ops\n"; + o << " for (unsigned i = " << MIOpNo + << ", e = MI->getNumOperands(); i != e; ++i)\n" + << " if (lowerOperand(MI->getOperand(i), MCOp))\n" + << " TmpInst.addOperand(MCOp);\n"; + } + o << " EmitToStreamer(OutStreamer, TmpInst);\n" + << " break;\n" + << " }\n"; } - o << " EmitToStreamer(OutStreamer, TmpInst);\n" - << " break;\n" - << " }\n"; - } - o << " }\n return true;\n}\n\n"; + o << " }\n return true;"; + } else + o << " return false;"; + + o << "\n}\n\n"; } void PseudoLoweringEmitter::run(raw_ostream &o) { From c3be377e3698cd834fe1c90acc2963c04f45e060 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 23 May 2014 16:51:13 +0000 Subject: [PATCH 114/906] Convert test to use FileCheck. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209528 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll index e673d315a435..6052c102af61 100644 --- a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll +++ b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -relocation-model=pic | grep TLSGD | count 2 +; RUN: llc < %s -relocation-model=pic | FileCheck %s ; PR2137 ; ModuleID = '1.c' @@ -11,6 +11,8 @@ target triple = "i386-pc-linux-gnu" @__libc_resp = hidden alias %struct.__res_state** @__resp ; <%struct.__res_state**> [#uses=2] define i32 @foo() { +; CHECK-LABEL: foo: +; CHECK: leal __libc_resp@TLSGD entry: %retval = alloca i32 ; [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] @@ -24,6 +26,8 @@ return: ; preds = %entry } define i32 @bar() { +; CHECK-LABEL: bar: +; CHECK: leal __libc_resp@TLSGD entry: %retval = alloca i32 ; [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] From bd62a7ad6c6840f893ed3987c8b2536a00be1224 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 23 May 2014 16:53:14 +0000 Subject: [PATCH 115/906] Add FIXME comment based on code review feedback by Hal Finkel on r209338 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209529 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 06331e44d913..37136b2a2028 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2371,6 +2371,8 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, if (!useSplitDwarf()) CU.applyStmtList(UnitDie); + // FIXME: Skip using COMDAT groups for type units in the .dwo file once tools + // such as DWP ( http://gcc.gnu.org/wiki/DebugFissionDWP ) can cope with it. NewTU.initSection( useSplitDwarf() ? Asm->getObjFileLowering().getDwarfTypesDWOSection(Signature) From bb75e24528a4bdb3030787640e4d5adb7cdfc476 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 23 May 2014 18:35:44 +0000 Subject: [PATCH 116/906] [RuntimeDyld] Remove relocation bounds check introduced in r208375 (MachO only). We do all of our address arithmetic in 64-bit, and operations involving logically negative 32-bit offsets (actually represented as unsigned 64 bit ints) often overflow into higher bits. The overflow check could be preserved by casting to uint32 at the callsite for applyRelocationValue, but this would eliminate the value of the check. The right way to handle overflow in relocations is to make relocation processing target specific, and compute the values for RelocationEntry objects in the appropriate types (32-bit for 32-bit targets, 64-bit for 64-bit targets). This is coming as part of the cleanup I'm working on. This fixes another i386 regression test. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209536 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 6911f2f07aaa..08573eed5c87 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -28,16 +28,13 @@ class RuntimeDyldMachO : public RuntimeDyldImpl { private: /// Write the least significant 'Size' bytes in 'Value' out at the address - /// pointed to by Addr. Check for overflow. + /// pointed to by Addr. bool applyRelocationValue(uint8_t *Addr, uint64_t Value, unsigned Size) { for (unsigned i = 0; i < Size; ++i) { *Addr++ = (uint8_t)Value; Value >>= 8; } - if (Value) // Catch overflow - return Error("Relocation out of range."); - return false; } From 8d959dd5638cdc556c736637e78639a3ccf074fc Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Fri, 23 May 2014 18:39:40 +0000 Subject: [PATCH 117/906] Add the extracted constant offset using GEP Fixed a TODO in r207783. Add the extracted constant offset using GEP instead of ugly ptrtoint+add+inttoptr. Using GEP simplifies future optimizations and makes IR easier to understand. Updated all affected tests, and added a new test in split-gep.ll to cover a corner case where emitting uglygep is necessary. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209537 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Scalar/SeparateConstOffsetFromGEP.cpp | 76 ++++++++++++------- .../NVPTX/split-gep-and-gvn.ll | 7 +- .../NVPTX/split-gep.ll | 36 ++++++--- 3 files changed, 80 insertions(+), 39 deletions(-) diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 0ccf29c225b1..ac3e7c4d74a1 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -487,7 +487,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { int64_t ConstantOffset = ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP); if (ConstantOffset != 0) { - assert(NewIdx && "ConstantOffset != 0 implies NewIdx is set"); + assert(NewIdx != nullptr && + "ConstantOffset != 0 implies NewIdx is set"); GEP->setOperand(I, NewIdx); // Clear the inbounds attribute because the new index may be off-bound. // e.g., @@ -522,44 +523,67 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // => add the offset // // %gep2 ; clone of %gep - // %0 = ptrtoint %gep2 - // %1 = add %0, - // %new.gep = inttoptr %1 + // %new.gep = gep %gep2, // %gep ; will be removed // ... %gep ... // // => replace all uses of %gep with %new.gep and remove %gep // // %gep2 ; clone of %gep - // %0 = ptrtoint %gep2 - // %1 = add %0, - // %new.gep = inttoptr %1 + // %new.gep = gep %gep2, // ... %new.gep ... // - // TODO(jingyue): Emit a GEP instead of an "uglygep" - // (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep) to make the IR - // prettier and more alias analysis friendly. One caveat: if the original GEP - // ends with a StructType, we need to split the GEP at the last - // SequentialType. 
For instance, consider the following IR: + // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an + // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep): + // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the + // type of %gep. // - // %struct.S = type { float, double } - // @array = global [1024 x %struct.S] - // %p = getelementptr %array, 0, %i + 5, 1 - // - // To separate the constant 5 from %p, we would need to split %p at the last - // array index so that we have: - // - // %addr = gep %array, 0, %i - // %p = gep %addr, 5, 1 + // %gep2 ; clone of %gep + // %0 = bitcast %gep2 to i8* + // %uglygep = gep %0, + // %new.gep = bitcast %uglygep to + // ... %new.gep ... Instruction *NewGEP = GEP->clone(); NewGEP->insertBefore(GEP); + Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); - Value *Addr = new PtrToIntInst(NewGEP, IntPtrTy, "", GEP); - Addr = BinaryOperator::CreateAdd( - Addr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "", GEP); - Addr = new IntToPtrInst(Addr, GEP->getType(), "", GEP); + uint64_t ElementTypeSizeOfGEP = + DL->getTypeAllocSize(GEP->getType()->getElementType()); + if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) { + // Very likely. As long as %gep is natually aligned, the byte offset we + // extracted should be a multiple of sizeof(*%gep). + // Per ANSI C standard, signed / unsigned = unsigned. Therefore, we + // cast ElementTypeSizeOfGEP to signed. + int64_t Index = + AccumulativeByteOffset / static_cast(ElementTypeSizeOfGEP); + NewGEP = GetElementPtrInst::Create( + NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP); + } else { + // Unlikely but possible. For example, + // #pragma pack(1) + // struct S { + // int a[3]; + // int64 b[8]; + // }; + // #pragma pack() + // + // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After + // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is + // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of + // sizeof(int64). + // + // Emit an uglygep in this case. 
+ Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(), + GEP->getPointerAddressSpace()); + NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP); + NewGEP = GetElementPtrInst::Create( + NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), + "uglygep", GEP); + if (GEP->getType() != I8PtrTy) + NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP); + } - GEP->replaceAllUsesWith(Addr); + GEP->replaceAllUsesWith(NewGEP); GEP->eraseFromParent(); return true; diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll index 66f4096fa964..850fc4cde8dc 100644 --- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll +++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll @@ -54,7 +54,6 @@ define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) { ; IR-LABEL: @sum_of_array( ; IR: [[BASE_PTR:%[0-9]+]] = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i32 %x, i32 %y -; IR: [[BASE_INT:%[0-9]+]] = ptrtoint float addrspace(3)* [[BASE_PTR]] to i64 -; IR: %5 = add i64 [[BASE_INT]], 4 -; IR: %10 = add i64 [[BASE_INT]], 128 -; IR: %15 = add i64 [[BASE_INT]], 132 +; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 1 +; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 32 +; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 33 diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll index f4020019c9a1..320af5fd613f 100644 --- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll +++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll @@ -39,7 +39,7 @@ entry: } ; CHECK-LABEL: @sext_zext ; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i32 %i, i32 %j -; CHECK: add i64 %{{[0-9]+}}, 136 +; CHECK: getelementptr float* %{{[0-9]+}}, i64 34 ; We should be able to trace into sext/zext if it can be distributed to both ; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b) @@ -55,8 +55,7 @@ define float* @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) { } ; CHECK-LABEL: @ext_add_no_overflow ; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[0-9]+}}, i64 %{{[0-9]+}} -; CHECK: [[BASE_INT:%[0-9]+]] = ptrtoint float* [[BASE_PTR]] to i64 -; CHECK: add i64 [[BASE_INT]], 132 +; CHECK: getelementptr float* [[BASE_PTR]], i64 33 ; We should treat "or" with no common bits (%k) as "add", and leave "or" with ; potentially common bits (%l) as is. @@ -69,8 +68,8 @@ entry: ret float* %p } ; CHECK-LABEL: @or -; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %j, i64 %l -; CHECK: add i64 %{{[0-9]+}}, 384 +; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %j, i64 %l +; CHECK: getelementptr float* [[BASE_PTR]], i64 96 ; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b + ; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't @@ -84,8 +83,8 @@ entry: ret float* %p } ; CHECK-LABEL: @expr -; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %0, i64 0 -; CHECK: add i64 %{{[0-9]+}}, 640 +; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %0, i64 0 +; CHECK: getelementptr float* [[BASE_PTR]], i64 160 ; CHECK: store i64 %b5, i64* %out ; Verifies we handle "sub" correctly. 
@@ -97,5 +96,24 @@ define float* @sub(i64 %i, i64 %j) { } ; CHECK-LABEL: @sub ; CHECK: %[[j2:[0-9]+]] = sub i64 0, %j -; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]] -; CHECK: add i64 %{{[0-9]+}}, -620 +; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]] +; CHECK: getelementptr float* [[BASE_PTR]], i64 -155 + +%struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed + +; Verifies we can emit correct uglygep if the address is not natually aligned. +define i64* @packed_struct(i32 %i, i32 %j) { +entry: + %s = alloca [1024 x %struct.Packed], align 16 + %add = add nsw i32 %j, 3 + %idxprom = sext i32 %add to i64 + %add1 = add nsw i32 %i, 1 + %idxprom2 = sext i32 %add1 to i64 + %arrayidx3 = getelementptr inbounds [1024 x %struct.Packed]* %s, i64 0, i64 %idxprom2, i32 1, i64 %idxprom + ret i64* %arrayidx3 +} +; CHECK-LABEL: @packed_struct +; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [1024 x %struct.Packed]* %s, i64 0, i32 %i, i32 1, i32 %j +; CHECK: [[CASTED_PTR:%[0-9]+]] = bitcast i64* [[BASE_PTR]] to i8* +; CHECK: %uglygep = getelementptr i8* [[CASTED_PTR]], i64 100 +; CHECK: bitcast i8* %uglygep to i64* From a739ea33a0edad54ddedf928142dbd3dc1948caa Mon Sep 17 00:00:00 2001 From: Nico Rieck Date: Fri, 23 May 2014 19:06:24 +0000 Subject: [PATCH 118/906] Fix broken FileCheck prefixes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209538 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/ARM64/cse.ll | 2 +- test/CodeGen/ARM64/csel.ll | 6 +++--- test/CodeGen/ARM64/vmul.ll | 16 ++++++++-------- test/CodeGen/MSP430/fp.ll | 2 +- .../X86/insert-element-build-vector.ll | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/test/CodeGen/ARM64/cse.ll b/test/CodeGen/ARM64/cse.ll index d98bfd605390..bb14c8955049 100644 --- a/test/CodeGen/ARM64/cse.ll +++ b/test/CodeGen/ARM64/cse.ll @@ -13,7 +13,7 @@ entry: ; CHECK: b.ge ; CHECK: sub ; CHECK: sub -; CHECK_NOT: sub +; CHECK-NOT: sub ; CHECK: ret %0 = load i32* %offset, align 4 %cmp = icmp slt i32 %0, %size diff --git a/test/CodeGen/ARM64/csel.ll b/test/CodeGen/ARM64/csel.ll index 98eba30f119d..9b42858558b6 100644 --- a/test/CodeGen/ARM64/csel.ll +++ b/test/CodeGen/ARM64/csel.ll @@ -79,9 +79,9 @@ define i32 @foo7(i32 %a, i32 %b) nounwind { entry: ; CHECK-LABEL: foo7: ; CHECK: sub -; CHECK-next: adds -; CHECK-next: csneg -; CHECK-next: b +; CHECK-NEXT: adds +; CHECK-NEXT: csneg +; CHECK-NEXT: b %sub = sub nsw i32 %a, %b %cmp = icmp sgt i32 %sub, -1 %sub3 = sub nsw i32 0, %sub diff --git a/test/CodeGen/ARM64/vmul.ll b/test/CodeGen/ARM64/vmul.ll index b6bd16ac0b4c..9d08b9dc3479 100644 --- a/test/CodeGen/ARM64/vmul.ll +++ b/test/CodeGen/ARM64/vmul.ll @@ -1201,35 +1201,35 @@ define <2 x i64> @umlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou ; Scalar FMULX define float @fmulxs(float %a, float %b) nounwind { ; CHECK-LABEL: fmulxs: -; CHECKNEXT: fmulx s0, s0, s1 +; CHECK-NEXT: fmulx s0, s0, s1 %fmulx.i = tail call float @llvm.arm64.neon.fmulx.f32(float %a, float %b) nounwind -; CHECKNEXT: ret +; CHECK-NEXT: ret ret float %fmulx.i } define double @fmulxd(double %a, double %b) nounwind { ; CHECK-LABEL: fmulxd: -; CHECKNEXT: fmulx d0, d0, d1 +; CHECK-NEXT: fmulx d0, d0, d1 %fmulx.i = tail call double @llvm.arm64.neon.fmulx.f64(double %a, double %b) nounwind -; CHECKNEXT: ret +; CHECK-NEXT: ret ret double %fmulx.i } define float @fmulxs_lane(float %a, <4 x float> %vec) nounwind { ; CHECK-LABEL: fmulxs_lane: -; CHECKNEXT: 
fmulx.s s0, s0, v1[3] +; CHECK-NEXT: fmulx.s s0, s0, v1[3] %b = extractelement <4 x float> %vec, i32 3 %fmulx.i = tail call float @llvm.arm64.neon.fmulx.f32(float %a, float %b) nounwind -; CHECKNEXT: ret +; CHECK-NEXT: ret ret float %fmulx.i } define double @fmulxd_lane(double %a, <2 x double> %vec) nounwind { ; CHECK-LABEL: fmulxd_lane: -; CHECKNEXT: fmulx d0, d0, v1[1] +; CHECK-NEXT: fmulx d0, d0, v1[1] %b = extractelement <2 x double> %vec, i32 1 %fmulx.i = tail call double @llvm.arm64.neon.fmulx.f64(double %a, double %b) nounwind -; CHECKNEXT: ret +; CHECK-NEXT: ret ret double %fmulx.i } diff --git a/test/CodeGen/MSP430/fp.ll b/test/CodeGen/MSP430/fp.ll index b6ba22e47cc5..2559e23ae1f5 100644 --- a/test/CodeGen/MSP430/fp.ll +++ b/test/CodeGen/MSP430/fp.ll @@ -21,7 +21,7 @@ entry: ; does not happen anymore. Note that the only reason an ISR is used here is that ; the register allocator selects r4 first instead of fifth in a normal function. define msp430_intrcc void @fpb_alloced() #0 { -; CHECK_LABEL: fpb_alloced: +; CHECK-LABEL: fpb_alloced: ; CHECK-NOT: mov.b #0, r4 ; CHECK: nop call void asm sideeffect "nop", "r"(i8 0) diff --git a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index db33927cb793..9eda29f101ac 100644 --- a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -223,7 +223,7 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) { ; instructions that are erased. define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) { ; ZEROTHRESH-LABEL: @take_credit( -; ZEROTHRESH-CHECK: %1 = fadd <4 x float> %a, %b +; ZEROTHRESH: %1 = fadd <4 x float> %a, %b %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 %c0 = fadd float %a0, %b0 From b16514017b8f1c6b9c38a9712354d4dc2fb52071 Mon Sep 17 00:00:00 2001 From: Nico Rieck Date: Fri, 23 May 2014 19:06:44 +0000 Subject: [PATCH 119/906] Remove unused CHECK lines git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209539 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/gcc_except_table.ll | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll index 7a29b07423c6..8c328ec58f93 100644 --- a/test/CodeGen/X86/gcc_except_table.ll +++ b/test/CodeGen/X86/gcc_except_table.ll @@ -50,7 +50,3 @@ eh.resume: declare void @_Z1fv() optsize declare i32 @__gxx_personality_v0(...) - -; CHECK: Leh_func_end0: -; CHECK: GCC_except_table0 -; CHECK: = Leh_func_end0- From ef72e73da9518b4de85120d40907297fd514aca3 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 23 May 2014 19:16:56 +0000 Subject: [PATCH 120/906] Use alias linkage and visibility to decide tls access mode. This matches both what we do for the non-thread case and what gcc does. With this patch clang would match gcc's behaviour in static __thread int a = 42; extern __thread int b __attribute__((alias("a"))); int *f(void) { return &a; } int *g(void) { return &b; } if not for pr19843. Manually writing the IL does produce the same access modes. It is also a step in the direction of fixing pr19844. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209543 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/TargetMachine.cpp | 23 ++++++++----------- test/CodeGen/Mips/tls-alias.ll | 2 +- .../X86/2008-03-12-ThreadLocalAlias.ll | 4 ++-- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index dbd433d5240c..8365f64dc54a 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -106,19 +106,13 @@ static TLSModel::Model getSelectedTLSModel(const GlobalVariable *Var) { } TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const { - // If GV is an alias then use the aliasee for determining - // thread-localness. - if (const GlobalAlias *GA = dyn_cast(GV)) - GV = GA->getAliasee(); - const GlobalVariable *Var = cast(GV); - - bool isLocal = Var->hasLocalLinkage(); - bool isDeclaration = Var->isDeclaration(); + bool isLocal = GV->hasLocalLinkage(); + bool isDeclaration = GV->isDeclaration(); bool isPIC = getRelocationModel() == Reloc::PIC_; bool isPIE = Options.PositionIndependentExecutable; // FIXME: what should we do for protected and internal visibility? // For variables, is internal different from hidden? - bool isHidden = Var->hasHiddenVisibility(); + bool isHidden = GV->hasHiddenVisibility(); TLSModel::Model Model; if (isPIC && !isPIE) { @@ -133,10 +127,13 @@ TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const { Model = TLSModel::InitialExec; } - // If the user specified a more specific model, use that. - TLSModel::Model SelectedModel = getSelectedTLSModel(Var); - if (SelectedModel > Model) - return SelectedModel; + const GlobalVariable *Var = dyn_cast(GV); + if (Var) { + // If the user specified a more specific model, use that. + TLSModel::Model SelectedModel = getSelectedTLSModel(Var); + if (SelectedModel > Model) + return SelectedModel; + } return Model; } diff --git a/test/CodeGen/Mips/tls-alias.ll b/test/CodeGen/Mips/tls-alias.ll index 3c810542cca3..80fbe87a8d61 100644 --- a/test/CodeGen/Mips/tls-alias.ll +++ b/test/CodeGen/Mips/tls-alias.ll @@ -5,6 +5,6 @@ define i32* @zed() { ; CHECK-DAG: __tls_get_addr -; CHECK-DAG: %tlsgd(bar) +; CHECK-DAG: %tlsldm(bar) ret i32* @bar } diff --git a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll index 6052c102af61..e64375a2b361 100644 --- a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll +++ b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll @@ -12,7 +12,7 @@ target triple = "i386-pc-linux-gnu" define i32 @foo() { ; CHECK-LABEL: foo: -; CHECK: leal __libc_resp@TLSGD +; CHECK: leal __libc_resp@TLSLD entry: %retval = alloca i32 ; [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] @@ -27,7 +27,7 @@ return: ; preds = %entry define i32 @bar() { ; CHECK-LABEL: bar: -; CHECK: leal __libc_resp@TLSGD +; CHECK: leal __libc_resp@TLSLD entry: %retval = alloca i32 ; [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] From 8b8e384b3cfe35093829530e1772cd1daac62b74 Mon Sep 17 00:00:00 2001 From: Nico Rieck Date: Fri, 23 May 2014 19:33:49 +0000 Subject: [PATCH 121/906] Revert part of "Fix broken FileCheck prefixes" This reverts part of commit r209538. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209544 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/ARM64/csel.ll | 6 +++---
 test/CodeGen/ARM64/vmul.ll | 16 ++++++++--------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/test/CodeGen/ARM64/csel.ll b/test/CodeGen/ARM64/csel.ll
index 9b42858558b6..98eba30f119d 100644
--- a/test/CodeGen/ARM64/csel.ll
+++ b/test/CodeGen/ARM64/csel.ll
@@ -79,9 +79,9 @@ define i32 @foo7(i32 %a, i32 %b) nounwind {
 entry:
 ; CHECK-LABEL: foo7:
 ; CHECK: sub
-; CHECK-NEXT: adds
-; CHECK-NEXT: csneg
-; CHECK-NEXT: b
+; CHECK-next: adds
+; CHECK-next: csneg
+; CHECK-next: b
   %sub = sub nsw i32 %a, %b
   %cmp = icmp sgt i32 %sub, -1
   %sub3 = sub nsw i32 0, %sub
diff --git a/test/CodeGen/ARM64/vmul.ll b/test/CodeGen/ARM64/vmul.ll
index 9d08b9dc3479..b6bd16ac0b4c 100644
--- a/test/CodeGen/ARM64/vmul.ll
+++ b/test/CodeGen/ARM64/vmul.ll
@@ -1201,35 +1201,35 @@ define <2 x i64> @umlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou
 ; Scalar FMULX
 define float @fmulxs(float %a, float %b) nounwind {
 ; CHECK-LABEL: fmulxs:
-; CHECK-NEXT: fmulx s0, s0, s1
+; CHECKNEXT: fmulx s0, s0, s1
   %fmulx.i = tail call float @llvm.arm64.neon.fmulx.f32(float %a, float %b) nounwind
-; CHECK-NEXT: ret
+; CHECKNEXT: ret
   ret float %fmulx.i
 }

 define double @fmulxd(double %a, double %b) nounwind {
 ; CHECK-LABEL: fmulxd:
-; CHECK-NEXT: fmulx d0, d0, d1
+; CHECKNEXT: fmulx d0, d0, d1
   %fmulx.i = tail call double @llvm.arm64.neon.fmulx.f64(double %a, double %b) nounwind
-; CHECK-NEXT: ret
+; CHECKNEXT: ret
   ret double %fmulx.i
 }

 define float @fmulxs_lane(float %a, <4 x float> %vec) nounwind {
 ; CHECK-LABEL: fmulxs_lane:
-; CHECK-NEXT: fmulx.s s0, s0, v1[3]
+; CHECKNEXT: fmulx.s s0, s0, v1[3]
   %b = extractelement <4 x float> %vec, i32 3
   %fmulx.i = tail call float @llvm.arm64.neon.fmulx.f32(float %a, float %b) nounwind
-; CHECK-NEXT: ret
+; CHECKNEXT: ret
   ret float %fmulx.i
 }

 define double @fmulxd_lane(double %a, <2 x double> %vec) nounwind {
 ; CHECK-LABEL: fmulxd_lane:
-; CHECK-NEXT: fmulx d0, d0, v1[1]
+; CHECKNEXT: fmulx d0, d0, v1[1]
   %b = extractelement <2 x double> %vec, i32 1
   %fmulx.i = tail call double @llvm.arm64.neon.fmulx.f64(double %a, double %b) nounwind
-; CHECK-NEXT: ret
+; CHECKNEXT: ret
   ret double %fmulx.i
 }

From ab0d042a74f2e454a516be7a956b091f0ae7e1af Mon Sep 17 00:00:00 2001
From: Andrew Trick
Date: Fri, 23 May 2014 19:47:13 +0000
Subject: [PATCH 122/906] Fix and improve SCEV ComputeBackedgeTakenCount.

This is a follow-up to r209358: PR19799: Indvars miscompile due to an
incorrect max backedge taken count from SCEV.

That fix was incomplete as pointed out by Arnold and Michael Z. The code
was also too confusing. It needed a careful rewrite with more unit tests.
This version will also happen to optimize more cases.

PR19799: Indvars miscompile...

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209545 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Analysis/ScalarEvolution.cpp | 65 +++++++++++++------
 .../ScalarEvolution/max-trip-count.ll | 56 ++++++++++++++++
 2 files changed, 102 insertions(+), 19 deletions(-)

diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index d27afb09cf3a..0c864d840f11 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -4409,36 +4409,63 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
   SmallVector ExitingBlocks;
   L->getExitingBlocks(ExitingBlocks);

-  // Examine all exits and pick the most conservative values.
- const SCEV *MaxBECount = getCouldNotCompute(); + SmallVector, 4> ExitCounts; bool CouldComputeBECount = true; BasicBlock *Latch = L->getLoopLatch(); // may be NULL. - bool LatchMustExit = false; - SmallVector, 4> ExitCounts; + const SCEV *MustExitMaxBECount = nullptr; + const SCEV *MayExitMaxBECount = nullptr; + + // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts + // and compute maxBECount. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]); + BasicBlock *ExitBB = ExitingBlocks[i]; + ExitLimit EL = ComputeExitLimit(L, ExitBB); + + // 1. For each exit that can be computed, add an entry to ExitCounts. + // CouldComputeBECount is true only if all exits can be computed. if (EL.Exact == getCouldNotCompute()) // We couldn't compute an exact value for this exit, so // we won't be able to compute an exact value for the loop. CouldComputeBECount = false; else - ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact)); - - if (MaxBECount == getCouldNotCompute()) - MaxBECount = EL.Max; - else if (EL.Max != getCouldNotCompute()) { - // We cannot take the "min" MaxBECount, because non-unit stride loops may - // skip some loop tests. Taking the max over the exits is sufficiently - // conservative. TODO: We could do better taking into consideration - // non-latch exits that dominate the latch. - if (EL.MustExit && ExitingBlocks[i] == Latch) { - MaxBECount = EL.Max; - LatchMustExit = true; + ExitCounts.push_back(std::make_pair(ExitBB, EL.Exact)); + + // 2. Derive the loop's MaxBECount from each exit's max number of + // non-exiting iterations. Partition the loop exits into two kinds: + // LoopMustExits and LoopMayExits. + // + // A LoopMustExit meets two requirements: + // + // (a) Its ExitLimit.MustExit flag must be set which indicates that the exit + // test condition cannot be skipped (the tested variable has unit stride or + // the test is less-than or greater-than, rather than a strict inequality). + // + // (b) It must dominate the loop latch, hence must be tested on every loop + // iteration. + // + // If any computable LoopMustExit is found, then MaxBECount is the minimum + // EL.Max of computable LoopMustExits. Otherwise, MaxBECount is + // conservatively the maximum EL.Max, where CouldNotCompute is considered + // greater than any computable EL.Max. + if (EL.MustExit && EL.Max != getCouldNotCompute() && Latch && + DT->dominates(ExitBB, Latch)) { + if (!MustExitMaxBECount) + MustExitMaxBECount = EL.Max; + else { + MustExitMaxBECount = + getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max); + } + } else if (MayExitMaxBECount != getCouldNotCompute()) { + if (!MayExitMaxBECount || EL.Max == getCouldNotCompute()) + MayExitMaxBECount = EL.Max; + else { + MayExitMaxBECount = + getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max); } - else if (!LatchMustExit) - MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max); } } + const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount : + (MayExitMaxBECount ? 
MayExitMaxBECount : getCouldNotCompute());
   return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
 }
diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll
index 43a54b4f3033..d67d2fab2e35 100644
--- a/test/Analysis/ScalarEvolution/max-trip-count.ll
+++ b/test/Analysis/ScalarEvolution/max-trip-count.ll
@@ -124,3 +124,59 @@ for.cond.i: ; preds = %for.body.i
 bar.exit: ; preds = %for.cond.i, %for.body.i
   ret i32 0
 }
+
+; Here we have a must-exit loop latch that is not computabe and a
+; may-exit early exit that can only have one non-exiting iteration
+; before the check is forever skipped.
+;
+; CHECK-LABEL: @cannot_compute_mustexit
+; CHECK: Loop %for.body.i: Unpredictable backedge-taken count.
+; CHECK: Loop %for.body.i: Unpredictable max backedge-taken count.
+@b = common global i32 0, align 4
+
+define i32 @cannot_compute_mustexit() {
+entry:
+  store i32 -1, i32* @a, align 4
+  br label %for.body.i
+
+for.body.i: ; preds = %for.cond.i, %entry
+  %storemerge1.i = phi i32 [ -1, %entry ], [ %add.i.i, %for.cond.i ]
+  %tobool.i = icmp eq i32 %storemerge1.i, 0
+  %add.i.i = add nsw i32 %storemerge1.i, 2
+  br i1 %tobool.i, label %bar.exit, label %for.cond.i
+
+for.cond.i: ; preds = %for.body.i
+  store i32 %add.i.i, i32* @a, align 4
+  %ld = load volatile i32* @b
+  %cmp.i = icmp ne i32 %ld, 0
+  br i1 %cmp.i, label %for.body.i, label %bar.exit
+
+bar.exit: ; preds = %for.cond.i, %for.body.i
+  ret i32 0
+}
+
+; This loop has two must-exits, both of with dominate the latch. The
+; MaxBECount should be the minimum of them.
+;
+; CHECK-LABEL: @two_mustexit
+; CHECK: Loop %for.body.i: Unpredictable backedge-taken count.
+; CHECK: Loop %for.body.i: max backedge-taken count is 1
+define i32 @two_mustexit() {
+entry:
+  store i32 -1, i32* @a, align 4
+  br label %for.body.i
+
+for.body.i: ; preds = %for.cond.i, %entry
+  %storemerge1.i = phi i32 [ -1, %entry ], [ %add.i.i, %for.cond.i ]
+  %tobool.i = icmp sgt i32 %storemerge1.i, 0
+  %add.i.i = add nsw i32 %storemerge1.i, 2
+  br i1 %tobool.i, label %bar.exit, label %for.cond.i
+
+for.cond.i: ; preds = %for.body.i
+  store i32 %add.i.i, i32* @a, align 4
+  %cmp.i = icmp slt i32 %storemerge1.i, 3
+  br i1 %cmp.i, label %for.body.i, label %bar.exit
+
+bar.exit: ; preds = %for.cond.i, %for.body.i
+  ret i32 0
+}

From ae13f23de63ce045bd11c34d667c76d7ce4636f2 Mon Sep 17 00:00:00 2001
From: David Blaikie
Date: Fri, 23 May 2014 20:25:15 +0000
Subject: [PATCH 123/906] DebugInfo: Put concrete definitions referencing
 abstract definitions in the same scope as the abstract definition.

This seems like a simple cleanup/improved consistency, but also helps lay
the foundation to fix the bug mentioned in the test case: concrete
definitions preceding any inlined usage aren't properly split into
concrete + abstract (because they're not known to need it until it's too
late).

Once we start deferring this choice until later, we won't have the choice
to put concrete definitions for inlined subroutines in a different scope
from concrete definitions for non-inlined subroutines (since we won't know
at time-of-construction which one it'll be). This change brings those two
cases into alignment ahead of that future change/fix.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209547 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 4 +- .../namespace_inline_function_definition.ll | 92 +++++++++++++++++++ 2 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 test/DebugInfo/namespace_inline_function_definition.ll diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 37136b2a2028..5c802f7a2ceb 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -324,7 +324,9 @@ DIE &DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit &SPCU, if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) { assert(SPDie == AbsSPDIE); // Pick up abstract subprogram DIE. - SPDie = &SPCU.createAndAddDIE(dwarf::DW_TAG_subprogram, SPCU.getUnitDie()); + SPDie = &SPCU.createAndAddDIE( + dwarf::DW_TAG_subprogram, + *SPCU.getOrCreateContextDIE(resolve(SP.getContext()))); SPCU.addDIEEntry(*SPDie, dwarf::DW_AT_abstract_origin, *AbsSPDIE); } diff --git a/test/DebugInfo/namespace_inline_function_definition.ll b/test/DebugInfo/namespace_inline_function_definition.ll new file mode 100644 index 000000000000..65fa4a442dc6 --- /dev/null +++ b/test/DebugInfo/namespace_inline_function_definition.ll @@ -0,0 +1,92 @@ +; REQUIRES: object-emission + +; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +; Generate from clang with the following source. Note that the definition of +; the inline function follows its use to workaround another bug that should be +; fixed soon. +; namespace ns { +; int func(int i); +; } +; extern int x; +; int main() { return ns::func(x); } +; int __attribute__((always_inline)) ns::func(int i) { return i * 2; } + +; CHECK: DW_TAG_namespace +; CHECK-NEXT: DW_AT_name {{.*}} "ns" +; CHECK-NOT: DW_TAG +; CHECK: [[ABS_DEF:0x.*]]: DW_TAG_subprogram +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN2ns4funcEi" +; CHECK-NOT: DW_TAG +; CHECK: [[ABS_PRM:0x.*]]: DW_TAG_formal_parameter +; CHECK: NULL +; CHECK-NOT: NULL +; CHECK: DW_TAG_subprogram +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_abstract_origin {{.*}} {[[ABS_DEF]]} +; CHECK-NOT: DW_TAG +; CHECK: DW_TAG_formal_parameter +; CHECK: DW_AT_abstract_origin {{.*}} {[[ABS_PRM]]} +; CHECK: NULL +; CHECK: NULL +; CHECK: NULL + +@x = external global i32 + +; Function Attrs: uwtable +define i32 @main() #0 { +entry: + %i.addr.i = alloca i32, align 4 + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %0 = load i32* @x, align 4, !dbg !16 + store i32 %0, i32* %i.addr.i, align 4 + call void @llvm.dbg.declare(metadata !{i32* %i.addr.i}, metadata !17), !dbg !18 + %1 = load i32* %i.addr.i, align 4, !dbg !18 + %mul.i = mul nsw i32 %1, 2, !dbg !18 + ret i32 %mul.i, !dbg !16 +} + +; Function Attrs: alwaysinline nounwind uwtable +define i32 @_ZN2ns4funcEi(i32 %i) #1 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !17), !dbg !19 + %0 = load i32* %i.addr, align 4, !dbg !19 + %mul = mul nsw i32 %0, 2, !dbg !19 + ret i32 %mul, !dbg !19 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #2 + +attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { alwaysinline nounwind 
uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13, !14} +!llvm.ident = !{!15} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/namespace_inline_function_definition.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"namespace_inline_function_definition.cpp", metadata !"/tmp/dbginfo"} +!2 = metadata !{} +!3 = metadata !{metadata !4, metadata !9} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [main] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/namespace_inline_function_definition.cpp] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786478, metadata !1, metadata !10, metadata !"func", metadata !"func", metadata !"_ZN2ns4funcEi", i32 6, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_ZN2ns4funcEi, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [func] +!10 = metadata !{i32 786489, metadata !1, null, metadata !"ns", i32 1} ; [ DW_TAG_namespace ] [ns] [line 1] +!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!12 = metadata !{metadata !8, metadata !8} +!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!14 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!15 = metadata !{metadata !"clang version 3.5.0 "} +!16 = metadata !{i32 5, i32 0, metadata !4, null} +!17 = metadata !{i32 786689, metadata !9, metadata !"i", metadata !5, i32 16777222, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 6] +!18 = metadata !{i32 6, i32 0, metadata !9, metadata !16} +!19 = metadata !{i32 6, i32 0, metadata !9, null} From 0d4e85d19d43434b626dc073f5f4911a26919b68 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 23 May 2014 20:35:47 +0000 Subject: [PATCH 124/906] Remove a confusing use of a static method. No functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209548 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 638d8701fad1..3c892b162a4e 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -280,7 +280,7 @@ getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, UnderlyingGV = GA->getAliasee(); if (const GlobalVariable *GVar = dyn_cast(UnderlyingGV)) { if ( ( GVar->isConstant() && - UnderlyingGV->isLocalLinkage(GV->getLinkage()) ) + GV->hasLocalLinkage() ) || ( GVar->hasSection() && StringRef(GVar->getSection()).startswith(".cp.") ) ) return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA); From ef3a12f9848debb8cd9d7e5f28e8ee7069943201 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 23 May 2014 20:39:23 +0000 Subject: [PATCH 125/906] clang-format function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209550 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreISelLowering.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 3c892b162a4e..9d785864952b 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -268,10 +268,9 @@ LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const Op.getOperand(1)); } -SDValue XCoreTargetLowering:: -getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, - SelectionDAG &DAG) const -{ +SDValue XCoreTargetLowering::getGlobalAddressWrapper(SDValue GA, + const GlobalValue *GV, + SelectionDAG &DAG) const { // FIXME there is no actual debug info here SDLoc dl(GA); const GlobalValue *UnderlyingGV = GV; @@ -279,10 +278,9 @@ getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, if (const GlobalAlias *GA = dyn_cast(GV)) UnderlyingGV = GA->getAliasee(); if (const GlobalVariable *GVar = dyn_cast(UnderlyingGV)) { - if ( ( GVar->isConstant() && - GV->hasLocalLinkage() ) - || ( GVar->hasSection() && - StringRef(GVar->getSection()).startswith(".cp.") ) ) + if ((GVar->isConstant() && GV->hasLocalLinkage()) || + (GVar->hasSection() && + StringRef(GVar->getSection()).startswith(".cp."))) return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA); return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA); } From 7ce3a725d3dd69c437a5a2c079973bd9af56fce1 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 23 May 2014 20:46:21 +0000 Subject: [PATCH 126/906] Test case comments. Fix sloppiness. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209551 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Analysis/ScalarEvolution/max-trip-count.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll index d67d2fab2e35..75f7418f14a7 100644 --- a/test/Analysis/ScalarEvolution/max-trip-count.ll +++ b/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -125,7 +125,7 @@ bar.exit: ; preds = %for.cond.i, %for.bo ret i32 0 } -; Here we have a must-exit loop latch that is not computabe and a +; Here we have a must-exit loop latch that is not computable and a ; may-exit early exit that can only have one non-exiting iteration ; before the check is forever skipped. 
 ;
@@ -155,7 +155,7 @@ bar.exit: ; preds = %for.cond.i, %for.bo
   ret i32 0
 }

-; This loop has two must-exits, both of with dominate the latch. The
+; This loop has two must-exits, both of which dominate the latch. The
 ; MaxBECount should be the minimum of them.
 ;
 ; CHECK-LABEL: @two_mustexit

From 3b39845adfcfff861198db9f8ef0267bcee44f0a Mon Sep 17 00:00:00 2001
From: David Blaikie
Date: Fri, 23 May 2014 21:07:01 +0000
Subject: [PATCH 127/906] DebugInfo: Generalize a test case to not depend on
 abbreviation numbering.

It's an unnecessary detail for this test and just gets in the way when
making unrelated changes to the output in this test.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209553 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/DebugInfo/X86/block-capture.ll | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll
index 0ff91de4cce1..31b4fa937d84 100644
--- a/test/DebugInfo/X86/block-capture.ll
+++ b/test/DebugInfo/X86/block-capture.ll
@@ -4,15 +4,15 @@
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=DWARF3

 ; Checks that we emit debug info for the block variable declare.
-; CHECK: DW_TAG_subprogram [3]
-; CHECK: DW_TAG_variable [5]
-; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "block")
-; CHECK: DW_AT_location [DW_FORM_sec_offset] ({{.*}})
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_TAG_variable
+; CHECK: DW_AT_name {{.*}} "block"
+; CHECK: DW_AT_location [DW_FORM_sec_offset]

-; DWARF3: DW_TAG_subprogram [3]
-; DWARF3: DW_TAG_variable [5]
-; DWARF3: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "block")
-; DWARF3: DW_AT_location [DW_FORM_data4] ({{.*}})
+; DWARF3: DW_TAG_subprogram
+; DWARF3: DW_TAG_variable
+; DWARF3: DW_AT_name {{.*}} "block"
+; DWARF3: DW_AT_location [DW_FORM_data4]

 %struct.__block_descriptor = type { i64, i64 }
 %struct.__block_literal_generic = type { i8*, i32, i32, i8*, %struct.__block_descriptor* }

From 1288a3b8e63b7704f7b70ec64fdc3e31d7f09b91 Mon Sep 17 00:00:00 2001
From: David Blaikie
Date: Fri, 23 May 2014 21:11:46 +0000
Subject: [PATCH 128/906] DebugInfo: Generalize some tests to handle variations
 in attribute ordering.

In an effort to fix inlined debug info in situations where the out of line
definition of a function precedes any inlined usage, the order in which
some attributes are added to subprogram DIEs may change. (in essence,
definition-necessary attributes like DW_AT_low_pc/high_pc will be added
immediately, but the names, types, and other features will be delayed to
module end where they may either be added to the subprogram DIE or instead
reference an abstract definition for those values)

These tests can be generalized to be resilient to this change. 5 or so
tests actually have to be incompatibly changed to cope with this
reordering and will go along with the change that affects the order.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209554 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/DebugInfo/AArch64/variable-loc.ll | 19 ++++------ test/DebugInfo/SystemZ/variable-loc.ll | 23 +++++------- test/DebugInfo/X86/DW_AT_linkage_name.ll | 6 ++-- test/DebugInfo/X86/arguments.ll | 3 +- .../X86/dbg-value-inlined-parameter.ll | 2 +- test/DebugInfo/X86/debug-loc-offset.ll | 4 ++- test/DebugInfo/X86/gnu-public-names.ll | 36 ++++++++++++------- test/DebugInfo/X86/inline-seldag-test.ll | 3 +- test/DebugInfo/X86/pr11300.ll | 3 +- test/DebugInfo/namespace.ll | 18 ++++++---- test/Linker/type-unique-odr-a.ll | 12 ++++--- test/MC/ARM/coff-debugging-secrel.ll | 2 -- 12 files changed, 73 insertions(+), 58 deletions(-) diff --git a/test/DebugInfo/AArch64/variable-loc.ll b/test/DebugInfo/AArch64/variable-loc.ll index f28ee76ebfa3..9f432d9f2c02 100644 --- a/test/DebugInfo/AArch64/variable-loc.ll +++ b/test/DebugInfo/AArch64/variable-loc.ll @@ -1,4 +1,6 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim -filetype=obj < %s \ +; RUN: | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=DEBUG %s ; This is a regression test making sure the location of variables is correct in ; debugging information, even if they're addressed via the frame pointer. @@ -23,19 +25,10 @@ ; CHECK: add x29, sp, #416 ; CHECK: add {{x[0-9]+}}, sp, #4 -; CHECK: .Linfo_string7: -; CHECK-NEXT: main_arr - -; Now check the debugging information reflects this: -; CHECK: DW_TAG_variable -; CHECK-NEXT: .word .Linfo_string7 - - ; Rather hard-coded, but 145 => DW_OP_fbreg and the .ascii is LEB128 encoded -412. -; CHECK: DW_AT_location -; CHECK-NEXT: .byte 145 -; CHECK-NEXT: .ascii "\344|" - - +; DEBUG: DW_TAG_variable +; DEBUG-NEXT: DW_AT_name {{.*}} "main_arr" +; Rather hard-coded, but 0x91 => DW_OP_fbreg and 0xe47c is LEB128 encoded -412. +; DEBUG: DW_AT_location {{.*}}(<0x3> 91 e4 7c ) target datalayout = "e-p:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-f128:128:128-n32:64-S128" target triple = "aarch64-none-linux-gnu" diff --git a/test/DebugInfo/SystemZ/variable-loc.ll b/test/DebugInfo/SystemZ/variable-loc.ll index 2d92fd9edcc5..e0e4156d3957 100644 --- a/test/DebugInfo/SystemZ/variable-loc.ll +++ b/test/DebugInfo/SystemZ/variable-loc.ll @@ -1,4 +1,6 @@ ; RUN: llc -mtriple=s390x-linux-gnu -disable-fp-elim < %s | FileCheck %s +; RUN: llc -mtriple=s390x-linux-gnu -disable-fp-elim -filetype=obj < %s \ +; RUN: | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=DEBUG %s ; ; This is a regression test making sure the location of variables is correct in ; debugging information, even if they're addressed via the frame pointer. @@ -10,20 +12,13 @@ ; CHECK: aghi %r15, -568 ; CHECK: la %r2, 164(%r11) ; CHECK: brasl %r14, populate_array@PLT -; -; CHECK: .Linfo_string7: -; CHECK-NEXT: main_arr -; -; Now check that the debugging information reflects this: -; CHECK: DW_TAG_variable -; CHECK-NEXT: .long .Linfo_string7 -; -; Rather hard-coded, but 145 => DW_OP_fbreg and the .ascii is the sleb128 -; encoding of 164: -; CHECK: DW_AT_location -; CHECK-NEXT: .byte 145 -; CHECK-NEXT: .ascii "\244\001" -; + +; DEBUG: DW_TAG_variable +; DEBUG-NOT: DW_TAG +; DEBUG: DW_AT_name {{.*}} "main_arr" +; Rather hard-coded, but 0x91 => DW_OP_fbreg and 0xa401 is SLEB128 encoded 164. 
+; DEBUG-NOT: DW_TAG +; DEBUG: DW_AT_location {{.*}}(<0x3> 91 a4 01 ) @.str = private unnamed_addr constant [13 x i8] c"Total is %d\0A\00", align 2 diff --git a/test/DebugInfo/X86/DW_AT_linkage_name.ll b/test/DebugInfo/X86/DW_AT_linkage_name.ll index 76d3abbe358c..dce234aa9002 100644 --- a/test/DebugInfo/X86/DW_AT_linkage_name.ll +++ b/test/DebugInfo/X86/DW_AT_linkage_name.ll @@ -22,8 +22,10 @@ ; CHECK: DW_AT_name {{.*}} "~A" ; CHECK-NOT: DW_AT_MIPS_linkage_name ; CHECK: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZN1AD2Ev" -; CHECK-NEXT: DW_AT_specification {{.*}}[[A_DTOR]] +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN1AD2Ev" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_specification {{.*}}[[A_DTOR]] target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" diff --git a/test/DebugInfo/X86/arguments.ll b/test/DebugInfo/X86/arguments.ll index 673528455714..3597b2ca1dc0 100644 --- a/test/DebugInfo/X86/arguments.ll +++ b/test/DebugInfo/X86/arguments.ll @@ -15,7 +15,8 @@ ; CHECK: debug_info contents ; CHECK: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_MIPS_linkage_name{{.*}}"_Z4func3fooS_" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name{{.*}}"_Z4func3fooS_" ; CHECK-NOT: NULL ; CHECK: DW_TAG_formal_parameter ; CHECK-NEXT: DW_AT_name{{.*}}"f" diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll index 3db67ffdc7a4..45281c92953e 100644 --- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll +++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll @@ -16,7 +16,7 @@ ;FIXME: Linux shouldn't drop this parameter either... ;LINUX-NOT: DW_TAG_formal_parameter ;DARWIN: DW_TAG_formal_parameter -;DARWIN-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000055] = "sp") +;DARWIN-NEXT: DW_AT_name {{.*}} "sp" %struct.S1 = type { float*, i32 } diff --git a/test/DebugInfo/X86/debug-loc-offset.ll b/test/DebugInfo/X86/debug-loc-offset.ll index b10309c85f8c..3f4d39da7622 100644 --- a/test/DebugInfo/X86/debug-loc-offset.ll +++ b/test/DebugInfo/X86/debug-loc-offset.ll @@ -37,7 +37,9 @@ ; CHECK: DW_AT_high_pc ; CHECK: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_MIPS_linkage_name [DW_FORM_strp]{{.*}}"_Z1a1A" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name [DW_FORM_strp]{{.*}}"_Z1a1A" +; CHECK-NOT: {{DW_TAG|NULL}} ; CHECK: DW_TAG_formal_parameter ; CHECK-NEXT: DW_AT_name [DW_FORM_strp]{{.*}}"var" ; CHECK: DW_AT_location [DW_FORM_sec_offset] (0x00000000) diff --git a/test/DebugInfo/X86/gnu-public-names.ll b/test/DebugInfo/X86/gnu-public-names.ll index f4001e3af927..4e35dbe2b501 100644 --- a/test/DebugInfo/X86/gnu-public-names.ll +++ b/test/DebugInfo/X86/gnu-public-names.ll @@ -86,8 +86,10 @@ ; CHECK-NEXT: DW_AT_name {{.*}} "D" ; CHECK: [[GLOB_NS_FUNC:[0-9a-f]+]]: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_MIPS_linkage_name -; CHECK-NEXT: DW_AT_name {{.*}} "global_namespace_function" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}} "global_namespace_function" ; CHECK: [[GLOB_NS_VAR:[0-9a-f]+]]: DW_TAG_variable ; CHECK-NEXT: DW_AT_specification {{.*}}[[GLOB_NS_VAR_DECL]] @@ -96,14 +98,18 @@ ; CHECK-NEXT: DW_AT_specification {{.*}}[[D_VAR_DECL]] ; CHECK: [[MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_specification {{.*}}[[MEM_FUNC_DECL]] +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_specification {{.*}}[[MEM_FUNC_DECL]] ; CHECK: [[STATIC_MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_specification {{.*}}[[STATIC_MEM_FUNC_DECL]] +; CHECK-NOT: DW_TAG +; 
CHECK: DW_AT_specification {{.*}}[[STATIC_MEM_FUNC_DECL]] ; CHECK: [[GLOBAL_FUNC:[0-9a-f]+]]: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_MIPS_linkage_name -; CHECK-NEXT: DW_AT_name {{.*}} "global_function" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}} "global_function" ; CHECK-LABEL: .debug_gnu_pubnames contents: ; CHECK-NEXT: length = 0x000000e7 version = 0x0002 unit_offset = 0x00000000 unit_size = [[UNIT_SIZE]] @@ -166,8 +172,10 @@ ; DWARF3-NEXT: DW_AT_name {{.*}} "D" ; DWARF3: [[GLOB_NS_FUNC:[0-9a-f]+]]: DW_TAG_subprogram -; DWARF3-NEXT: DW_AT_MIPS_linkage_name -; DWARF3-NEXT: DW_AT_name {{.*}} "global_namespace_function" +; DWARF3-NOT: DW_TAG +; DWARF3: DW_AT_MIPS_linkage_name +; DWARF3-NOT: DW_TAG +; DWARF3: DW_AT_name {{.*}} "global_namespace_function" ; DWARF3: [[GLOB_NS_VAR:[0-9a-f]+]]: DW_TAG_variable ; DWARF3-NEXT: DW_AT_specification {{.*}}[[GLOB_NS_VAR_DECL]] @@ -176,14 +184,18 @@ ; DWARF3-NEXT: DW_AT_specification {{.*}}[[D_VAR_DECL]] ; DWARF3: [[MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram -; DWARF3-NEXT: DW_AT_specification {{.*}}[[MEM_FUNC_DECL]] +; DWARF3-NOT: DW_TAG +; DWARF3: DW_AT_specification {{.*}}[[MEM_FUNC_DECL]] ; DWARF3: [[STATIC_MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram -; DWARF3-NEXT: DW_AT_specification {{.*}}[[STATIC_MEM_FUNC_DECL]] +; DWARF3-NOT: DW_TAG +; DWARF3: DW_AT_specification {{.*}}[[STATIC_MEM_FUNC_DECL]] ; DWARF3: [[GLOBAL_FUNC:[0-9a-f]+]]: DW_TAG_subprogram -; DWARF3-NEXT: DW_AT_MIPS_linkage_name -; DWARF3-NEXT: DW_AT_name {{.*}} "global_function" +; DWARF3-NOT: DW_TAG +; DWARF3: DW_AT_MIPS_linkage_name +; DWARF3-NOT: DW_TAG +; DWARF3: DW_AT_name {{.*}} "global_function" ; DWARF3-LABEL: .debug_gnu_pubnames contents: ; DWARF3-NEXT: length = 0x000000e7 version = 0x0002 unit_offset = 0x00000000 unit_size = [[UNIT_SIZE]] diff --git a/test/DebugInfo/X86/inline-seldag-test.ll b/test/DebugInfo/X86/inline-seldag-test.ll index 83c61c4fc62c..f139140ee758 100644 --- a/test/DebugInfo/X86/inline-seldag-test.ll +++ b/test/DebugInfo/X86/inline-seldag-test.ll @@ -14,7 +14,8 @@ ; CHECK: DW_TAG_inlined_subroutine ; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[F:0x.*]]} ; CHECK: [[F]]: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_name {{.*}} "f" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}} "f" ; Make sure the condition test is attributed to the inline function, not the diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll index b3c911252d83..11c409c16042 100644 --- a/test/DebugInfo/X86/pr11300.ll +++ b/test/DebugInfo/X86/pr11300.ll @@ -9,7 +9,8 @@ ; CHECK: [[BAR_DECL:0x[0-9a-f]*]]: DW_TAG_subprogram ; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN3foo3barEv" ; CHECK: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_specification {{.*}} {[[BAR_DECL]]} +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_specification {{.*}} {[[BAR_DECL]]} %struct.foo = type { i8 } diff --git a/test/DebugInfo/namespace.ll b/test/DebugInfo/namespace.ll index 6af3dc3ddd9c..ca5cf808d180 100644 --- a/test/DebugInfo/namespace.ll +++ b/test/DebugInfo/namespace.ll @@ -17,11 +17,15 @@ ; CHECK-NEXT: DW_AT_name{{.*}}= "i" ; CHECK-NOT: NULL ; CHECK: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_MIPS_linkage_name -; CHECK-NEXT: DW_AT_name{{.*}}= "f1" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name{{.*}}= "f1" ; CHECK: [[FUNC1:0x[0-9a-f]*]]:{{ *}}DW_TAG_subprogram -; CHECK-NEXT: DW_AT_MIPS_linkage_name -; CHECK-NEXT: DW_AT_name{{.*}}= "f1" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name +; CHECK-NOT: DW_TAG +; 
CHECK: DW_AT_name{{.*}}= "f1" ; CHECK: NULL ; CHECK-NOT: NULL ; CHECK: [[FOO:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type @@ -45,8 +49,10 @@ ; CHECK-NOT: NULL ; CHECK: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_MIPS_linkage_name -; CHECK-NEXT: DW_AT_name{{.*}}= "func" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name{{.*}}= "func" ; CHECK-NOT: NULL ; CHECK: DW_TAG_imported_module ; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]]) diff --git a/test/Linker/type-unique-odr-a.ll b/test/Linker/type-unique-odr-a.ll index a1b8d28e6450..54befb75ba45 100644 --- a/test/Linker/type-unique-odr-a.ll +++ b/test/Linker/type-unique-odr-a.ll @@ -23,9 +23,11 @@ ; } ; ; CHECK: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_Z3bazv" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_Z3bazv" ; CHECK: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZL3barv" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZL3barv" ; CHECK: DW_TAG_class_type ; CHECK-NEXT: DW_AT_name {{.*}} "A" ; CHECK-NOT: DW_TAG @@ -33,8 +35,10 @@ ; CHECK-NEXT: DW_AT_name {{.*}} "data" ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZN1A6getFooEv" -; CHECK-NEXT: DW_AT_name {{.*}} "getFoo" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN1A6getFooEv" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}} "getFoo" ; getFoo and A may only appear once. ; CHECK-NOT: {{(getFoo)|("A")}} diff --git a/test/MC/ARM/coff-debugging-secrel.ll b/test/MC/ARM/coff-debugging-secrel.ll index a6c52db4adaf..f37b19e6a70c 100644 --- a/test/MC/ARM/coff-debugging-secrel.ll +++ b/test/MC/ARM/coff-debugging-secrel.ll @@ -34,8 +34,6 @@ entry: ; CHECK-ITANIUM: 0xC IMAGE_REL_ARM_SECREL .debug_str ; CHECK-ITANIUM: 0x12 IMAGE_REL_ARM_SECREL .debug_str ; CHECK-ITANIUM: 0x16 IMAGE_REL_ARM_SECREL .debug_line -; CHECK-ITANIUM: 0x1A IMAGE_REL_ARM_SECREL .debug_str -; CHECK-ITANIUM: 0x27 IMAGE_REL_ARM_SECREL .debug_str ; CHECK-ITANIUM: } ; CHECK-ITANIUM: Section {{.*}}.debug_pubnames { ; CHECK-ITANIUM: 0x6 IMAGE_REL_ARM_SECREL .debug_info From 6a04ef99f64c60e8368f23fa7717b74dc83cd6e1 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Sat, 24 May 2014 07:05:42 +0000 Subject: [PATCH 129/906] ARM64: extract a 32-bit subreg when selecting an inreg extend After the load/store refactoring, we were sometimes trying to feed a GPR64 into a 32-bit register offset operand. This failed in copyPhysReg. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209566 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/ARM64ISelDAGToDAG.cpp | 29 ++-- .../ARM64/register-offset-addressing.ll | 137 +++++++++++++++++- 2 files changed, 154 insertions(+), 12 deletions(-) diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp index 9b235db30a32..23c45d414e2d 100644 --- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp +++ b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp @@ -517,6 +517,21 @@ SDNode *ARM64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops); } +/// Instructions that accept extend modifiers like UXTW expect the register +/// being extended to be a GPR32, but the incoming DAG might be acting on a +/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if +/// this is the case. 
+static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { + if (N.getValueType() == MVT::i32) + return N; + + SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); + MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + SDLoc(N), MVT::i32, N, SubReg); + return SDValue(Node, 0); +} + + /// SelectArithExtendedRegister - Select a "extended register" operand. This /// operand folds in an extend followed by an optional left shift. bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, @@ -551,13 +566,7 @@ bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, // there might not be an actual 32-bit value in the program. We can // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. assert(Ext != ARM64_AM::UXTX && Ext != ARM64_AM::SXTX); - if (Reg.getValueType() == MVT::i64) { - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); - MachineSDNode *Node = CurDAG->getMachineNode( - TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, Reg, SubReg); - Reg = SDValue(Node, 0); - } - + Reg = narrowIfNeeded(CurDAG, Reg); Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32); return isWorthFolding(N); } @@ -677,7 +686,7 @@ bool ARM64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, if (Ext == ARM64_AM::InvalidShiftExtend) return false; - Offset = N.getOperand(0).getOperand(0); + Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32); } else { Offset = N.getOperand(0); @@ -746,7 +755,7 @@ bool ARM64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, if (IsExtendedRegisterWorthFolding && (Ext = getExtendTypeForNode(LHS, true)) != ARM64_AM::InvalidShiftExtend) { Base = RHS; - Offset = LHS.getOperand(0); + Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32); if (isWorthFolding(LHS)) return true; @@ -756,7 +765,7 @@ bool ARM64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, if (IsExtendedRegisterWorthFolding && (Ext = getExtendTypeForNode(RHS, true)) != ARM64_AM::InvalidShiftExtend) { Base = LHS; - Offset = RHS.getOperand(0); + Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32); if (isWorthFolding(RHS)) return true; diff --git a/test/CodeGen/ARM64/register-offset-addressing.ll b/test/CodeGen/ARM64/register-offset-addressing.ll index c27360257cdc..045712bea6ac 100644 --- a/test/CodeGen/ARM64/register-offset-addressing.ll +++ b/test/CodeGen/ARM64/register-offset-addressing.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s -define i8 @t1(i16* %a, i64 %b) { -; CHECK: t1 +define i8 @test_64bit_add(i16* %a, i64 %b) { +; CHECK-LABEL: test_64bit_add: ; CHECK: lsl [[REG:x[0-9]+]], x1, #1 ; CHECK: ldrb w0, [x0, [[REG]]] ; CHECK: ret @@ -10,3 +10,136 @@ define i8 @t1(i16* %a, i64 %b) { %tmp3 = trunc i16 %tmp2 to i8 ret i8 %tmp3 } + +; These tests are trying to form SEXT and ZEXT operations that never leave i64 +; space, to make sure LLVM can adapt the offset register correctly. 
+define void @ldst_8bit(i8* %base, i64 %offset) minsize { +; CHECK-LABEL: ldst_8bit: + + %off32.sext.tmp = shl i64 %offset, 32 + %off32.sext = ashr i64 %off32.sext.tmp, 32 + %addr8_sxtw = getelementptr i8* %base, i64 %off32.sext + %val8_sxtw = load volatile i8* %addr8_sxtw + %val32_signed = sext i8 %val8_sxtw to i32 + store volatile i32 %val32_signed, i32* @var_32bit +; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + %addrint_uxtw = ptrtoint i8* %base to i64 + %offset_uxtw = and i64 %offset, 4294967295 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to i8* + %val8_uxtw = load volatile i8* %addr_uxtw + %newval8 = add i8 %val8_uxtw, 1 + store volatile i8 %newval8, i8* @var_8bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + ret void +} + + +define void @ldst_16bit(i16* %base, i64 %offset) minsize { +; CHECK-LABEL: ldst_16bit: + + %addrint_uxtw = ptrtoint i16* %base to i64 + %offset_uxtw = and i64 %offset, 4294967295 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to i16* + %val8_uxtw = load volatile i16* %addr_uxtw + %newval8 = add i16 %val8_uxtw, 1 + store volatile i16 %newval8, i16* @var_16bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + %base_sxtw = ptrtoint i16* %base to i64 + %offset_sxtw.tmp = shl i64 %offset, 32 + %offset_sxtw = ashr i64 %offset_sxtw.tmp, 32 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to i16* + %val16_sxtw = load volatile i16* %addr_sxtw + %val64_signed = sext i16 %val16_sxtw to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + + %base_uxtwN = ptrtoint i16* %base to i64 + %offset_uxtwN = and i64 %offset, 4294967295 + %offset2_uxtwN = shl i64 %offset_uxtwN, 1 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i16* + %val32 = load volatile i32* @var_32bit + %val16_trunc32 = trunc i32 %val32 to i16 + store volatile i16 %val16_trunc32, i16* %addr_uxtwN +; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #1] + ret void +} + +define void @ldst_32bit(i32* %base, i64 %offset) minsize { +; CHECK-LABEL: ldst_32bit: + + %addrint_uxtw = ptrtoint i32* %base to i64 + %offset_uxtw = and i64 %offset, 4294967295 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to i32* + %val32_uxtw = load volatile i32* %addr_uxtw + %newval32 = add i32 %val32_uxtw, 1 + store volatile i32 %newval32, i32* @var_32bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + %base_sxtw = ptrtoint i32* %base to i64 + %offset_sxtw.tmp = shl i64 %offset, 32 + %offset_sxtw = ashr i64 %offset_sxtw.tmp, 32 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to i32* + %val32_sxtw = load volatile i32* %addr_sxtw + %val64_signed = sext i32 %val32_sxtw to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + + %base_uxtwN = ptrtoint i32* %base to i64 + %offset_uxtwN = and i64 %offset, 4294967295 + %offset2_uxtwN = shl i64 %offset_uxtwN, 2 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i32* + %val32 = load volatile i32* @var_32bit + store volatile i32 %val32, i32* %addr_uxtwN +; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2] + ret void +} + +define void 
@ldst_64bit(i64* %base, i64 %offset) minsize { +; CHECK-LABEL: ldst_64bit: + + %addrint_uxtw = ptrtoint i64* %base to i64 + %offset_uxtw = and i64 %offset, 4294967295 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to i64* + %val64_uxtw = load volatile i64* %addr_uxtw + %newval8 = add i64 %val64_uxtw, 1 + store volatile i64 %newval8, i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + %base_sxtw = ptrtoint i64* %base to i64 + %offset_sxtw.tmp = shl i64 %offset, 32 + %offset_sxtw = ashr i64 %offset_sxtw.tmp, 32 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to i64* + %val64_sxtw = load volatile i64* %addr_sxtw + store volatile i64 %val64_sxtw, i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + + %base_uxtwN = ptrtoint i64* %base to i64 + %offset_uxtwN = and i64 %offset, 4294967295 + %offset2_uxtwN = shl i64 %offset_uxtwN, 3 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i64* + %val64 = load volatile i64* @var_64bit + store volatile i64 %val64, i64* %addr_uxtwN +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3] + ret void +} + +@var_8bit = global i8 0 +@var_16bit = global i16 0 +@var_32bit = global i32 0 +@var_64bit = global i64 0 From 45788be6e2603ecfc149f43df1a6d5e04c5734d8 Mon Sep 17 00:00:00 2001 From: Michael Zolotukhin Date: Sat, 24 May 2014 08:09:57 +0000 Subject: [PATCH 130/906] Implement sext(C1 + C2*X) --> sext(C1) + sext(C2*X) and sext{C1,+,C2} --> sext(C1) + sext{0,+,C2} transformation in Scalar Evolution. That helps SLP-vectorizer to recognize consecutive loads/stores. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209568 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 35 ++++ .../SLPVectorizer/X86/consecutive-access.ll | 175 ++++++++++++++++++ 2 files changed, 210 insertions(+) create mode 100644 test/Transforms/SLPVectorizer/X86/consecutive-access.ll diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 0c864d840f11..461fdac3c6c0 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -1201,6 +1201,24 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, return getTruncateOrSignExtend(X, Ty); } + // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2 + if (auto SA = dyn_cast(Op)) { + if (SA->getNumOperands() == 2) { + auto SC1 = dyn_cast(SA->getOperand(0)); + auto SMul = dyn_cast(SA->getOperand(1)); + if (SMul && SC1) { + if (auto SC2 = dyn_cast(SMul->getOperand(0))) { + APInt C1 = SC1->getValue()->getValue(); + APInt C2 = SC2->getValue()->getValue(); + APInt CDiff = C2 - C1; + if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && + CDiff.isStrictlyPositive() && C2.isPowerOf2()) + return getAddExpr(getSignExtendExpr(SC1, Ty), + getSignExtendExpr(SMul, Ty)); + } + } + } + } // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the // operands (often constants). 
This allows analysis of something like @@ -1292,6 +1310,23 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, L, AR->getNoWrapFlags()); } } + // If Start and Step are constants, check if we can apply this + // transformation: + // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2 + auto SC1 = dyn_cast(Start); + auto SC2 = dyn_cast(Step); + if (SC1 && SC2) { + APInt C1 = SC1->getValue()->getValue(); + APInt C2 = SC2->getValue()->getValue(); + APInt CDiff = C2 - C1; + if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && + CDiff.isStrictlyPositive() && C2.isPowerOf2()) { + Start = getSignExtendExpr(Start, Ty); + const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step, + L, AR->getNoWrapFlags()); + return getAddExpr(Start, getSignExtendExpr(NewAR, Ty)); + } + } } // The cast wasn't folded; create an explicit cast node. diff --git a/test/Transforms/SLPVectorizer/X86/consecutive-access.ll b/test/Transforms/SLPVectorizer/X86/consecutive-access.ll new file mode 100644 index 000000000000..f4f112fe32c6 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/consecutive-access.ll @@ -0,0 +1,175 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +@A = common global [2000 x double] zeroinitializer, align 16 +@B = common global [2000 x double] zeroinitializer, align 16 +@C = common global [2000 x float] zeroinitializer, align 16 +@D = common global [2000 x float] zeroinitializer, align 16 + +; Currently SCEV isn't smart enough to figure out that accesses +; A[3*i], A[3*i+1] and A[3*i+2] are consecutive, but in future +; that would hopefully be fixed. For now, check that this isn't +; vectorized. +; CHECK-LABEL: foo_3double +; CHECK-NOT: x double> +; Function Attrs: nounwind ssp uwtable +define void @foo_3double(i32 %u) #0 { +entry: + %u.addr = alloca i32, align 4 + store i32 %u, i32* %u.addr, align 4 + %mul = mul nsw i32 %u, 3 + %idxprom = sext i32 %mul to i64 + %arrayidx = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom + %0 = load double* %arrayidx, align 8 + %arrayidx4 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom + %1 = load double* %arrayidx4, align 8 + %add5 = fadd double %0, %1 + store double %add5, double* %arrayidx, align 8 + %add11 = add nsw i32 %mul, 1 + %idxprom12 = sext i32 %add11 to i64 + %arrayidx13 = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom12 + %2 = load double* %arrayidx13, align 8 + %arrayidx17 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom12 + %3 = load double* %arrayidx17, align 8 + %add18 = fadd double %2, %3 + store double %add18, double* %arrayidx13, align 8 + %add24 = add nsw i32 %mul, 2 + %idxprom25 = sext i32 %add24 to i64 + %arrayidx26 = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom25 + %4 = load double* %arrayidx26, align 8 + %arrayidx30 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom25 + %5 = load double* %arrayidx30, align 8 + %add31 = fadd double %4, %5 + store double %add31, double* %arrayidx26, align 8 + ret void +} + +; SCEV should be able to tell that accesses A[C1 + C2*i], A[C1 + C2*i], ... +; A[C1 + C2*i] are consecutive, if C2 is a power of 2, and C2 > C1 > 0. +; Thus, the following code should be vectorized. 
+; CHECK-LABEL: foo_2double +; CHECK: x double> +; Function Attrs: nounwind ssp uwtable +define void @foo_2double(i32 %u) #0 { +entry: + %u.addr = alloca i32, align 4 + store i32 %u, i32* %u.addr, align 4 + %mul = mul nsw i32 %u, 2 + %idxprom = sext i32 %mul to i64 + %arrayidx = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom + %0 = load double* %arrayidx, align 8 + %arrayidx4 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom + %1 = load double* %arrayidx4, align 8 + %add5 = fadd double %0, %1 + store double %add5, double* %arrayidx, align 8 + %add11 = add nsw i32 %mul, 1 + %idxprom12 = sext i32 %add11 to i64 + %arrayidx13 = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom12 + %2 = load double* %arrayidx13, align 8 + %arrayidx17 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom12 + %3 = load double* %arrayidx17, align 8 + %add18 = fadd double %2, %3 + store double %add18, double* %arrayidx13, align 8 + ret void +} + +; Similar to the previous test, but with different datatype. +; CHECK-LABEL: foo_4float +; CHECK: x float> +; Function Attrs: nounwind ssp uwtable +define void @foo_4float(i32 %u) #0 { +entry: + %u.addr = alloca i32, align 4 + store i32 %u, i32* %u.addr, align 4 + %mul = mul nsw i32 %u, 4 + %idxprom = sext i32 %mul to i64 + %arrayidx = getelementptr inbounds [2000 x float]* @C, i32 0, i64 %idxprom + %0 = load float* %arrayidx, align 4 + %arrayidx4 = getelementptr inbounds [2000 x float]* @D, i32 0, i64 %idxprom + %1 = load float* %arrayidx4, align 4 + %add5 = fadd float %0, %1 + store float %add5, float* %arrayidx, align 4 + %add11 = add nsw i32 %mul, 1 + %idxprom12 = sext i32 %add11 to i64 + %arrayidx13 = getelementptr inbounds [2000 x float]* @C, i32 0, i64 %idxprom12 + %2 = load float* %arrayidx13, align 4 + %arrayidx17 = getelementptr inbounds [2000 x float]* @D, i32 0, i64 %idxprom12 + %3 = load float* %arrayidx17, align 4 + %add18 = fadd float %2, %3 + store float %add18, float* %arrayidx13, align 4 + %add24 = add nsw i32 %mul, 2 + %idxprom25 = sext i32 %add24 to i64 + %arrayidx26 = getelementptr inbounds [2000 x float]* @C, i32 0, i64 %idxprom25 + %4 = load float* %arrayidx26, align 4 + %arrayidx30 = getelementptr inbounds [2000 x float]* @D, i32 0, i64 %idxprom25 + %5 = load float* %arrayidx30, align 4 + %add31 = fadd float %4, %5 + store float %add31, float* %arrayidx26, align 4 + %add37 = add nsw i32 %mul, 3 + %idxprom38 = sext i32 %add37 to i64 + %arrayidx39 = getelementptr inbounds [2000 x float]* @C, i32 0, i64 %idxprom38 + %6 = load float* %arrayidx39, align 4 + %arrayidx43 = getelementptr inbounds [2000 x float]* @D, i32 0, i64 %idxprom38 + %7 = load float* %arrayidx43, align 4 + %add44 = fadd float %6, %7 + store float %add44, float* %arrayidx39, align 4 + ret void +} + +; Similar to the previous tests, but now we are dealing with AddRec SCEV. 
+; CHECK-LABEL: foo_loop +; CHECK: x double> +; Function Attrs: nounwind ssp uwtable +define i32 @foo_loop(double* %A, i32 %n) #0 { +entry: + %A.addr = alloca double*, align 8 + %n.addr = alloca i32, align 4 + %sum = alloca double, align 8 + %i = alloca i32, align 4 + store double* %A, double** %A.addr, align 8 + store i32 %n, i32* %n.addr, align 4 + store double 0.000000e+00, double* %sum, align 8 + store i32 0, i32* %i, align 4 + %cmp1 = icmp slt i32 0, %n + br i1 %cmp1, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %0 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %1 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add7, %for.body ] + %mul = mul nsw i32 %0, 2 + %idxprom = sext i32 %mul to i64 + %arrayidx = getelementptr inbounds double* %A, i64 %idxprom + %2 = load double* %arrayidx, align 8 + %mul1 = fmul double 7.000000e+00, %2 + %add = add nsw i32 %mul, 1 + %idxprom3 = sext i32 %add to i64 + %arrayidx4 = getelementptr inbounds double* %A, i64 %idxprom3 + %3 = load double* %arrayidx4, align 8 + %mul5 = fmul double 7.000000e+00, %3 + %add6 = fadd double %mul1, %mul5 + %add7 = fadd double %1, %add6 + store double %add7, double* %sum, align 8 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %i, align 4 + %cmp = icmp slt i32 %inc, %n + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.body + %split = phi double [ %add7, %for.body ] + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + %.lcssa = phi double [ %split, %for.cond.for.end_crit_edge ], [ 0.000000e+00, %entry ] + %conv = fptosi double %.lcssa to i32 + ret i32 %conv +} + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"clang version 3.5.0 "} From 4ca8b0b66defbeff6693ce1fc68436a836939a53 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sat, 24 May 2014 08:47:11 +0000 Subject: [PATCH 131/906] llvm/test/Object/ar-error.test: Don't check the message "No such file or directory". It didn't match on non-English version of Windows. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209570 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Object/ar-error.test | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/Object/ar-error.test b/test/Object/ar-error.test index 9b8ddbd78fb1..7add9b448e19 100644 --- a/test/Object/ar-error.test +++ b/test/Object/ar-error.test @@ -2,4 +2,5 @@ Test if we get a proper error with a filename that doesn't exist RUN: not llvm-ar r %t.out.a sparkle.o %t 2>&1 | FileCheck %s -CHECK: llvm-ar{{(.exe|.EXE)?}}: sparkle.o: No such file or directory +# Don't check the message "No such file or directory". +CHECK: llvm-ar{{(.exe|.EXE)?}}: sparkle.o: From 9105f66d6f3cb6330ce77a88a0ef1ec0744aba85 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Sat, 24 May 2014 12:42:26 +0000 Subject: [PATCH 132/906] AArch64/ARM64: remove AArch64 from tree prior to renaming ARM64. I'm doing this in two phases for a better "git blame" record. This commit removes the previous AArch64 backend and redirects all functionality to ARM64. It also deduplicates test-lines and removes orphaned AArch64 tests. 
The next step will be "git mv ARM64 AArch64" and rewire most of the tests. Hopefully LLVM is still functional, though it would be even better if no-one ever had to care because the rename happens straight afterwards. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209576 91177308-0d34-0410-b5e6-96231b3b80d8 --- CMakeLists.txt | 3 +- autoconf/configure.ac | 13 +- configure | 14 +- include/llvm/IR/Intrinsics.td | 1 - include/llvm/IR/IntrinsicsAArch64.td | 407 - lib/Target/AArch64/AArch64.h | 46 - lib/Target/AArch64/AArch64.td | 83 - lib/Target/AArch64/AArch64AsmPrinter.cpp | 303 - lib/Target/AArch64/AArch64AsmPrinter.h | 76 - lib/Target/AArch64/AArch64BranchFixupPass.cpp | 601 -- lib/Target/AArch64/AArch64CallingConv.td | 197 - lib/Target/AArch64/AArch64FrameLowering.cpp | 626 -- lib/Target/AArch64/AArch64FrameLowering.h | 108 - lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 1576 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 5564 ---------- lib/Target/AArch64/AArch64ISelLowering.h | 410 - lib/Target/AArch64/AArch64InstrFormats.td | 1487 --- lib/Target/AArch64/AArch64InstrInfo.cpp | 979 -- lib/Target/AArch64/AArch64InstrInfo.h | 112 - lib/Target/AArch64/AArch64InstrInfo.td | 5388 ---------- lib/Target/AArch64/AArch64InstrNEON.td | 9474 ----------------- lib/Target/AArch64/AArch64MCInstLower.cpp | 157 - .../AArch64/AArch64MachineFunctionInfo.cpp | 18 - .../AArch64/AArch64MachineFunctionInfo.h | 149 - lib/Target/AArch64/AArch64RegisterInfo.cpp | 186 - lib/Target/AArch64/AArch64RegisterInfo.h | 79 - lib/Target/AArch64/AArch64RegisterInfo.td | 290 - lib/Target/AArch64/AArch64Schedule.td | 80 - lib/Target/AArch64/AArch64ScheduleA53.td | 144 - .../AArch64/AArch64SelectionDAGInfo.cpp | 26 - lib/Target/AArch64/AArch64SelectionDAGInfo.h | 32 - lib/Target/AArch64/AArch64Subtarget.cpp | 99 - lib/Target/AArch64/AArch64Subtarget.h | 89 - lib/Target/AArch64/AArch64TargetMachine.cpp | 121 - lib/Target/AArch64/AArch64TargetMachine.h | 94 - .../AArch64/AArch64TargetObjectFile.cpp | 24 - lib/Target/AArch64/AArch64TargetObjectFile.h | 31 - .../AArch64/AArch64TargetTransformInfo.cpp | 109 - .../AArch64/AsmParser/AArch64AsmParser.cpp | 2677 ----- lib/Target/AArch64/AsmParser/CMakeLists.txt | 3 - lib/Target/AArch64/AsmParser/LLVMBuild.txt | 23 - lib/Target/AArch64/AsmParser/Makefile | 15 - lib/Target/AArch64/CMakeLists.txt | 37 - .../Disassembler/AArch64Disassembler.cpp | 1572 --- .../AArch64/Disassembler/CMakeLists.txt | 3 - lib/Target/AArch64/Disassembler/LLVMBuild.txt | 23 - lib/Target/AArch64/Disassembler/Makefile | 16 - .../InstPrinter/AArch64InstPrinter.cpp | 549 - .../AArch64/InstPrinter/AArch64InstPrinter.h | 186 - lib/Target/AArch64/InstPrinter/CMakeLists.txt | 3 - lib/Target/AArch64/InstPrinter/LLVMBuild.txt | 24 - lib/Target/AArch64/InstPrinter/Makefile | 15 - lib/Target/AArch64/LLVMBuild.txt | 35 - .../MCTargetDesc/AArch64AsmBackend.cpp | 593 -- .../MCTargetDesc/AArch64ELFObjectWriter.cpp | 291 - .../MCTargetDesc/AArch64ELFStreamer.cpp | 161 - .../AArch64/MCTargetDesc/AArch64ELFStreamer.h | 27 - .../AArch64/MCTargetDesc/AArch64FixupKinds.h | 113 - .../AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 46 - .../AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 29 - .../MCTargetDesc/AArch64MCCodeEmitter.cpp | 613 -- .../AArch64/MCTargetDesc/AArch64MCExpr.cpp | 179 - .../AArch64/MCTargetDesc/AArch64MCExpr.h | 187 - .../MCTargetDesc/AArch64MCTargetDesc.cpp | 221 - .../MCTargetDesc/AArch64MCTargetDesc.h | 72 - .../AArch64/MCTargetDesc/CMakeLists.txt | 9 - lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt | 24 - 
lib/Target/AArch64/MCTargetDesc/Makefile | 16 - lib/Target/AArch64/Makefile | 30 - lib/Target/AArch64/README.txt | 2 - .../AArch64/TargetInfo/AArch64TargetInfo.cpp | 27 - lib/Target/AArch64/TargetInfo/CMakeLists.txt | 3 - lib/Target/AArch64/TargetInfo/LLVMBuild.txt | 23 - lib/Target/AArch64/TargetInfo/Makefile | 15 - lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 1173 -- lib/Target/AArch64/Utils/AArch64BaseInfo.h | 1138 -- lib/Target/AArch64/Utils/CMakeLists.txt | 3 - lib/Target/AArch64/Utils/LLVMBuild.txt | 23 - lib/Target/AArch64/Utils/Makefile | 15 - lib/Target/ARM64/ARM64AsmPrinter.cpp | 3 + lib/Target/ARM64/ARM64TargetMachine.cpp | 3 + lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp | 3 + .../ARM64/Disassembler/ARM64Disassembler.cpp | 9 + .../ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp | 26 + .../ARM64/MCTargetDesc/ARM64MCTargetDesc.h | 2 + .../ARM64/TargetInfo/ARM64TargetInfo.cpp | 7 + lib/Target/LLVMBuild.txt | 2 +- test/CodeGen/AArch64/128bit_load_store.ll | 7 - test/CodeGen/AArch64/adc.ll | 2 - test/CodeGen/AArch64/addsub-shifted.ll | 1 - test/CodeGen/AArch64/addsub.ll | 1 - test/CodeGen/AArch64/addsub_ext.ll | 1 - test/CodeGen/AArch64/alloca.ll | 21 - test/CodeGen/AArch64/analyze-branch.ll | 1 - test/CodeGen/AArch64/andCmpBrToTBZ.ll | 74 - test/CodeGen/AArch64/assertion-rc-mismatch.ll | 1 - .../AArch64/atomic-ops-not-barriers.ll | 1 - test/CodeGen/AArch64/atomic-ops.ll | 34 - test/CodeGen/AArch64/basic-pic.ll | 1 - test/CodeGen/AArch64/bitfield-insert-0.ll | 1 - test/CodeGen/AArch64/bitfield-insert.ll | 6 - test/CodeGen/AArch64/bitfield.ll | 4 - test/CodeGen/AArch64/blockaddress.ll | 2 - test/CodeGen/AArch64/bool-loads.ll | 1 - test/CodeGen/AArch64/breg.ll | 1 - test/CodeGen/AArch64/callee-save.ll | 1 - test/CodeGen/AArch64/code-model-large-abs.ll | 1 - test/CodeGen/AArch64/compare-branch.ll | 1 - test/CodeGen/AArch64/complex-copy-noneon.ll | 1 - test/CodeGen/AArch64/concatvector-bugs.ll | 70 - test/CodeGen/AArch64/cond-sel.ll | 3 - test/CodeGen/AArch64/cpus.ll | 4 - test/CodeGen/AArch64/directcond.ll | 10 - test/CodeGen/AArch64/dp-3source.ll | 1 - test/CodeGen/AArch64/dp1.ll | 1 - test/CodeGen/AArch64/dp2.ll | 1 - test/CodeGen/AArch64/eliminate-trunc.ll | 4 - test/CodeGen/AArch64/extern-weak.ll | 11 - test/CodeGen/AArch64/extract.ll | 1 - test/CodeGen/AArch64/fastcc-reserved.ll | 1 - test/CodeGen/AArch64/fastcc.ll | 2 - test/CodeGen/AArch64/fcmp.ll | 1 - test/CodeGen/AArch64/fcvt-fixed.ll | 1 - test/CodeGen/AArch64/fcvt-int.ll | 1 - test/CodeGen/AArch64/flags-multiuse.ll | 1 - test/CodeGen/AArch64/floatdp_1source.ll | 1 - test/CodeGen/AArch64/floatdp_2source.ll | 1 - test/CodeGen/AArch64/fp-cond-sel.ll | 3 - test/CodeGen/AArch64/fp-dp3.ll | 2 - test/CodeGen/AArch64/fp128-folding.ll | 1 - test/CodeGen/AArch64/fp128.ll | 282 - test/CodeGen/AArch64/fpimm.ll | 1 - test/CodeGen/AArch64/frameaddr.ll | 1 - test/CodeGen/AArch64/free-zext.ll | 1 - test/CodeGen/AArch64/func-argpassing.ll | 9 - test/CodeGen/AArch64/func-calls.ll | 14 - test/CodeGen/AArch64/global-alignment.ll | 1 - test/CodeGen/AArch64/global_merge_1.ll | 17 - test/CodeGen/AArch64/got-abuse.ll | 2 - test/CodeGen/AArch64/i128-align.ll | 1 - test/CodeGen/AArch64/i128-shift.ll | 44 - test/CodeGen/AArch64/illegal-float-ops.ll | 1 - test/CodeGen/AArch64/init-array.ll | 2 - .../AArch64/inline-asm-constraints-badI.ll | 1 - .../AArch64/inline-asm-constraints-badK.ll | 1 - .../AArch64/inline-asm-constraints-badK2.ll | 1 - .../AArch64/inline-asm-constraints-badL.ll | 1 - .../CodeGen/AArch64/inline-asm-constraints.ll | 137 - 
test/CodeGen/AArch64/inline-asm-modifiers.ll | 147 - test/CodeGen/AArch64/jump-table.ll | 3 - test/CodeGen/AArch64/large-consts.ll | 5 - test/CodeGen/AArch64/large-frame.ll | 120 - test/CodeGen/AArch64/ldst-regoffset.ll | 2 - test/CodeGen/AArch64/ldst-unscaledimm.ll | 2 - test/CodeGen/AArch64/ldst-unsignedimm.ll | 2 - test/CodeGen/AArch64/lit.local.cfg | 4 - test/CodeGen/AArch64/literal_pools_float.ll | 4 - test/CodeGen/AArch64/literal_pools_int.ll | 58 - test/CodeGen/AArch64/local_vars.ll | 2 - test/CodeGen/AArch64/logical-imm.ll | 1 - test/CodeGen/AArch64/logical_shifted_reg.ll | 1 - test/CodeGen/AArch64/mature-mc-support.ll | 2 - test/CodeGen/AArch64/misched-basic-A53.ll | 113 - test/CodeGen/AArch64/movw-consts.ll | 14 - test/CodeGen/AArch64/movw-shift-encoding.ll | 5 - test/CodeGen/AArch64/mul-lohi.ll | 2 - test/CodeGen/AArch64/named-reg-alloc.ll | 14 - test/CodeGen/AArch64/named-reg-notareg.ll | 13 - test/CodeGen/AArch64/neon-2velem-high.ll | 331 - test/CodeGen/AArch64/neon-2velem.ll | 2854 ----- test/CodeGen/AArch64/neon-3vdiff.ll | 1834 ---- test/CodeGen/AArch64/neon-aba-abd.ll | 237 - test/CodeGen/AArch64/neon-across.ll | 473 - test/CodeGen/AArch64/neon-add-pairwise.ll | 102 - test/CodeGen/AArch64/neon-add-sub.ll | 280 - test/CodeGen/AArch64/neon-bitcast.ll | 1 - .../AArch64/neon-bitwise-instructions.ll | 1 - test/CodeGen/AArch64/neon-bsl.ll | 237 - .../AArch64/neon-compare-instructions.ll | 1 - test/CodeGen/AArch64/neon-copy.ll | 1402 --- .../CodeGen/AArch64/neon-copyPhysReg-tuple.ll | 48 - test/CodeGen/AArch64/neon-crypto.ll | 145 - test/CodeGen/AArch64/neon-diagnostics.ll | 1 - test/CodeGen/AArch64/neon-extract.ll | 1 - test/CodeGen/AArch64/neon-facge-facgt.ll | 57 - test/CodeGen/AArch64/neon-fma.ll | 1 - test/CodeGen/AArch64/neon-fpround_f128.ll | 1 - test/CodeGen/AArch64/neon-frsqrt-frecp.ll | 55 - test/CodeGen/AArch64/neon-halving-add-sub.ll | 208 - test/CodeGen/AArch64/neon-idiv.ll | 1 - test/CodeGen/AArch64/neon-load-store-v1i32.ll | 30 - test/CodeGen/AArch64/neon-max-min-pairwise.ll | 347 - test/CodeGen/AArch64/neon-max-min.ll | 311 - test/CodeGen/AArch64/neon-misc-scalar.ll | 61 - test/CodeGen/AArch64/neon-misc.ll | 2014 ---- test/CodeGen/AArch64/neon-mla-mls.ll | 1 - test/CodeGen/AArch64/neon-mov.ll | 17 - test/CodeGen/AArch64/neon-mul-div.ll | 754 -- test/CodeGen/AArch64/neon-or-combine.ll | 1 - test/CodeGen/AArch64/neon-perm.ll | 61 - .../AArch64/neon-rounding-halving-add.ll | 106 - test/CodeGen/AArch64/neon-rounding-shift.ll | 122 - .../AArch64/neon-saturating-add-sub.ll | 241 - .../AArch64/neon-saturating-rounding-shift.ll | 122 - test/CodeGen/AArch64/neon-saturating-shift.ll | 122 - test/CodeGen/AArch64/neon-scalar-abs.ll | 62 - test/CodeGen/AArch64/neon-scalar-add-sub.ll | 51 - .../AArch64/neon-scalar-by-elem-fma.ll | 1 - .../AArch64/neon-scalar-by-elem-mul.ll | 124 - test/CodeGen/AArch64/neon-scalar-compare.ll | 344 - test/CodeGen/AArch64/neon-scalar-copy.ll | 11 - test/CodeGen/AArch64/neon-scalar-cvt.ll | 134 - test/CodeGen/AArch64/neon-scalar-ext.ll | 114 - .../AArch64/neon-scalar-extract-narrow.ll | 105 - test/CodeGen/AArch64/neon-scalar-fabd.ll | 21 - test/CodeGen/AArch64/neon-scalar-fcvt.ll | 234 - .../CodeGen/AArch64/neon-scalar-fp-compare.ll | 283 - test/CodeGen/AArch64/neon-scalar-mul.ll | 144 - test/CodeGen/AArch64/neon-scalar-neg.ll | 62 - test/CodeGen/AArch64/neon-scalar-recip.ll | 93 - .../AArch64/neon-scalar-reduce-pairwise.ll | 216 - .../AArch64/neon-scalar-rounding-shift.ll | 39 - .../AArch64/neon-scalar-saturating-add-sub.ll | 243 - 
.../neon-scalar-saturating-rounding-shift.ll | 95 - .../AArch64/neon-scalar-saturating-shift.ll | 89 - test/CodeGen/AArch64/neon-scalar-shift-imm.ll | 532 - test/CodeGen/AArch64/neon-scalar-shift.ll | 237 - test/CodeGen/AArch64/neon-select_cc.ll | 202 - test/CodeGen/AArch64/neon-shift-left-long.ll | 1 - test/CodeGen/AArch64/neon-shift.ll | 172 - test/CodeGen/AArch64/neon-shl-ashr-lshr.ll | 334 - .../AArch64/neon-simd-ldst-multi-elem.ll | 2317 ---- test/CodeGen/AArch64/neon-simd-ldst-one.ll | 2300 ---- test/CodeGen/AArch64/neon-simd-ldst.ll | 165 - .../AArch64/neon-simd-post-ldst-multi-elem.ll | 355 - .../AArch64/neon-simd-post-ldst-one.ll | 320 - test/CodeGen/AArch64/neon-simd-shift.ll | 1557 --- test/CodeGen/AArch64/neon-simd-tbl.ll | 829 -- test/CodeGen/AArch64/neon-simd-vget.ll | 226 - test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll | 31 - .../AArch64/neon-truncStore-extLoad.ll | 1 - test/CodeGen/AArch64/neon-v1i1-setcc.ll | 69 - .../CodeGen/AArch64/neon-vector-list-spill.ll | 176 - test/CodeGen/AArch64/pic-eh-stubs.ll | 2 - test/CodeGen/AArch64/ragreedy-csr.ll | 1 - .../AArch64/regress-bitcast-formals.ll | 1 - .../CodeGen/AArch64/regress-f128csel-flags.ll | 1 - test/CodeGen/AArch64/regress-fp128-livein.ll | 1 - test/CodeGen/AArch64/regress-tail-livereg.ll | 1 - test/CodeGen/AArch64/regress-tblgen-chains.ll | 7 - .../AArch64/regress-w29-reserved-with-fp.ll | 1 - .../AArch64/regress-wzr-allocatable.ll | 44 - test/CodeGen/AArch64/returnaddr.ll | 1 - test/CodeGen/AArch64/setcc-takes-i32.ll | 1 - test/CodeGen/AArch64/sext_inreg.ll | 202 - test/CodeGen/AArch64/sibling-call.ll | 1 - test/CodeGen/AArch64/sincos-expansion.ll | 1 - .../AArch64/sincospow-vector-expansion.ll | 1 - test/CodeGen/AArch64/stackpointer.ll | 25 - test/CodeGen/AArch64/tail-call.ll | 1 - test/CodeGen/AArch64/tls-dynamic-together.ll | 19 - test/CodeGen/AArch64/tls-dynamics.ll | 121 - test/CodeGen/AArch64/tls-execs.ll | 64 - test/CodeGen/AArch64/tst-br.ll | 1 - .../AArch64/unaligned-vector-ld1-st1.ll | 172 - test/CodeGen/AArch64/variadic.ll | 241 - test/CodeGen/AArch64/zero-reg.ll | 1 - test/DebugInfo/AArch64/cfi-frame.ll | 58 - test/DebugInfo/AArch64/lit.local.cfg | 2 +- test/DebugInfo/AArch64/variable-loc.ll | 94 - test/MC/AArch64/adrp-relocation.s | 1 - test/MC/AArch64/basic-a64-diagnostics.s | 2 - test/MC/AArch64/basic-a64-instructions.s | 243 - test/MC/AArch64/basic-pic.s | 1 - test/MC/AArch64/elf-extern.s | 1 - test/MC/AArch64/elf-objdump.s | 1 - test/MC/AArch64/elf-reloc-addend.s | 10 - test/MC/AArch64/elf-reloc-addsubimm.s | 3 - test/MC/AArch64/elf-reloc-ldrlit.s | 3 - test/MC/AArch64/elf-reloc-ldstunsimm.s | 3 - test/MC/AArch64/elf-reloc-movw.s | 3 - test/MC/AArch64/elf-reloc-pcreladdressing.s | 5 +- test/MC/AArch64/elf-reloc-tstb.s | 3 - test/MC/AArch64/elf-reloc-uncondbrimm.s | 3 - test/MC/AArch64/gicv3-regs-diagnostics.s | 1 - test/MC/AArch64/gicv3-regs.s | 1 - test/MC/AArch64/inline-asm-modifiers.s | 1 - test/MC/AArch64/jump-table.s | 1 - test/MC/AArch64/lit.local.cfg | 4 +- test/MC/AArch64/mapping-across-sections.s | 1 - test/MC/AArch64/mapping-within-section.s | 1 - test/MC/AArch64/neon-2velem.s | 1 - test/MC/AArch64/neon-3vdiff.s | 1 - test/MC/AArch64/neon-aba-abd.s | 1 - test/MC/AArch64/neon-across.s | 1 - test/MC/AArch64/neon-add-pairwise.s | 1 - test/MC/AArch64/neon-add-sub-instructions.s | 1 - test/MC/AArch64/neon-bitwise-instructions.s | 1 - test/MC/AArch64/neon-compare-instructions.s | 1 - test/MC/AArch64/neon-crypto.s | 2 - test/MC/AArch64/neon-diagnostics.s | 227 - test/MC/AArch64/neon-extract.s | 1 - 
test/MC/AArch64/neon-facge-facgt.s | 1 - test/MC/AArch64/neon-frsqrt-frecp.s | 1 - test/MC/AArch64/neon-halving-add-sub.s | 1 - test/MC/AArch64/neon-max-min-pairwise.s | 1 - test/MC/AArch64/neon-max-min.s | 1 - test/MC/AArch64/neon-mla-mls-instructions.s | 1 - test/MC/AArch64/neon-mov.s | 1 - test/MC/AArch64/neon-mul-div-instructions.s | 1 - test/MC/AArch64/neon-perm.s | 1 - test/MC/AArch64/neon-rounding-halving-add.s | 1 - test/MC/AArch64/neon-rounding-shift.s | 1 - test/MC/AArch64/neon-saturating-add-sub.s | 1 - .../AArch64/neon-saturating-rounding-shift.s | 1 - test/MC/AArch64/neon-saturating-shift.s | 1 - test/MC/AArch64/neon-scalar-abs.s | 1 - test/MC/AArch64/neon-scalar-add-sub.s | 1 - test/MC/AArch64/neon-scalar-by-elem-mla.s | 1 - test/MC/AArch64/neon-scalar-by-elem-mul.s | 1 - .../neon-scalar-by-elem-saturating-mla.s | 1 - .../neon-scalar-by-elem-saturating-mul.s | 1 - test/MC/AArch64/neon-scalar-compare.s | 1 - test/MC/AArch64/neon-scalar-cvt.s | 1 - test/MC/AArch64/neon-scalar-dup.s | 1 - test/MC/AArch64/neon-scalar-extract-narrow.s | 1 - test/MC/AArch64/neon-scalar-fp-compare.s | 1 - test/MC/AArch64/neon-scalar-mul.s | 1 - test/MC/AArch64/neon-scalar-neg.s | 1 - test/MC/AArch64/neon-scalar-recip.s | 1 - test/MC/AArch64/neon-scalar-reduce-pairwise.s | 1 - test/MC/AArch64/neon-scalar-rounding-shift.s | 1 - .../AArch64/neon-scalar-saturating-add-sub.s | 1 - .../neon-scalar-saturating-rounding-shift.s | 1 - .../MC/AArch64/neon-scalar-saturating-shift.s | 1 - test/MC/AArch64/neon-scalar-shift-imm.s | 1 - test/MC/AArch64/neon-scalar-shift.s | 1 - test/MC/AArch64/neon-shift-left-long.s | 1 - test/MC/AArch64/neon-shift.s | 1 - test/MC/AArch64/neon-simd-copy.s | 1 - test/MC/AArch64/neon-simd-ldst-multi-elem.s | 1 - test/MC/AArch64/neon-simd-ldst-one-elem.s | 1 - test/MC/AArch64/neon-simd-misc.s | 1 - .../AArch64/neon-simd-post-ldst-multi-elem.s | 1 - test/MC/AArch64/neon-simd-shift.s | 1 - test/MC/AArch64/neon-sxtl.s | 1 - test/MC/AArch64/neon-tbl.s | 1 - test/MC/AArch64/neon-uxtl.s | 1 - test/MC/AArch64/noneon-diagnostics.s | 3 - test/MC/AArch64/optional-hash.s | 3 - test/MC/AArch64/tls-relocs.s | 142 - test/MC/AArch64/trace-regs-diagnostics.s | 1 - test/MC/AArch64/trace-regs.s | 1 - test/MC/Disassembler/AArch64/lit.local.cfg | 2 +- .../LoopVectorize/AArch64/lit.local.cfg | 2 +- 355 files changed, 73 insertions(+), 67373 deletions(-) delete mode 100644 include/llvm/IR/IntrinsicsAArch64.td delete mode 100644 lib/Target/AArch64/AArch64.h delete mode 100644 lib/Target/AArch64/AArch64.td delete mode 100644 lib/Target/AArch64/AArch64AsmPrinter.cpp delete mode 100644 lib/Target/AArch64/AArch64AsmPrinter.h delete mode 100644 lib/Target/AArch64/AArch64BranchFixupPass.cpp delete mode 100644 lib/Target/AArch64/AArch64CallingConv.td delete mode 100644 lib/Target/AArch64/AArch64FrameLowering.cpp delete mode 100644 lib/Target/AArch64/AArch64FrameLowering.h delete mode 100644 lib/Target/AArch64/AArch64ISelDAGToDAG.cpp delete mode 100644 lib/Target/AArch64/AArch64ISelLowering.cpp delete mode 100644 lib/Target/AArch64/AArch64ISelLowering.h delete mode 100644 lib/Target/AArch64/AArch64InstrFormats.td delete mode 100644 lib/Target/AArch64/AArch64InstrInfo.cpp delete mode 100644 lib/Target/AArch64/AArch64InstrInfo.h delete mode 100644 lib/Target/AArch64/AArch64InstrInfo.td delete mode 100644 lib/Target/AArch64/AArch64InstrNEON.td delete mode 100644 lib/Target/AArch64/AArch64MCInstLower.cpp delete mode 100644 lib/Target/AArch64/AArch64MachineFunctionInfo.cpp delete mode 100644 
lib/Target/AArch64/AArch64MachineFunctionInfo.h delete mode 100644 lib/Target/AArch64/AArch64RegisterInfo.cpp delete mode 100644 lib/Target/AArch64/AArch64RegisterInfo.h delete mode 100644 lib/Target/AArch64/AArch64RegisterInfo.td delete mode 100644 lib/Target/AArch64/AArch64Schedule.td delete mode 100644 lib/Target/AArch64/AArch64ScheduleA53.td delete mode 100644 lib/Target/AArch64/AArch64SelectionDAGInfo.cpp delete mode 100644 lib/Target/AArch64/AArch64SelectionDAGInfo.h delete mode 100644 lib/Target/AArch64/AArch64Subtarget.cpp delete mode 100644 lib/Target/AArch64/AArch64Subtarget.h delete mode 100644 lib/Target/AArch64/AArch64TargetMachine.cpp delete mode 100644 lib/Target/AArch64/AArch64TargetMachine.h delete mode 100644 lib/Target/AArch64/AArch64TargetObjectFile.cpp delete mode 100644 lib/Target/AArch64/AArch64TargetObjectFile.h delete mode 100644 lib/Target/AArch64/AArch64TargetTransformInfo.cpp delete mode 100644 lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp delete mode 100644 lib/Target/AArch64/AsmParser/CMakeLists.txt delete mode 100644 lib/Target/AArch64/AsmParser/LLVMBuild.txt delete mode 100644 lib/Target/AArch64/AsmParser/Makefile delete mode 100644 lib/Target/AArch64/CMakeLists.txt delete mode 100644 lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp delete mode 100644 lib/Target/AArch64/Disassembler/CMakeLists.txt delete mode 100644 lib/Target/AArch64/Disassembler/LLVMBuild.txt delete mode 100644 lib/Target/AArch64/Disassembler/Makefile delete mode 100644 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp delete mode 100644 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h delete mode 100644 lib/Target/AArch64/InstPrinter/CMakeLists.txt delete mode 100644 lib/Target/AArch64/InstPrinter/LLVMBuild.txt delete mode 100644 lib/Target/AArch64/InstPrinter/Makefile delete mode 100644 lib/Target/AArch64/LLVMBuild.txt delete mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp delete mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp delete mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp delete mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h delete mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h delete mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp delete mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h delete mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp delete mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp delete mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h delete mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp delete mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h delete mode 100644 lib/Target/AArch64/MCTargetDesc/CMakeLists.txt delete mode 100644 lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt delete mode 100644 lib/Target/AArch64/MCTargetDesc/Makefile delete mode 100644 lib/Target/AArch64/Makefile delete mode 100644 lib/Target/AArch64/README.txt delete mode 100644 lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp delete mode 100644 lib/Target/AArch64/TargetInfo/CMakeLists.txt delete mode 100644 lib/Target/AArch64/TargetInfo/LLVMBuild.txt delete mode 100644 lib/Target/AArch64/TargetInfo/Makefile delete mode 100644 lib/Target/AArch64/Utils/AArch64BaseInfo.cpp delete mode 100644 lib/Target/AArch64/Utils/AArch64BaseInfo.h delete mode 100644 lib/Target/AArch64/Utils/CMakeLists.txt delete mode 100644 lib/Target/AArch64/Utils/LLVMBuild.txt delete mode 100644 
lib/Target/AArch64/Utils/Makefile delete mode 100644 test/CodeGen/AArch64/andCmpBrToTBZ.ll delete mode 100644 test/CodeGen/AArch64/concatvector-bugs.ll delete mode 100644 test/CodeGen/AArch64/fp128.ll delete mode 100644 test/CodeGen/AArch64/global_merge_1.ll delete mode 100644 test/CodeGen/AArch64/i128-shift.ll delete mode 100644 test/CodeGen/AArch64/inline-asm-constraints.ll delete mode 100644 test/CodeGen/AArch64/inline-asm-modifiers.ll delete mode 100644 test/CodeGen/AArch64/large-frame.ll delete mode 100644 test/CodeGen/AArch64/lit.local.cfg delete mode 100644 test/CodeGen/AArch64/literal_pools_int.ll delete mode 100644 test/CodeGen/AArch64/misched-basic-A53.ll delete mode 100644 test/CodeGen/AArch64/named-reg-alloc.ll delete mode 100644 test/CodeGen/AArch64/named-reg-notareg.ll delete mode 100644 test/CodeGen/AArch64/neon-2velem-high.ll delete mode 100644 test/CodeGen/AArch64/neon-2velem.ll delete mode 100644 test/CodeGen/AArch64/neon-3vdiff.ll delete mode 100644 test/CodeGen/AArch64/neon-aba-abd.ll delete mode 100644 test/CodeGen/AArch64/neon-across.ll delete mode 100644 test/CodeGen/AArch64/neon-add-pairwise.ll delete mode 100644 test/CodeGen/AArch64/neon-add-sub.ll delete mode 100644 test/CodeGen/AArch64/neon-bsl.ll delete mode 100644 test/CodeGen/AArch64/neon-copy.ll delete mode 100644 test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll delete mode 100644 test/CodeGen/AArch64/neon-crypto.ll delete mode 100644 test/CodeGen/AArch64/neon-facge-facgt.ll delete mode 100644 test/CodeGen/AArch64/neon-frsqrt-frecp.ll delete mode 100644 test/CodeGen/AArch64/neon-halving-add-sub.ll delete mode 100644 test/CodeGen/AArch64/neon-load-store-v1i32.ll delete mode 100644 test/CodeGen/AArch64/neon-max-min-pairwise.ll delete mode 100644 test/CodeGen/AArch64/neon-max-min.ll delete mode 100644 test/CodeGen/AArch64/neon-misc-scalar.ll delete mode 100644 test/CodeGen/AArch64/neon-misc.ll delete mode 100644 test/CodeGen/AArch64/neon-mul-div.ll delete mode 100644 test/CodeGen/AArch64/neon-rounding-halving-add.ll delete mode 100644 test/CodeGen/AArch64/neon-rounding-shift.ll delete mode 100644 test/CodeGen/AArch64/neon-saturating-add-sub.ll delete mode 100644 test/CodeGen/AArch64/neon-saturating-rounding-shift.ll delete mode 100644 test/CodeGen/AArch64/neon-saturating-shift.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-abs.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-add-sub.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-compare.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-cvt.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-ext.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-extract-narrow.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-fabd.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-fcvt.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-fp-compare.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-mul.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-neg.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-recip.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-rounding-shift.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-saturating-shift.ll delete mode 100644 test/CodeGen/AArch64/neon-scalar-shift-imm.ll delete mode 100644 
test/CodeGen/AArch64/neon-scalar-shift.ll delete mode 100644 test/CodeGen/AArch64/neon-select_cc.ll delete mode 100644 test/CodeGen/AArch64/neon-shift.ll delete mode 100644 test/CodeGen/AArch64/neon-shl-ashr-lshr.ll delete mode 100644 test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll delete mode 100644 test/CodeGen/AArch64/neon-simd-ldst-one.ll delete mode 100644 test/CodeGen/AArch64/neon-simd-ldst.ll delete mode 100644 test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll delete mode 100644 test/CodeGen/AArch64/neon-simd-post-ldst-one.ll delete mode 100644 test/CodeGen/AArch64/neon-simd-shift.ll delete mode 100644 test/CodeGen/AArch64/neon-simd-tbl.ll delete mode 100644 test/CodeGen/AArch64/neon-simd-vget.ll delete mode 100644 test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll delete mode 100644 test/CodeGen/AArch64/neon-v1i1-setcc.ll delete mode 100644 test/CodeGen/AArch64/neon-vector-list-spill.ll delete mode 100644 test/CodeGen/AArch64/regress-wzr-allocatable.ll delete mode 100644 test/CodeGen/AArch64/sext_inreg.ll delete mode 100644 test/CodeGen/AArch64/stackpointer.ll delete mode 100644 test/CodeGen/AArch64/tls-dynamic-together.ll delete mode 100644 test/CodeGen/AArch64/tls-dynamics.ll delete mode 100644 test/CodeGen/AArch64/tls-execs.ll delete mode 100644 test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll delete mode 100644 test/CodeGen/AArch64/variadic.ll delete mode 100644 test/DebugInfo/AArch64/cfi-frame.ll delete mode 100644 test/DebugInfo/AArch64/variable-loc.ll delete mode 100644 test/MC/AArch64/elf-reloc-addend.s diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ec3e33ad9fe..b19ab0271ab9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,7 +127,6 @@ set(LLVM_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include) set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" ) set(LLVM_ALL_TARGETS - AArch64 ARM64 ARM CppBackend @@ -144,7 +143,7 @@ set(LLVM_ALL_TARGETS ) # List of targets with JIT support: -set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM64 ARM Mips SystemZ) +set(LLVM_TARGETS_WITH_JIT X86 PowerPC ARM64 ARM Mips SystemZ) set(LLVM_TARGETS_TO_BUILD "all" CACHE STRING "Semicolon-separated list of targets to build, or \"all\".") diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 6b9c17ae4054..344e66af65d7 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -421,7 +421,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch], powerpc*-*) llvm_cv_target_arch="PowerPC" ;; arm64*-*) llvm_cv_target_arch="ARM64" ;; arm*-*) llvm_cv_target_arch="ARM" ;; - aarch64*-*) llvm_cv_target_arch="AArch64" ;; + aarch64*-*) llvm_cv_target_arch="ARM64" ;; mips-* | mips64-*) llvm_cv_target_arch="Mips" ;; mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;; xcore-*) llvm_cv_target_arch="XCore" ;; @@ -457,7 +457,7 @@ case $host in powerpc*-*) host_arch="PowerPC" ;; arm64*-*) host_arch="ARM64" ;; arm*-*) host_arch="ARM" ;; - aarch64*-*) host_arch="AArch64" ;; + aarch64*-*) host_arch="ARM64" ;; mips-* | mips64-*) host_arch="Mips" ;; mipsel-* | mips64el-*) host_arch="Mips" ;; xcore-*) host_arch="XCore" ;; @@ -786,7 +786,6 @@ else PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;; x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;; ARM) AC_SUBST(TARGET_HAS_JIT,1) ;; - AArch64) AC_SUBST(TARGET_HAS_JIT,0) ;; Mips) AC_SUBST(TARGET_HAS_JIT,1) ;; XCore) AC_SUBST(TARGET_HAS_JIT,0) ;; MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;; @@ -797,7 +796,7 @@ else esac fi -TARGETS_WITH_JIT="AArch64 ARM ARM64 Mips PowerPC SystemZ X86" +TARGETS_WITH_JIT="ARM ARM64 Mips PowerPC SystemZ 
X86" AC_SUBST(TARGETS_WITH_JIT,$TARGETS_WITH_JIT) dnl Allow enablement of building and installing docs @@ -950,7 +949,7 @@ if test "$llvm_cv_enable_crash_overrides" = "yes" ; then fi dnl List all possible targets -ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600" +ALL_TARGETS="X86 Sparc PowerPC ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600" AC_SUBST(ALL_TARGETS,$ALL_TARGETS) dnl Allow specific targets to be specified for building (or not) @@ -971,7 +970,7 @@ case "$enableval" in x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; - aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; + aarch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; @@ -990,7 +989,7 @@ case "$enableval" in x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; - AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; + AArch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; diff --git a/configure b/configure index 778aa189d575..a5babe9c2301 100755 --- a/configure +++ b/configure @@ -4153,7 +4153,7 @@ else powerpc*-*) llvm_cv_target_arch="PowerPC" ;; arm64*-*) llvm_cv_target_arch="ARM64" ;; arm*-*) llvm_cv_target_arch="ARM" ;; - aarch64*-*) llvm_cv_target_arch="AArch64" ;; + aarch64*-*) llvm_cv_target_arch="ARM64" ;; mips-* | mips64-*) llvm_cv_target_arch="Mips" ;; mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;; xcore-*) llvm_cv_target_arch="XCore" ;; @@ -4190,7 +4190,7 @@ case $host in powerpc*-*) host_arch="PowerPC" ;; arm64*-*) host_arch="ARM64" ;; arm*-*) host_arch="ARM" ;; - aarch64*-*) host_arch="AArch64" ;; + aarch64*-*) host_arch="ARM64" ;; mips-* | mips64-*) host_arch="Mips" ;; mipsel-* | mips64el-*) host_arch="Mips" ;; xcore-*) host_arch="XCore" ;; @@ -5102,8 +5102,6 @@ else x86_64) TARGET_HAS_JIT=1 ;; ARM) TARGET_HAS_JIT=1 - ;; - AArch64) TARGET_HAS_JIT=0 ;; Mips) TARGET_HAS_JIT=1 ;; @@ -5122,7 +5120,7 @@ else esac fi -TARGETS_WITH_JIT="AArch64 ARM ARM64 Mips PowerPC SystemZ X86" +TARGETS_WITH_JIT="ARM ARM64 Mips PowerPC SystemZ X86" TARGETS_WITH_JIT=$TARGETS_WITH_JIT @@ -5359,7 +5357,7 @@ _ACEOF fi -ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600" +ALL_TARGETS="X86 Sparc PowerPC ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600" ALL_TARGETS=$ALL_TARGETS @@ -5382,7 +5380,7 @@ case "$enableval" in x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; - aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; + aarch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; @@ -5401,7 +5399,7 @@ case "$enableval" in x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; - AArch64) 
TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; + AArch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index 2ba230a09789..b133b4e40962 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -534,7 +534,6 @@ include "llvm/IR/IntrinsicsPowerPC.td" include "llvm/IR/IntrinsicsX86.td" include "llvm/IR/IntrinsicsARM.td" include "llvm/IR/IntrinsicsARM64.td" -include "llvm/IR/IntrinsicsAArch64.td" include "llvm/IR/IntrinsicsXCore.td" include "llvm/IR/IntrinsicsHexagon.td" include "llvm/IR/IntrinsicsNVVM.td" diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td deleted file mode 100644 index 61c0e5d419f4..000000000000 --- a/include/llvm/IR/IntrinsicsAArch64.td +++ /dev/null @@ -1,407 +0,0 @@ -//===- IntrinsicsAArch64.td - Defines AArch64 intrinsics -----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines all of the AArch64-specific intrinsics. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Advanced SIMD (NEON) - -let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". - -// Vector Absolute Compare (Floating Point) -def int_aarch64_neon_vacgeq : - Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; -def int_aarch64_neon_vacgtq : - Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - -// Vector saturating accumulate -def int_aarch64_neon_suqadd : Neon_2Arg_Intrinsic; -def int_aarch64_neon_usqadd : Neon_2Arg_Intrinsic; - -// Vector Bitwise reverse -def int_aarch64_neon_rbit : Neon_1Arg_Intrinsic; - -// Vector extract and narrow -def int_aarch64_neon_xtn : - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; - -// Vector floating-point convert -def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic; -def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic; -def int_aarch64_neon_vcvtxn : - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; -def int_aarch64_neon_vcvtzs : - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; -def int_aarch64_neon_vcvtzu : - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; - -// Vector maxNum (Floating Point) -def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic; - -// Vector minNum (Floating Point) -def int_aarch64_neon_vminnm : Neon_2Arg_Intrinsic; - -// Vector Pairwise maxNum (Floating Point) -def int_aarch64_neon_vpmaxnm : Neon_2Arg_Intrinsic; - -// Vector Pairwise minNum (Floating Point) -def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic; - -// Vector Multiply Extended and Scalar Multiply Extended (Floating Point) -def int_aarch64_neon_vmulx : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; - -class Neon_N2V_Intrinsic - : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem]>; -class Neon_N3V_Intrinsic - : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem]>; -class Neon_N2V_Narrow_Intrinsic - : 
Intrinsic<[llvm_anyvector_ty], - [LLVMExtendedType<0>, llvm_i32_ty], - [IntrNoMem]>; - -// Vector rounding shift right by immediate (Signed) -def int_aarch64_neon_vsrshr : Neon_N2V_Intrinsic; -def int_aarch64_neon_vurshr : Neon_N2V_Intrinsic; -def int_aarch64_neon_vsqshlu : Neon_N2V_Intrinsic; - -def int_aarch64_neon_vsri : Neon_N3V_Intrinsic; -def int_aarch64_neon_vsli : Neon_N3V_Intrinsic; - -def int_aarch64_neon_vsqshrun : Neon_N2V_Narrow_Intrinsic; -def int_aarch64_neon_vrshrn : Neon_N2V_Narrow_Intrinsic; -def int_aarch64_neon_vsqrshrun : Neon_N2V_Narrow_Intrinsic; -def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic; -def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic; -def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic; -def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic; - -// Vector across -class Neon_Across_Intrinsic - : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; - -def int_aarch64_neon_saddlv : Neon_Across_Intrinsic; -def int_aarch64_neon_uaddlv : Neon_Across_Intrinsic; -def int_aarch64_neon_smaxv : Neon_Across_Intrinsic; -def int_aarch64_neon_umaxv : Neon_Across_Intrinsic; -def int_aarch64_neon_sminv : Neon_Across_Intrinsic; -def int_aarch64_neon_uminv : Neon_Across_Intrinsic; -def int_aarch64_neon_vaddv : Neon_Across_Intrinsic; -def int_aarch64_neon_vmaxv : - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>; -def int_aarch64_neon_vminv : - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>; -def int_aarch64_neon_vmaxnmv : - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>; -def int_aarch64_neon_vminnmv : - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>; - -// Vector Table Lookup. -def int_aarch64_neon_vtbl1 : - Intrinsic<[llvm_anyvector_ty], - [llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; - -def int_aarch64_neon_vtbl2 : - Intrinsic<[llvm_anyvector_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], - [IntrNoMem]>; - -def int_aarch64_neon_vtbl3 : - Intrinsic<[llvm_anyvector_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, - LLVMMatchType<0>], [IntrNoMem]>; - -def int_aarch64_neon_vtbl4 : - Intrinsic<[llvm_anyvector_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, - llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; - -// Vector Table Extension. -// Some elements of the destination vector may not be updated, so the original -// value of that vector is passed as the first argument. The next 1-4 -// arguments after that are the table. 
-def int_aarch64_neon_vtbx1 : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_v16i8_ty, LLVMMatchType<0>], - [IntrNoMem]>; - -def int_aarch64_neon_vtbx2 : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty, - LLVMMatchType<0>], [IntrNoMem]>; - -def int_aarch64_neon_vtbx3 : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty, - llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; - -def int_aarch64_neon_vtbx4 : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty, - llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], - [IntrNoMem]>; - -// Vector Load/store -def int_aarch64_neon_vld1x2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], - [llvm_ptr_ty, llvm_i32_ty], - [IntrReadArgMem]>; -def int_aarch64_neon_vld1x3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, - LLVMMatchType<0>], - [llvm_ptr_ty, llvm_i32_ty], - [IntrReadArgMem]>; -def int_aarch64_neon_vld1x4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, - LLVMMatchType<0>, LLVMMatchType<0>], - [llvm_ptr_ty, llvm_i32_ty], - [IntrReadArgMem]>; - -def int_aarch64_neon_vst1x2 : Intrinsic<[], - [llvm_ptr_ty, llvm_anyvector_ty, - LLVMMatchType<0>, llvm_i32_ty], - [IntrReadWriteArgMem]>; -def int_aarch64_neon_vst1x3 : Intrinsic<[], - [llvm_ptr_ty, llvm_anyvector_ty, - LLVMMatchType<0>, LLVMMatchType<0>, - llvm_i32_ty], [IntrReadWriteArgMem]>; -def int_aarch64_neon_vst1x4 : Intrinsic<[], - [llvm_ptr_ty, llvm_anyvector_ty, - LLVMMatchType<0>, LLVMMatchType<0>, - LLVMMatchType<0>, llvm_i32_ty], - [IntrReadWriteArgMem]>; - -// Scalar Add -def int_aarch64_neon_vaddds : - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; -def int_aarch64_neon_vadddu : - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; - - -// Scalar Sub -def int_aarch64_neon_vsubds : - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; -def int_aarch64_neon_vsubdu : - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; - - -// Scalar Shift -// Scalar Shift Left -def int_aarch64_neon_vshlds : - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; -def int_aarch64_neon_vshldu : - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; - -// Scalar Saturating Shift Left -def int_aarch64_neon_vqshls : Neon_2Arg_Intrinsic; -def int_aarch64_neon_vqshlu : Neon_2Arg_Intrinsic; - -// Scalar Shift Rouding Left -def int_aarch64_neon_vrshlds : - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; -def int_aarch64_neon_vrshldu : - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; - -// Scalar Saturating Rounding Shift Left -def int_aarch64_neon_vqrshls : Neon_2Arg_Intrinsic; -def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic; - -// Scalar Reduce Pairwise Add. -def int_aarch64_neon_vpadd : - Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty],[IntrNoMem]>; -def int_aarch64_neon_vpfadd : - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>; - -// Scalar Reduce Pairwise Floating Point Max/Min. -def int_aarch64_neon_vpmax : - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>; -def int_aarch64_neon_vpmin : - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>; - -// Scalar Reduce Pairwise Floating Point Maxnm/Minnm. 
-def int_aarch64_neon_vpfmaxnm : - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>; -def int_aarch64_neon_vpfminnm : - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>; - -// Scalar Signed Integer Convert To Floating-point -def int_aarch64_neon_vcvtint2fps : - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>; - -// Scalar Unsigned Integer Convert To Floating-point -def int_aarch64_neon_vcvtint2fpu : - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>; - -// Scalar Floating-point Convert -def int_aarch64_neon_fcvtxn : - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; -def int_aarch64_neon_fcvtns : - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; -def int_aarch64_neon_fcvtnu : - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; -def int_aarch64_neon_fcvtps : - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; -def int_aarch64_neon_fcvtpu : - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; -def int_aarch64_neon_fcvtms : - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; -def int_aarch64_neon_fcvtmu : - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; -def int_aarch64_neon_fcvtas : - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; -def int_aarch64_neon_fcvtau : - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; -def int_aarch64_neon_fcvtzs : - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; -def int_aarch64_neon_fcvtzu : - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; - -// Scalar Floating-point Reciprocal Estimate. -def int_aarch64_neon_vrecpe : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; - -// Scalar Floating-point Reciprocal Exponent -def int_aarch64_neon_vrecpx : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; - -// Scalar Floating-point Reciprocal Square Root Estimate -def int_aarch64_neon_vrsqrte : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; - -// Scalar Floating-point Reciprocal Step -def int_aarch64_neon_vrecps : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; - -// Scalar Floating-point Reciprocal Square Root Step -def int_aarch64_neon_vrsqrts : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; - -// Compare with vector operands. -class Neon_Cmp_Intrinsic : - Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_anyvector_ty], - [IntrNoMem]>; - -// Floating-point compare with scalar operands. 
-class Neon_Float_Cmp_Intrinsic : - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_anyfloat_ty], - [IntrNoMem]>; - -// Scalar Compare Equal -def int_aarch64_neon_vceq : Neon_Cmp_Intrinsic; -def int_aarch64_neon_fceq : Neon_Float_Cmp_Intrinsic; - -// Scalar Compare Greater-Than or Equal -def int_aarch64_neon_vcge : Neon_Cmp_Intrinsic; -def int_aarch64_neon_vchs : Neon_Cmp_Intrinsic; -def int_aarch64_neon_fcge : Neon_Float_Cmp_Intrinsic; -def int_aarch64_neon_fchs : Neon_Float_Cmp_Intrinsic; - -// Scalar Compare Less-Than or Equal -def int_aarch64_neon_vclez : Neon_Cmp_Intrinsic; -def int_aarch64_neon_fclez : Neon_Float_Cmp_Intrinsic; - -// Scalar Compare Less-Than -def int_aarch64_neon_vcltz : Neon_Cmp_Intrinsic; -def int_aarch64_neon_fcltz : Neon_Float_Cmp_Intrinsic; - -// Scalar Compare Greater-Than -def int_aarch64_neon_vcgt : Neon_Cmp_Intrinsic; -def int_aarch64_neon_vchi : Neon_Cmp_Intrinsic; -def int_aarch64_neon_fcgt : Neon_Float_Cmp_Intrinsic; -def int_aarch64_neon_fchi : Neon_Float_Cmp_Intrinsic; - -// Scalar Compare Bitwise Test Bits -def int_aarch64_neon_vtstd : Neon_Cmp_Intrinsic; - -// Scalar Floating-point Absolute Compare Greater Than Or Equal -def int_aarch64_neon_vcage : Neon_Cmp_Intrinsic; -def int_aarch64_neon_fcage : Neon_Float_Cmp_Intrinsic; - -// Scalar Floating-point Absolute Compare Greater Than -def int_aarch64_neon_vcagt : Neon_Cmp_Intrinsic; -def int_aarch64_neon_fcagt : Neon_Float_Cmp_Intrinsic; - -// Scalar Signed Saturating Accumulated of Unsigned Value -def int_aarch64_neon_vuqadd : Neon_2Arg_Intrinsic; - -// Scalar Unsigned Saturating Accumulated of Signed Value -def int_aarch64_neon_vsqadd : Neon_2Arg_Intrinsic; - -// Scalar Absolute Value -def int_aarch64_neon_vabs : - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>; - -// Scalar Absolute Difference -def int_aarch64_neon_vabd : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; - -// Scalar Negate Value -def int_aarch64_neon_vneg : - Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>; - -// Signed Saturating Doubling Multiply-Add Long -def int_aarch64_neon_vqdmlal : Neon_3Arg_Long_Intrinsic; - -// Signed Saturating Doubling Multiply-Subtract Long -def int_aarch64_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic; - -def int_aarch64_neon_vmull_p64 : - Intrinsic<[llvm_v16i8_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; - -class Neon_2Arg_ShiftImm_Intrinsic - : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>; - -class Neon_3Arg_ShiftImm_Intrinsic - : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty, llvm_i32_ty], - [IntrNoMem]>; - -// Scalar Shift Right (Immediate) -def int_aarch64_neon_vshrds_n : Neon_2Arg_ShiftImm_Intrinsic; -def int_aarch64_neon_vshrdu_n : Neon_2Arg_ShiftImm_Intrinsic; - -// Scalar Shift Right and Accumulate (Immediate) -def int_aarch64_neon_vsrads_n : Neon_3Arg_ShiftImm_Intrinsic; -def int_aarch64_neon_vsradu_n : Neon_3Arg_ShiftImm_Intrinsic; - -// Scalar Rounding Shift Right and Accumulate (Immediate) -def int_aarch64_neon_vrsrads_n : Neon_3Arg_ShiftImm_Intrinsic; -def int_aarch64_neon_vrsradu_n : Neon_3Arg_ShiftImm_Intrinsic; - -// Scalar Shift Left (Immediate) -def int_aarch64_neon_vshld_n : Neon_2Arg_ShiftImm_Intrinsic; - -// Scalar Saturating Shift Left (Immediate) -def int_aarch64_neon_vqshls_n : Neon_N2V_Intrinsic; -def int_aarch64_neon_vqshlu_n : Neon_N2V_Intrinsic; - -// Scalar Signed Saturating Shift Left Unsigned (Immediate) -def int_aarch64_neon_vqshlus_n : Neon_N2V_Intrinsic; - -// Scalar Signed 
Fixed-point Convert To Floating-Point (Immediate) -def int_aarch64_neon_vcvtfxs2fp_n : - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem]>; - -// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate) -def int_aarch64_neon_vcvtfxu2fp_n : - Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem]>; - -// Scalar Floating-point Convert To Signed Fixed-point (Immediate) -def int_aarch64_neon_vcvtfp2fxs_n : - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>; - -// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate) -def int_aarch64_neon_vcvtfp2fxu_n : - Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>; - -} diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h deleted file mode 100644 index 0297de120cb8..000000000000 --- a/lib/Target/AArch64/AArch64.h +++ /dev/null @@ -1,46 +0,0 @@ -//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in the LLVM -// AArch64 back-end. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_AARCH64_H -#define LLVM_TARGET_AARCH64_H - -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - -class AArch64AsmPrinter; -class FunctionPass; -class AArch64TargetMachine; -class MachineInstr; -class MCInst; - -FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM, - CodeGenOpt::Level OptLevel); - -FunctionPass *createAArch64CleanupLocalDynamicTLSPass(); - -FunctionPass *createAArch64BranchFixupPass(); - -/// \brief Creates an AArch64-specific Target Transformation Info pass. -ImmutablePass *createAArch64TargetTransformInfoPass( - const AArch64TargetMachine *TM); - -void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - AArch64AsmPrinter &AP); - - -} - -#endif diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td deleted file mode 100644 index e49afd60c8e3..000000000000 --- a/lib/Target/AArch64/AArch64.td +++ /dev/null @@ -1,83 +0,0 @@ -//===- AArch64.td - Describe the AArch64 Target Machine -------*- tblgen -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the top level entry point for the AArch64 target. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Target-independent interfaces -//===----------------------------------------------------------------------===// - -include "llvm/Target/Target.td" - -//===----------------------------------------------------------------------===// -// AArch64 Subtarget features. 
-// - -def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", - "Enable ARMv8 FP">; - -def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", - "Enable Advanced SIMD instructions", [FeatureFPARMv8]>; - -def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", - "Enable cryptographic instructions">; - -//===----------------------------------------------------------------------===// -// AArch64 Processors -// - -include "AArch64Schedule.td" - -class ProcNoItin Features> - : Processor; - -def : Processor<"generic", GenericItineraries, [FeatureFPARMv8, FeatureNEON]>; - -def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", - "Cortex-A53 ARM processors", - [FeatureFPARMv8, - FeatureNEON, - FeatureCrypto]>; - -def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", - "Cortex-A57 ARM processors", - [FeatureFPARMv8, - FeatureNEON, - FeatureCrypto]>; - -def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; -def : Processor<"cortex-a57", NoItineraries, [ProcA57]>; - -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "AArch64RegisterInfo.td" - -include "AArch64CallingConv.td" - -//===----------------------------------------------------------------------===// -// Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "AArch64InstrInfo.td" - -def AArch64InstrInfo : InstrInfo { - let noNamedPositionallyEncodedOperands = 1; -} - -//===----------------------------------------------------------------------===// -// Declare the target which we are implementing -//===----------------------------------------------------------------------===// - -def AArch64 : Target { - let InstructionSet = AArch64InstrInfo; -} diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp deleted file mode 100644 index 5b5148351fac..000000000000 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ /dev/null @@ -1,303 +0,0 @@ -//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to GAS-format AArch64 assembly language. -// -//===----------------------------------------------------------------------===// - -#include "AArch64AsmPrinter.h" -#include "InstPrinter/AArch64InstPrinter.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/Mangler.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - -#define DEBUG_TYPE "asm-printer" - -/// Try to print a floating-point register as if it belonged to a specified -/// register-class. For example the inline asm operand modifier "b" requires its -/// argument to be printed as "bN". 
-static bool printModifiedFPRAsmOperand(const MachineOperand &MO, - const TargetRegisterInfo *TRI, - char RegType, raw_ostream &O) { - if (!MO.isReg()) - return true; - - for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { - if (AArch64::FPR8RegClass.contains(*AR)) { - O << RegType << TRI->getEncodingValue(MO.getReg()); - return false; - } - } - - // The register doesn't correspond to anything floating-point like. - return true; -} - -/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR -/// with the obvious type and an immediate 0 as either wzr or xzr. -static bool printModifiedGPRAsmOperand(const MachineOperand &MO, - const TargetRegisterInfo *TRI, - const TargetRegisterClass &RegClass, - raw_ostream &O) { - char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x'; - - if (MO.isImm() && MO.getImm() == 0) { - O << Prefix << "zr"; - return false; - } else if (MO.isReg()) { - if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) { - O << (Prefix == 'x' ? "sp" : "wsp"); - return false; - } - - for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { - if (RegClass.contains(*AR)) { - O << AArch64InstPrinter::getRegisterName(*AR); - return false; - } - } - } - - return true; -} - -bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO, - bool PrintImmediatePrefix, - StringRef Suffix, raw_ostream &O) { - StringRef Name; - StringRef Modifier; - switch (MO.getType()) { - default: - return true; - case MachineOperand::MO_GlobalAddress: - Name = getSymbol(MO.getGlobal())->getName(); - - // Global variables may be accessed either via a GOT or in various fun and - // interesting TLS-model specific ways. Set the prefix modifier as - // appropriate here. - if (const GlobalVariable *GV = dyn_cast(MO.getGlobal())) { - Reloc::Model RelocM = TM.getRelocationModel(); - if (GV->isThreadLocal()) { - switch (TM.getTLSModel(GV)) { - case TLSModel::GeneralDynamic: - Modifier = "tlsdesc"; - break; - case TLSModel::LocalDynamic: - Modifier = "dtprel"; - break; - case TLSModel::InitialExec: - Modifier = "gottprel"; - break; - case TLSModel::LocalExec: - Modifier = "tprel"; - break; - } - } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { - Modifier = "got"; - } - } - break; - case MachineOperand::MO_BlockAddress: - Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName(); - break; - case MachineOperand::MO_ConstantPoolIndex: - Name = GetCPISymbol(MO.getIndex())->getName(); - break; - } - - // Some instructions (notably ADRP) don't take the # prefix for - // immediates. Only print it if asked to. - if (PrintImmediatePrefix) - O << '#'; - - // Only need the joining "_" if both the prefix and the suffix are - // non-null. This little block simply takes care of the four possibly - // combinations involved there. 
- if (Modifier == "" && Suffix == "") - O << Name; - else if (Modifier == "" && Suffix != "") - O << ":" << Suffix << ':' << Name; - else if (Modifier != "" && Suffix == "") - O << ":" << Modifier << ':' << Name; - else - O << ":" << Modifier << '_' << Suffix << ':' << Name; - - return false; -} - -bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O) { - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); - - if (!ExtraCode) - ExtraCode = ""; - - switch(ExtraCode[0]) { - default: - if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O)) - return false; - break; - case 'w': - // Output 32-bit general register operand, constant zero as wzr, or stack - // pointer as wsp. Ignored when used with other operand types. - if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::GPR32RegClass, O)) - return false; - break; - case 'x': - // Output 64-bit general register operand, constant zero as xzr, or stack - // pointer as sp. Ignored when used with other operand types. - if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::GPR64RegClass, O)) - return false; - break; - case 'H': - // Output higher numbered of a 64-bit general register pair - case 'Q': - // Output least significant register of a 64-bit general register pair - case 'R': - // Output most significant register of a 64-bit general register pair - - // FIXME note: these three operand modifiers will require, to some extent, - // adding a paired GPR64 register class. Initial investigation suggests that - // assertions are hit unless it has a type and is made legal for that type - // in ISelLowering. After that step is made, the number of modifications - // needed explodes (operation legality, calling conventions, stores, reg - // copies ...). - llvm_unreachable("FIXME: Unimplemented register pairs"); - case 'b': - case 'h': - case 's': - case 'd': - case 'q': - if (!printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - ExtraCode[0], O)) - return false; - break; - case 'A': - // Output symbolic address with appropriate relocation modifier (also - // suitable for ADRP). - if (!printSymbolicAddress(MI->getOperand(OpNum), false, "", O)) - return false; - break; - case 'L': - // Output bits 11:0 of symbolic address with appropriate :lo12: relocation - // modifier. - if (!printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O)) - return false; - break; - case 'G': - // Output bits 23:12 of symbolic address with appropriate :hi12: relocation - // modifier (currently only for TLS local exec). - if (!printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O)) - return false; - break; - case 'a': - return PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O); - } - - // There's actually no operand modifier, which leads to a slightly eclectic - // set of behaviour which we have to handle here. - const MachineOperand &MO = MI->getOperand(OpNum); - switch (MO.getType()) { - default: - llvm_unreachable("Unexpected operand for inline assembly"); - case MachineOperand::MO_Register: - // GCC prints the unmodified operand of a 'w' constraint as the vector - // register. Technically, we could allocate the argument as a VPR128, but - // that leads to extremely dodgy copies being generated to get the data - // there. 
- if (printModifiedFPRAsmOperand(MO, TRI, 'v', O)) - O << AArch64InstPrinter::getRegisterName(MO.getReg()); - break; - case MachineOperand::MO_Immediate: - O << '#' << MO.getImm(); - break; - case MachineOperand::MO_FPImmediate: - assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected"); - O << "#0.0"; - break; - case MachineOperand::MO_BlockAddress: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_GlobalAddress: - return printSymbolicAddress(MO, false, "", O); - } - - return false; -} - -bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNum, - unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - // Currently both the memory constraints (m and Q) behave the same and amount - // to the address as a single register. In future, we may allow "m" to provide - // both a base and an offset. - const MachineOperand &MO = MI->getOperand(OpNum); - assert(MO.isReg() && "unexpected inline assembly memory operand"); - O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']'; - return false; -} - -#include "AArch64GenMCPseudoLowering.inc" - -void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { - // Do any auto-generated pseudo lowerings. - if (emitPseudoExpansionLowering(OutStreamer, MI)) - return; - - MCInst TmpInst; - LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this); - EmitToStreamer(OutStreamer, TmpInst); -} - -void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { - if (Subtarget->isTargetELF()) { - const TargetLoweringObjectFileELF &TLOFELF = - static_cast(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo(); - - // Output stubs for external and common global variables. - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer.EmitLabel(Stubs[i].first); - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(0)); - } - Stubs.clear(); - } - } -} - -bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) { - return AsmPrinter::runOnMachineFunction(MF); -} - -// Force static initialization. -extern "C" void LLVMInitializeAArch64AsmPrinter() { - RegisterAsmPrinter X(TheAArch64leTarget); - RegisterAsmPrinter Y(TheAArch64beTarget); -} - diff --git a/lib/Target/AArch64/AArch64AsmPrinter.h b/lib/Target/AArch64/AArch64AsmPrinter.h deleted file mode 100644 index f77553c7b8b6..000000000000 --- a/lib/Target/AArch64/AArch64AsmPrinter.h +++ /dev/null @@ -1,76 +0,0 @@ -// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the AArch64 assembly printer class. 
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_AARCH64ASMPRINTER_H
-#define LLVM_AARCH64ASMPRINTER_H
-
-#include "AArch64.h"
-#include "AArch64TargetMachine.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-
-class MCOperand;
-
-class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter {
-
- /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
- /// make the right decision when printing asm code for different targets.
- const AArch64Subtarget *Subtarget;
-
- // emitPseudoExpansionLowering - tblgen'erated.
- bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
- const MachineInstr *MI);
-
- public:
- explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {
- Subtarget = &TM.getSubtarget<AArch64Subtarget>();
- }
-
- bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
-
- MCOperand lowerSymbolOperand(const MachineOperand &MO,
- const MCSymbol *Sym) const;
-
- void EmitInstruction(const MachineInstr *MI) override;
- void EmitEndOfAsmFile(Module &M) override;
-
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
-
- /// printSymbolicAddress - Given some kind of reasonably bare symbolic
- /// reference, print out the appropriate asm string to represent it. If
- /// appropriate, a relocation-specifier will be produced, composed of a
- /// general class derived from the MO parameter and an instruction-specific
- /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is
- /// given.
- bool printSymbolicAddress(const MachineOperand &MO,
- bool PrintImmediatePrefix,
- StringRef Suffix, raw_ostream &O);
-
- const char *getPassName() const override {
- return "AArch64 Assembly Printer";
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-};
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/AArch64/AArch64BranchFixupPass.cpp b/lib/Target/AArch64/AArch64BranchFixupPass.cpp
deleted file mode 100644
index 585cbee9966b..000000000000
--- a/lib/Target/AArch64/AArch64BranchFixupPass.cpp
+++ /dev/null
@@ -1,601 +0,0 @@
-//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass that fixes AArch64 branches which have ended up out
-// of range for their immediate operands.
-// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64InstrInfo.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#define DEBUG_TYPE "aarch64-branch-fixup" - -STATISTIC(NumSplit, "Number of uncond branches inserted"); -STATISTIC(NumCBrFixed, "Number of cond branches fixed"); - -/// Return the worst case padding that could result from unknown offset bits. -/// This does not include alignment padding caused by known offset bits. -/// -/// @param LogAlign log2(alignment) -/// @param KnownBits Number of known low offset bits. -static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { - if (KnownBits < LogAlign) - return (1u << LogAlign) - (1u << KnownBits); - return 0; -} - -namespace { - /// Due to limited PC-relative displacements, conditional branches to distant - /// blocks may need converting into an unconditional equivalent. For example: - /// tbz w1, #0, far_away - /// becomes - /// tbnz w1, #0, skip - /// b far_away - /// skip: - class AArch64BranchFixup : public MachineFunctionPass { - /// Information about the offset and size of a single basic block. - struct BasicBlockInfo { - /// Distance from the beginning of the function to the beginning of this - /// basic block. - /// - /// Offsets are computed assuming worst case padding before an aligned - /// block. This means that subtracting basic block offsets always gives a - /// conservative estimate of the real distance which may be smaller. - /// - /// Because worst case padding is used, the computed offset of an aligned - /// block may not actually be aligned. - unsigned Offset; - - /// Size of the basic block in bytes. If the block contains inline - /// assembly, this is a worst case estimate. - /// - /// The size does not include any alignment padding whether from the - /// beginning of the block, or from an aligned jump table at the end. - unsigned Size; - - /// The number of low bits in Offset that are known to be exact. The - /// remaining bits of Offset are an upper bound. - uint8_t KnownBits; - - /// When non-zero, the block contains instructions (inline asm) of unknown - /// size. The real size may be smaller than Size bytes by a multiple of 1 - /// << Unalign. - uint8_t Unalign; - - BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {} - - /// Compute the number of known offset bits internally to this block. - /// This number should be used to predict worst case padding when - /// splitting the block. - unsigned internalKnownBits() const { - unsigned Bits = Unalign ? Unalign : KnownBits; - // If the block size isn't a multiple of the known bits, assume the - // worst case padding. - if (Size & ((1u << Bits) - 1)) - Bits = countTrailingZeros(Size); - return Bits; - } - - /// Compute the offset immediately following this block. If LogAlign is - /// specified, return the offset the successor block will get if it has - /// this alignment. - unsigned postOffset(unsigned LogAlign = 0) const { - unsigned PO = Offset + Size; - if (!LogAlign) - return PO; - // Add alignment padding from the terminator. - return PO + UnknownPadding(LogAlign, internalKnownBits()); - } - - /// Compute the number of known low bits of postOffset. 
If this block
- /// contains inline asm, the number of known bits drops to the
- /// instruction alignment. An aligned terminator may increase the number
- /// of know bits.
- /// If LogAlign is given, also consider the alignment of the next block.
- unsigned postKnownBits(unsigned LogAlign = 0) const {
- return std::max(LogAlign, internalKnownBits());
- }
- };
-
- std::vector<BasicBlockInfo> BBInfo;
-
- /// One per immediate branch, keeping the machine instruction pointer,
- /// conditional or unconditional, the max displacement, and (if IsCond is
- /// true) the corresponding inverted branch opcode.
- struct ImmBranch {
- MachineInstr *MI;
- unsigned OffsetBits : 31;
- bool IsCond : 1;
- ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond)
- : MI(mi), OffsetBits(offsetbits), IsCond(cond) {}
- };
-
- /// Keep track of all the immediate branch instructions.
- ///
- std::vector<ImmBranch> ImmBranches;
-
- MachineFunction *MF;
- const AArch64InstrInfo *TII;
- public:
- static char ID;
- AArch64BranchFixup() : MachineFunctionPass(ID) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "AArch64 branch fixup pass";
- }
-
- private:
- void initializeFunctionInfo();
- MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
- void adjustBBOffsetsAfter(MachineBasicBlock *BB);
- bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned OffsetBits);
- bool fixupImmediateBr(ImmBranch &Br);
- bool fixupConditionalBr(ImmBranch &Br);
-
- void computeBlockSize(MachineBasicBlock *MBB);
- unsigned getOffsetOf(MachineInstr *MI) const;
- void dumpBBs();
- void verify();
- };
- char AArch64BranchFixup::ID = 0;
-}
-
-/// check BBOffsets
-void AArch64BranchFixup::verify() {
-#ifndef NDEBUG
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
- unsigned MBBId = MBB->getNumber();
- assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
- }
-#endif
-}
-
-/// print block size and offset information - debugging
-void AArch64BranchFixup::dumpBBs() {
- DEBUG({
- for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
- const BasicBlockInfo &BBI = BBInfo[J];
- dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
- << " kb=" << unsigned(BBI.KnownBits)
- << " ua=" << unsigned(BBI.Unalign)
- << format(" size=%#x\n", BBInfo[J].Size);
- }
- });
-}
-
-/// Returns an instance of the branch fixup pass.
-FunctionPass *llvm::createAArch64BranchFixupPass() {
- return new AArch64BranchFixup();
-}
-
-bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) {
- MF = &mf;
- DEBUG(dbgs() << "***** AArch64BranchFixup ******");
- TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo();
-
- // This pass invalidates liveness information when it splits basic blocks.
- MF->getRegInfo().invalidateLiveness();
-
- // Renumber all of the machine basic blocks in the function, guaranteeing that
- // the numbers agree with the position of the block in the function.
- MF->RenumberBlocks();
-
- // Do the initial scan of the function, building up information about the
- // sizes of each block and location of each immediate branch.
- initializeFunctionInfo();
-
- // Iteratively fix up branches until there is no change.
- unsigned NoBRIters = 0; - bool MadeChange = false; - while (true) { - DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n'); - bool BRChange = false; - for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) - BRChange |= fixupImmediateBr(ImmBranches[i]); - if (BRChange && ++NoBRIters > 30) - report_fatal_error("Branch Fix Up pass failed to converge!"); - DEBUG(dumpBBs()); - - if (!BRChange) - break; - MadeChange = true; - } - - // After a while, this might be made debug-only, but it is not expensive. - verify(); - - DEBUG(dbgs() << '\n'; dumpBBs()); - - BBInfo.clear(); - ImmBranches.clear(); - - return MadeChange; -} - -/// Return true if the specified basic block can fallthrough into the block -/// immediately after it. -static bool BBHasFallthrough(MachineBasicBlock *MBB) { - // Get the next machine basic block in the function. - MachineFunction::iterator MBBI = MBB; - // Can't fall off end of function. - if (std::next(MBBI) == MBB->getParent()->end()) - return false; - - MachineBasicBlock *NextBB = std::next(MBBI); - for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) - if (*I == NextBB) - return true; - - return false; -} - -/// Do the initial scan of the function, building up information about the sizes -/// of each block, and each immediate branch. -void AArch64BranchFixup::initializeFunctionInfo() { - BBInfo.clear(); - BBInfo.resize(MF->getNumBlockIDs()); - - // First thing, compute the size of all basic blocks, and see if the function - // has any inline assembly in it. If so, we have to be conservative about - // alignment assumptions, as we don't know for sure the size of any - // instructions in the inline assembly. - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - computeBlockSize(I); - - // The known bits of the entry block offset are determined by the function - // alignment. - BBInfo.front().KnownBits = MF->getAlignment(); - - // Compute block offsets and known bits. - adjustBBOffsetsAfter(MF->begin()); - - // Now go back through the instructions and build up our data structures. - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock &MBB = *MBBI; - - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - if (I->isDebugValue()) - continue; - - int Opc = I->getOpcode(); - if (I->isBranch()) { - bool IsCond = false; - - // The offsets encoded in instructions here scale by the instruction - // size (4 bytes), effectively increasing their range by 2 bits. - unsigned Bits = 0; - switch (Opc) { - default: - continue; // Ignore other JT branches - case AArch64::TBZxii: - case AArch64::TBZwii: - case AArch64::TBNZxii: - case AArch64::TBNZwii: - IsCond = true; - Bits = 14 + 2; - break; - case AArch64::Bcc: - case AArch64::CBZx: - case AArch64::CBZw: - case AArch64::CBNZx: - case AArch64::CBNZw: - IsCond = true; - Bits = 19 + 2; - break; - case AArch64::Bimm: - Bits = 26 + 2; - break; - } - - // Record this immediate branch. - ImmBranches.push_back(ImmBranch(I, Bits, IsCond)); - } - } - } -} - -/// Compute the size and some alignment information for MBB. This function -/// updates BBInfo directly. 
-void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) { - BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; - BBI.Size = 0; - BBI.Unalign = 0; - - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { - BBI.Size += TII->getInstSizeInBytes(*I); - // For inline asm, GetInstSizeInBytes returns a conservative estimate. - // The actual size may be smaller, but still a multiple of the instr size. - if (I->isInlineAsm()) - BBI.Unalign = 2; - } -} - -/// Return the current offset of the specified machine instruction from the -/// start of the function. This offset changes as stuff is moved around inside -/// the function. -unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const { - MachineBasicBlock *MBB = MI->getParent(); - - // The offset is composed of two things: the sum of the sizes of all MBB's - // before this instruction's block, and the offset from the start of the block - // it is in. - unsigned Offset = BBInfo[MBB->getNumber()].Offset; - - // Sum instructions before MI in MBB. - for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { - assert(I != MBB->end() && "Didn't find MI in its own basic block?"); - Offset += TII->getInstSizeInBytes(*I); - } - return Offset; -} - -/// Split the basic block containing MI into two blocks, which are joined by -/// an unconditional branch. Update data structures and renumber blocks to -/// account for this change and returns the newly created block. -MachineBasicBlock * -AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) { - MachineBasicBlock *OrigBB = MI->getParent(); - - // Create a new MBB for the code after the OrigBB. - MachineBasicBlock *NewBB = - MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); - MachineFunction::iterator MBBI = OrigBB; ++MBBI; - MF->insert(MBBI, NewBB); - - // Splice the instructions starting with MI over to NewBB. - NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); - - // Add an unconditional branch from OrigBB to NewBB. - // Note the new unconditional branch is not being recorded. - // There doesn't seem to be meaningful DebugInfo available; this doesn't - // correspond to anything in the source. - BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB); - ++NumSplit; - - // Update the CFG. All succs of OrigBB are now succs of NewBB. - NewBB->transferSuccessors(OrigBB); - - // OrigBB branches to NewBB. - OrigBB->addSuccessor(NewBB); - - // Update internal data structures to account for the newly inserted MBB. - MF->RenumberBlocks(NewBB); - - // Insert an entry into BBInfo to align it properly with the (newly - // renumbered) block numbers. - BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); - - // Figure out how large the OrigBB is. As the first half of the original - // block, it cannot contain a tablejump. The size includes - // the new jump we added. (It should be possible to do this without - // recounting everything, but it's very confusing, and this is rarely - // executed.) - computeBlockSize(OrigBB); - - // Figure out how large the NewMBB is. As the second half of the original - // block, it may contain a tablejump. - computeBlockSize(NewBB); - - // All BBOffsets following these blocks must be modified. - adjustBBOffsetsAfter(OrigBB); - - return NewBB; -} - -void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) { - unsigned BBNum = BB->getNumber(); - for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { - // Get the offset and known bits at the end of the layout predecessor. 
- // Include the alignment of the current block. - unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); - unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); - unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); - - // This is where block i begins. Stop if the offset is already correct, - // and we have updated 2 blocks. This is the maximum number of blocks - // changed before calling this function. - if (i > BBNum + 2 && - BBInfo[i].Offset == Offset && - BBInfo[i].KnownBits == KnownBits) - break; - - BBInfo[i].Offset = Offset; - BBInfo[i].KnownBits = KnownBits; - } -} - -/// Returns true if the distance between specific MI and specific BB can fit in -/// MI's displacement field. -bool AArch64BranchFixup::isBBInRange(MachineInstr *MI, - MachineBasicBlock *DestBB, - unsigned OffsetBits) { - int64_t BrOffset = getOffsetOf(MI); - int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset; - - DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() - << " from BB#" << MI->getParent()->getNumber() - << " bits available=" << OffsetBits - << " from " << getOffsetOf(MI) << " to " << DestOffset - << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); - - return isIntN(OffsetBits, DestOffset - BrOffset); -} - -/// Fix up an immediate branch whose destination is too far away to fit in its -/// displacement field. -bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) { - MachineInstr *MI = Br.MI; - MachineBasicBlock *DestBB = nullptr; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (MI->getOperand(i).isMBB()) { - DestBB = MI->getOperand(i).getMBB(); - break; - } - } - assert(DestBB && "Branch with no destination BB?"); - - // Check to see if the DestBB is already in-range. - if (isBBInRange(MI, DestBB, Br.OffsetBits)) - return false; - - assert(Br.IsCond && "Only conditional branches should need fixup"); - return fixupConditionalBr(Br); -} - -/// Fix up a conditional branch whose destination is too far away to fit in its -/// displacement field. It is converted to an inverse conditional branch + an -/// unconditional branch to the destination. -bool -AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) { - MachineInstr *MI = Br.MI; - MachineBasicBlock *MBB = MI->getParent(); - unsigned CondBrMBBOperand = 0; - - // The general idea is to add an unconditional branch to the destination and - // invert the conditional branch to jump over it. Complications occur around - // fallthrough and unreachable ends to the block. - // b.lt L1 - // => - // b.ge L2 - // b L1 - // L2: - - // First we invert the conditional branch, by creating a replacement if - // necessary. This if statement contains all the special handling of different - // branch types. 
- if (MI->getOpcode() == AArch64::Bcc) { - // The basic block is operand number 1 for Bcc - CondBrMBBOperand = 1; - - A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm(); - CC = A64InvertCondCode(CC); - MI->getOperand(0).setImm(CC); - } else { - MachineInstrBuilder InvertedMI; - int InvertedOpcode; - switch (MI->getOpcode()) { - default: llvm_unreachable("Unknown branch type"); - case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break; - case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break; - case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break; - case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break; - case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break; - case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break; - case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break; - case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break; - } - - InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode)); - for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) { - InvertedMI.addOperand(MI->getOperand(i)); - if (MI->getOperand(i).isMBB()) - CondBrMBBOperand = i; - } - - MI->eraseFromParent(); - MI = Br.MI = InvertedMI; - } - - // If the branch is at the end of its MBB and that has a fall-through block, - // direct the updated conditional branch to the fall-through - // block. Otherwise, split the MBB before the next instruction. - MachineInstr *BMI = &MBB->back(); - bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); - - ++NumCBrFixed; - if (BMI != MI) { - if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) && - BMI->getOpcode() == AArch64::Bimm) { - // Last MI in the BB is an unconditional branch. We can swap destinations: - // b.eq L1 (temporarily b.ne L1 after first change) - // b L2 - // => - // b.ne L2 - // b L1 - MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); - if (isBBInRange(MI, NewDest, Br.OffsetBits)) { - DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " - << *BMI); - MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB(); - BMI->getOperand(0).setMBB(DestBB); - MI->getOperand(CondBrMBBOperand).setMBB(NewDest); - return true; - } - } - } - - if (NeedSplit) { - MachineBasicBlock::iterator MBBI = MI; ++MBBI; - splitBlockBeforeInstr(MBBI); - // No need for the branch to the next block. We're adding an unconditional - // branch to the destination. - int delta = TII->getInstSizeInBytes(MBB->back()); - BBInfo[MBB->getNumber()].Size -= delta; - MBB->back().eraseFromParent(); - // BBInfo[SplitBB].Offset is wrong temporarily, fixed below - } - - // After splitting and removing the unconditional branch from the original BB, - // the structure is now: - // oldbb: - // [things] - // b.invertedCC L1 - // splitbb/fallthroughbb: - // [old b L2/real continuation] - // - // We now have to change the conditional branch to point to splitbb and add an - // unconditional branch after it to L1, giving the final structure: - // oldbb: - // [things] - // b.invertedCC splitbb - // b L1 - // splitbb/fallthroughbb: - // [old b L2/real continuation] - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB)); - - DEBUG(dbgs() << " Insert B to BB#" - << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber() - << " also invert condition and change dest. to BB#" - << NextBB->getNumber() << "\n"); - - // Insert a new unconditional branch and fixup the destination of the - // conditional one. 
Also update the ImmBranch as well as adding a new entry - // for the new branch. - BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm)) - .addMBB(MI->getOperand(CondBrMBBOperand).getMBB()); - MI->getOperand(CondBrMBBOperand).setMBB(NextBB); - - BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); - - // 26 bits written down in Bimm, specifying a multiple of 4. - unsigned OffsetBits = 26 + 2; - ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false)); - - adjustBBOffsetsAfter(MBB); - return true; -} diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td deleted file mode 100644 index 9fe6aae2e32f..000000000000 --- a/lib/Target/AArch64/AArch64CallingConv.td +++ /dev/null @@ -1,197 +0,0 @@ -//==-- AArch64CallingConv.td - Calling Conventions for ARM ----*- tblgen -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This describes the calling conventions for AArch64 architecture. -//===----------------------------------------------------------------------===// - - -// The AArch64 Procedure Call Standard is unfortunately specified at a slightly -// higher level of abstraction than LLVM's target interface presents. In -// particular, it refers (like other ABIs, in fact) directly to -// structs. However, generic LLVM code takes the liberty of lowering structure -// arguments to the component fields before we see them. -// -// As a result, the obvious direct map from LLVM IR to PCS concepts can't be -// implemented, so the goals of this calling convention are, in decreasing -// priority order: -// 1. Expose *some* way to express the concepts required to implement the -// generic PCS from a front-end. -// 2. Provide a sane ABI for pure LLVM. -// 3. Follow the generic PCS as closely as is naturally possible. -// -// The suggested front-end implementation of PCS features is: -// * Integer, float and vector arguments of all sizes which end up in -// registers are passed and returned via the natural LLVM type. -// * Structure arguments with size <= 16 bytes are passed and returned in -// registers as similar integer or composite types. For example: -// [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed). -// * HFAs in registers follow rules similar to small structs: appropriate -// composite types. -// * Structure arguments with size > 16 bytes are passed via a pointer, -// handled completely by the front-end. -// * Structure return values > 16 bytes via an sret pointer argument. -// * Other stack-based arguments (not large structs) are passed using byval -// pointers. Padding arguments are added beforehand to guarantee a large -// struct doesn't later use integer registers. -// -// N.b. this means that it is the front-end's responsibility (if it cares about -// PCS compliance) to check whether enough registers are available for an -// argument when deciding how to pass it. - -class CCIfAlign: - CCIf<"ArgFlags.getOrigAlign() == " # Align, A>; - -def CC_A64_APCS : CallingConv<[ - // SRet is an LLVM-specific concept, so it takes precedence over general ABI - // concerns. However, this rule will be used by C/C++ frontends to implement - // structure return. - CCIfSRet>, - - // Put ByVal arguments directly on the stack. Minimum size and alignment of a - // slot is 64-bit. 
- CCIfByVal>, - - // Canonicalise the various types that live in different floating-point - // registers. This makes sense because the PCS does not distinguish Short - // Vectors and Floating-point types. - CCIfType<[v1i16, v2i8], CCBitConvertToType>, - CCIfType<[v1i32, v4i8, v2i16], CCBitConvertToType>, - CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType>, - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCBitConvertToType>, - - // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision - // Floating-point or Short Vector Type and the NSRN is less than 8, then the - // argument is allocated to the least significant bits of register - // v[NSRN]. The NSRN is incremented by one. The argument has now been - // allocated." - CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, - CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, - CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, - CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - - // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated - // SIMD and Floating-point registers (NSRN - number of elements < 8), then the - // argument is allocated to SIMD and Floating-point registers (with one - // register per element of the HFA). The NSRN is incremented by the number of - // registers used. The argument has now been allocated." - // - // N.b. As above, this rule is the responsibility of the front-end. - - // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of - // the argument is rounded up to the nearest multiple of 8 bytes." - // - // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short - // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural - // Alignment of the Argument's type." - // - // It is expected that these will be satisfied by adding dummy arguments to - // the prototype. - - // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point - // type then the size of the argument is set to 8 bytes. The effect is as if - // the argument had been copied to the least significant bits of a 64-bit - // register and the remaining bits filled with unspecified values." - CCIfType<[f16, f32], CCPromoteToType>, - - // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad- - // precision Floating-point or Short Vector Type, then the argument is copied - // to memory at the adjusted NSAA. The NSAA is incremented by the size of the - // argument. The argument has now been allocated." - CCIfType<[f64], CCAssignToStack<8, 8>>, - CCIfType<[f128], CCAssignToStack<16, 16>>, - - // PCS: "C.7: If the argument is an Integral Type, the size of the argument is - // less than or equal to 8 bytes and the NGRN is less than 8, the argument is - // copied to the least significant bits of x[NGRN]. The NGRN is incremented by - // one. The argument has now been allocated." - - // First we implement C.8 and C.9 (128-bit types get even registers). i128 is - // represented as two i64s, the first one being split. If we delayed this - // operation C.8 would never be reached. - CCIfType<[i64], - CCIfSplit>>, - - // Note: the promotion also implements C.14. 
- CCIfType<[i8, i16, i32], CCPromoteToType>, - - // And now the real implementation of C.7 - CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, - - // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded - // up to the next even number." - // - // "C.9: If the argument is an Integral Type, the size of the argument is - // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN] - // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the - // memory representation of the argument. The NGRN is incremented by two. The - // argument has now been allocated." - // - // Subtlety here: what if alignment is 16 but it is not an integral type? All - // floating-point types have been allocated already, which leaves composite - // types: this is why a front-end may need to produce i128 for a struct <= 16 - // bytes. - - // PCS: "C.10 If the argument is a Composite Type and the size in double-words - // of the argument is not more than 8 minus NGRN, then the argument is copied - // into consecutive general-purpose registers, starting at x[NGRN]. The - // argument is passed as though it had been loaded into the registers from a - // double-word aligned address with an appropriate sequence of LDR - // instructions loading consecutive registers from memory (the contents of any - // unused parts of the registers are unspecified by this standard). The NGRN - // is incremented by the number of registers used. The argument has now been - // allocated." - // - // Another one that's the responsibility of the front-end (sigh). - - // PCS: "C.11: The NGRN is set to 8." - CCCustom<"CC_AArch64NoMoreRegs">, - - // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural - // Alignment of the argument's type." - // - // PCS: "C.13: If the argument is a composite type then the argument is copied - // to memory at the adjusted NSAA. The NSAA is by the size of the - // argument. The argument has now been allocated." - // - // Note that the effect of this corresponds to a memcpy rather than register - // stores so that the struct ends up correctly addressable at the adjusted - // NSAA. - - // PCS: "C.14: If the size of the argument is less than 8 bytes then the size - // of the argument is set to 8 bytes. The effect is as if the argument was - // copied to the least significant bits of a 64-bit register and the remaining - // bits filled with unspecified values." - // - // Integer types were widened above. Floating-point and composite types have - // already been allocated completely. Nothing to do. - - // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA - // is incremented by the size of the argument. The argument has now been - // allocated." - CCIfType<[i64], CCIfSplit>>, - CCIfType<[i64], CCAssignToStack<8, 8>> - -]>; - -// According to the PCS, X19-X30 are callee-saved, however only the low 64-bits -// of vector registers (8-15) are callee-saved. The order here is is picked up -// by PrologEpilogInserter.cpp to allocate stack slots, starting from top of -// stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at -// [sp-16], ... -def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19), - (sequence "D%u", 15, 8))>; - - -// TLS descriptor calls are extremely restricted in their changes, to allow -// optimisations in the (hopefully) more common fast path where no real action -// is needed. 
They actually have to preserve all registers, except for the -// unavoidable X30 and the return register X0. -def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1), - (sequence "Q%u", 31, 0))>; diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp deleted file mode 100644 index 972e6f7617b3..000000000000 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ /dev/null @@ -1,626 +0,0 @@ -//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the AArch64 implementation of TargetFrameLowering class. -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64FrameLowering.h" -#include "AArch64InstrInfo.h" -#include "AArch64MachineFunctionInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/IR/Function.h" -#include "llvm/MC/MachineLocation.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -void AArch64FrameLowering::splitSPAdjustments(uint64_t Total, - uint64_t &Initial, - uint64_t &Residual) const { - // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP - // instructions have a 7-bit signed immediate scaled by 8, giving a reach of - // 0x1f8, but stack adjustment should always be a multiple of 16. - if (Total <= 0x1f0) { - Initial = Total; - Residual = 0; - } else { - Initial = 0x1f0; - Residual = Total - Initial; - } -} - -void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { - AArch64MachineFunctionInfo *FuncInfo = - MF.getInfo(); - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - - MachineModuleInfo &MMI = MF.getMMI(); - const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - bool NeedsFrameMoves = MMI.hasDebugInfo() - || MF.getFunction()->needsUnwindTableEntry(); - - uint64_t NumInitialBytes, NumResidualBytes; - - // Currently we expect the stack to be laid out by - // sub sp, sp, #initial - // stp x29, x30, [sp, #offset] - // ... - // str xxx, [sp, #offset] - // sub sp, sp, #rest (possibly via extra instructions). - if (MFI->getCalleeSavedInfo().size()) { - // If there are callee-saved registers, we want to store them efficiently as - // a block, and virtual base assignment happens too early to do it for us so - // we adjust the stack in two phases: first just for callee-saved fiddling, - // then to allocate the rest of the frame. - splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes); - } else { - // If there aren't any callee-saved registers, two-phase adjustment is - // inefficient. 
It's more efficient to adjust with NumInitialBytes too - // because when we're in a "callee pops argument space" situation, that pop - // must be tacked onto Initial for correctness. - NumInitialBytes = MFI->getStackSize(); - NumResidualBytes = 0; - } - - // Tell everyone else how much adjustment we're expecting them to use. In - // particular if an adjustment is required for a tail call the epilogue could - // have a different view of things. - FuncInfo->setInitialStackAdjust(NumInitialBytes); - - emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes, - MachineInstr::FrameSetup); - - if (NeedsFrameMoves && NumInitialBytes) { - // We emit this update even if the CFA is set from a frame pointer later so - // that the CFA is valid in the interim. - MachineLocation Dst(MachineLocation::VirtualFP); - unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfa(nullptr, Reg, -NumInitialBytes)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - - // Otherwise we need to set the frame pointer and/or add a second stack - // adjustment. - - bool FPNeedsSetting = hasFP(MF); - for (; MBBI != MBB.end(); ++MBBI) { - // Note that this search makes strong assumptions about the operation used - // to store the frame-pointer: it must be "STP x29, x30, ...". This could - // change in future, but until then there's no point in implementing - // untestable more generic cases. - if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR - && MBBI->getOperand(0).getReg() == AArch64::X29) { - int64_t X29FrameIdx = MBBI->getOperand(2).getIndex(); - FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx)); - - ++MBBI; - emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP, - AArch64::X29, - NumInitialBytes + MFI->getObjectOffset(X29FrameIdx), - MachineInstr::FrameSetup); - - // The offset adjustment used when emitting debugging locations relative - // to whatever frame base is set. AArch64 uses the default frame base (FP - // or SP) and this adjusts the calculations to be correct. - MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx) - - MFI->getStackSize()); - - if (NeedsFrameMoves) { - unsigned Reg = MRI->getDwarfRegNum(AArch64::X29, true); - unsigned Offset = MFI->getObjectOffset(X29FrameIdx); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfa(nullptr, Reg, Offset)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - - FPNeedsSetting = false; - } - - if (!MBBI->getFlag(MachineInstr::FrameSetup)) - break; - } - - assert(!FPNeedsSetting && "Frame pointer couldn't be set"); - - emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes, - MachineInstr::FrameSetup); - - // Now we emit the rest of the frame setup information, if necessary: we've - // already noted the FP and initial SP moves so we're left with the prologue's - // final SP update and callee-saved register locations. 
- if (!NeedsFrameMoves) - return; - - // The rest of the stack adjustment - if (!hasFP(MF) && NumResidualBytes) { - MachineLocation Dst(MachineLocation::VirtualFP); - unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true); - unsigned Offset = NumResidualBytes + NumInitialBytes; - unsigned CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - - // And any callee-saved registers (it's fine to leave them to the end here, - // because the old values are still valid at this point. - const std::vector &CSI = MFI->getCalleeSavedInfo(); - if (CSI.size()) { - for (std::vector::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - unsigned Offset = MFI->getObjectOffset(I->getFrameIdx()); - unsigned Reg = MRI->getDwarfRegNum(I->getReg(), true); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(nullptr, Reg, Offset)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - } -} - -void -AArch64FrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - AArch64MachineFunctionInfo *FuncInfo = - MF.getInfo(); - - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - DebugLoc DL = MBBI->getDebugLoc(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned RetOpcode = MBBI->getOpcode(); - - // Initial and residual are named for consitency with the prologue. Note that - // in the epilogue, the residual adjustment is executed first. - uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust(); - uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes; - uint64_t ArgumentPopSize = 0; - if (RetOpcode == AArch64::TC_RETURNdi || - RetOpcode == AArch64::TC_RETURNxi) { - MachineOperand &JumpTarget = MBBI->getOperand(0); - MachineOperand &StackAdjust = MBBI->getOperand(1); - - MachineInstrBuilder MIB; - if (RetOpcode == AArch64::TC_RETURNdi) { - MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm)); - if (JumpTarget.isGlobal()) { - MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), - JumpTarget.getTargetFlags()); - } else { - assert(JumpTarget.isSymbol() && "unexpected tail call destination"); - MIB.addExternalSymbol(JumpTarget.getSymbolName(), - JumpTarget.getTargetFlags()); - } - } else { - assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg() - && "Unexpected tail call"); - - MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx)); - MIB.addReg(JumpTarget.getReg(), RegState::Kill); - } - - // Add the extra operands onto the new tail call instruction even though - // they're not used directly (so that liveness is tracked properly etc). - for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) - MIB->addOperand(MBBI->getOperand(i)); - - - // Delete the pseudo instruction TC_RETURN. - MachineInstr *NewMI = std::prev(MBBI); - MBB.erase(MBBI); - MBBI = NewMI; - - // For a tail-call in a callee-pops-arguments environment, some or all of - // the stack may actually be in use for the call's arguments, this is - // calculated during LowerCall and consumed here... - ArgumentPopSize = StackAdjust.getImm(); - } else { - // ... otherwise the amount to pop is *all* of the argument space, - // conveniently stored in the MachineFunctionInfo by - // LowerFormalArguments. This will, of course, be zero for the C calling - // convention. 
- ArgumentPopSize = FuncInfo->getArgumentStackToRestore(); - } - - assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0 - && "refusing to adjust stack by misaligned amt"); - - // We may need to address callee-saved registers differently, so find out the - // bound on the frame indices. - const std::vector &CSI = MFI.getCalleeSavedInfo(); - int MinCSFI = 0; - int MaxCSFI = -1; - - if (CSI.size()) { - MinCSFI = CSI[0].getFrameIdx(); - MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); - } - - // The "residual" stack update comes first from this direction and guarantees - // that SP is NumInitialBytes below its value on function entry, either by a - // direct update or restoring it from the frame pointer. - if (NumInitialBytes + ArgumentPopSize != 0) { - emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, - NumInitialBytes + ArgumentPopSize); - --MBBI; - } - - - // MBBI now points to the instruction just past the last callee-saved - // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp" - // otherwise). - - // Now we need to find out where to put the bulk of the stack adjustment - MachineBasicBlock::iterator FirstEpilogue = MBBI; - while (MBBI != MBB.begin()) { - --MBBI; - - unsigned FrameOp; - for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) { - if (MBBI->getOperand(FrameOp).isFI()) - break; - } - - // If this instruction doesn't have a frame index we've reached the end of - // the callee-save restoration. - if (FrameOp == MBBI->getNumOperands()) - break; - - // Likewise if it *is* a local reference, but not to a callee-saved object. - int FrameIdx = MBBI->getOperand(FrameOp).getIndex(); - if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI) - break; - - FirstEpilogue = MBBI; - } - - if (MF.getFrameInfo()->hasVarSizedObjects()) { - int64_t StaticFrameBase; - StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset()); - emitRegUpdate(MBB, FirstEpilogue, DL, TII, - AArch64::XSP, AArch64::X29, AArch64::NoRegister, - StaticFrameBase); - } else { - emitSPUpdate(MBB, FirstEpilogue, DL,TII, AArch64::X16, NumResidualBytes); - } -} - -int64_t -AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF, - int FrameIndex, - unsigned &FrameReg, - int SPAdj, - bool IsCalleeSaveOp) const { - AArch64MachineFunctionInfo *FuncInfo = - MF.getInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex); - - assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0) - && "callee-saved register in unexpected place"); - - // If the frame for this function is particularly large, we adjust the stack - // in two phases which means the callee-save related operations see a - // different (intermediate) stack size. - int64_t FrameRegPos; - if (IsCalleeSaveOp) { - FrameReg = AArch64::XSP; - FrameRegPos = -static_cast(FuncInfo->getInitialStackAdjust()); - } else if (useFPForAddressing(MF)) { - // Have to use the frame pointer since we have no idea where SP is. 
- FrameReg = AArch64::X29; - FrameRegPos = FuncInfo->getFramePointerOffset(); - } else { - FrameReg = AArch64::XSP; - FrameRegPos = -static_cast(MFI->getStackSize()) + SPAdj; - } - - return TopOfFrameOffset - FrameRegPos; -} - -void -AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - const AArch64RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const AArch64InstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - - if (hasFP(MF)) { - MF.getRegInfo().setPhysRegUsed(AArch64::X29); - MF.getRegInfo().setPhysRegUsed(AArch64::X30); - } - - // If addressing of local variables is going to be more complicated than - // shoving a base register and an offset into the instruction then we may well - // need to scavenge registers. We should either specifically add an - // callee-save register for this purpose or allocate an extra spill slot. - bool BigStack = - MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF) - || MFI->hasVarSizedObjects() // Access will be from X29: messes things up - || (MFI->adjustsStack() && !hasReservedCallFrame(MF)); - - if (!BigStack) - return; - - // We certainly need some slack space for the scavenger, preferably an extra - // register. - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(); - MCPhysReg ExtraReg = AArch64::NoRegister; - - for (unsigned i = 0; CSRegs[i]; ++i) { - if (AArch64::GPR64RegClass.contains(CSRegs[i]) && - !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) { - ExtraReg = CSRegs[i]; - break; - } - } - - if (ExtraReg != 0) { - MF.getRegInfo().setPhysRegUsed(ExtraReg); - } else { - assert(RS && "Expect register scavenger to be available"); - - // Create a stack slot for scavenging purposes. PrologEpilogInserter - // helpfully places it near either SP or FP for us to avoid - // infinitely-regression during scavenging. - const TargetRegisterClass *RC = &AArch64::GPR64RegClass; - RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } -} - -bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB, - unsigned Reg) const { - // If @llvm.returnaddress is called then it will refer to X30 by some means; - // the prologue store does not kill the register. - if (Reg == AArch64::X30) { - if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken() - && MBB.getParent()->getRegInfo().isLiveIn(Reg)) - return false; - } - - // In all other cases, physical registers are dead after they've been saved - // but live at the beginning of the prologue block. - MBB.addLiveIn(Reg); - return true; -} - -void -AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const std::vector &CSI, - const TargetRegisterInfo *TRI, - const LoadStoreMethod PossClasses[], - unsigned NumClasses) const { - DebugLoc DL = MBB.findDebugLoc(MBBI); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - - // A certain amount of implicit contract is present here. The actual stack - // offsets haven't been allocated officially yet, so for strictly correct code - // we rely on the fact that the elements of CSI are allocated in order - // starting at SP, purely as dictated by size and alignment. In practice since - // this function handles the only accesses to those slots it's not quite so - // important. 
-  //
-  // We have also ordered the Callee-saved register list in AArch64CallingConv
-  // so that the above scheme puts registers in order: in particular we want
-  // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2)
-  for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
-    unsigned Reg = CSI[i].getReg();
-
-    // First we need to find out which register class the register belongs to
-    // so that we can use the correct load/store instructions.
-    unsigned ClassIdx;
-    for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
-      if (PossClasses[ClassIdx].RegClass->contains(Reg))
-        break;
-    }
-    assert(ClassIdx != NumClasses
-           && "Asked to store register in unexpected class");
-    const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;
-
-    // Now we need to decide whether it's possible to emit a paired
-    // instruction: for this we want the next register to be in the same class.
-    MachineInstrBuilder NewMI;
-    bool Pair = false;
-    if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
-      Pair = true;
-      unsigned StLow = 0, StHigh = 0;
-      if (isPrologue) {
-        // Most of these registers will be live-in to the MBB and killed by
-        // our store, though there are exceptions (see determinePrologueDeath).
-        StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
-        StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
-      } else {
-        StLow = RegState::Define;
-        StHigh = RegState::Define;
-      }
-
-      NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
-                .addReg(CSI[i+1].getReg(), StLow)
-                .addReg(CSI[i].getReg(), StHigh);
-
-      // If it's a paired op, we've consumed two registers
-      ++i;
-    } else {
-      unsigned State;
-      if (isPrologue) {
-        State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
-      } else {
-        State = RegState::Define;
-      }
-
-      NewMI = BuildMI(MBB, MBBI, DL,
-                      TII.get(PossClasses[ClassIdx].SingleOpcode))
-                .addReg(CSI[i].getReg(), State);
-    }
-
-    // Note that the FrameIdx refers to the second register in a pair: it will
-    // be allocated the smaller numeric address and so is the one an LDP/STP
-    // address must use.
-    int FrameIdx = CSI[i].getFrameIdx();
-    MachineMemOperand::MemOperandFlags Flags;
-    Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
-    MachineMemOperand *MMO =
-      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
-                              Flags,
-                              Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
-                              MFI.getObjectAlignment(FrameIdx));
-
-    NewMI.addFrameIndex(FrameIdx)
-         .addImm(0)                // address-register offset
-         .addMemOperand(MMO);
-
-    if (isPrologue)
-      NewMI.setMIFlags(MachineInstr::FrameSetup);
-
-    // For aesthetic reasons, during an epilogue we want to emit complementary
-    // operations to the prologue, but in the opposite order. So we still
-    // iterate through the CalleeSavedInfo list in order, but we put the
-    // instructions successively earlier in the MBB.
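-    // BuildMI inserts before MBBI, so stepping the iterator back onto the
-    // restore we just created makes the next one land in front of it, giving
-    // the mirror-image ordering described above.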
- if (!isPrologue) - --MBBI; - } -} - -bool -AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - static const LoadStoreMethod PossibleClasses[] = { - {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR}, - {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR}, - }; - const unsigned NumClasses = llvm::array_lengthof(PossibleClasses); - - emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI, - PossibleClasses, NumClasses); - - return true; -} - -bool -AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const { - - if (CSI.empty()) - return false; - - static const LoadStoreMethod PossibleClasses[] = { - {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR}, - {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR}, - }; - const unsigned NumClasses = llvm::array_lengthof(PossibleClasses); - - emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI, - PossibleClasses, NumClasses); - - return true; -} - -bool -AArch64FrameLowering::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo(); - - // This is a decision of ABI compliance. The AArch64 PCS gives various options - // for conformance, and even at the most stringent level more or less permits - // elimination for leaf functions because there's no loss of functionality - // (for debugging etc).. - if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls()) - return true; - - // The following are hard-limits: incorrect code will be generated if we try - // to omit the frame. - return (RI->needsStackRealignment(MF) || - MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken()); -} - -bool -AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const { - return MF.getFrameInfo()->hasVarSizedObjects(); -} - -bool -AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Of the various reasons for having a frame pointer, it's actually only - // variable-sized objects that prevent reservation of a call frame. - return !(hasFP(MF) && MFI->hasVarSizedObjects()); -} - -void -AArch64FrameLowering::eliminateCallFramePseudoInstr( - MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const { - const AArch64InstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - DebugLoc dl = MI->getDebugLoc(); - int Opcode = MI->getOpcode(); - bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode(); - uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0; - - if (!hasReservedCallFrame(MF)) { - unsigned Align = getStackAlignment(); - - int64_t Amount = MI->getOperand(0).getImm(); - Amount = RoundUpToAlignment(Amount, Align); - if (!IsDestroy) Amount = -Amount; - - // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it - // doesn't have to pop anything), then the first operand will be zero too so - // this adjustment is a no-op. - if (CalleePopAmount == 0) { - // FIXME: in-function stack adjustment for calls is limited to 12-bits - // because there's no guaranteed temporary register available. 
Mostly call - // frames will be allocated at the start of a function so this is OK, but - // it is a limitation that needs dealing with. - assert(Amount > -0xfff && Amount < 0xfff && "call frame too large"); - emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount); - } - } else if (CalleePopAmount != 0) { - // If the calling convention demands that the callee pops arguments from the - // stack, we want to add it back if we have a reserved call frame. - assert(CalleePopAmount < 0xfff && "call frame too large"); - emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount); - } - - MBB.erase(MI); -} diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h deleted file mode 100644 index 6ec27e3104f0..000000000000 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ /dev/null @@ -1,108 +0,0 @@ -//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class implements the AArch64-specific parts of the TargetFrameLowering -// class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64_FRAMEINFO_H -#define LLVM_AARCH64_FRAMEINFO_H - -#include "AArch64Subtarget.h" -#include "llvm/Target/TargetFrameLowering.h" - -namespace llvm { -class AArch64Subtarget; - -class AArch64FrameLowering : public TargetFrameLowering { -private: - // In order to unify the spilling and restoring of callee-saved registers into - // emitFrameMemOps, we need to be able to specify which instructions to use - // for the relevant memory operations on each register class. An array of the - // following struct is populated and passed in to achieve this. - struct LoadStoreMethod { - const TargetRegisterClass *RegClass; // E.g. GPR64RegClass - - // The preferred instruction. - unsigned PairOpcode; // E.g. LSPair64_STR - - // Sometimes only a single register can be handled at once. - unsigned SingleOpcode; // E.g. LS64_STR - }; -protected: - const AArch64Subtarget &STI; - -public: - explicit AArch64FrameLowering(const AArch64Subtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16), - STI(sti) { - } - - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const override; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - - /// Decides how much stack adjustment to perform in each phase of the prologue - /// and epilogue. 
- void splitSPAdjustments(uint64_t Total, uint64_t &Initial, - uint64_t &Residual) const; - - int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex, - unsigned &FrameReg, int SPAdj, - bool IsCalleeSaveOp) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; - - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const override; - bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const override; - - void - eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const override; - - /// If the register is X30 (i.e. LR) and the return address is used in the - /// function then the callee-save store doesn't actually kill the register, - /// otherwise it does. - bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const; - - /// This function emits the loads or stores required during prologue and - /// epilogue as efficiently as possible. - /// - /// The operations involved in setting up and tearing down the frame are - /// similar enough to warrant a shared function, particularly as discrepancies - /// between the two would be disastrous. - void emitFrameMemOps(bool isStore, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI, - const LoadStoreMethod PossibleClasses[], - unsigned NumClasses) const; - - - bool hasFP(const MachineFunction &MF) const override; - - bool useFPForAddressing(const MachineFunction &MF) const; - - /// On AA - bool hasReservedCallFrame(const MachineFunction &MF) const override; - -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp deleted file mode 100644 index d1d89af6e04d..000000000000 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ /dev/null @@ -1,1576 +0,0 @@ -//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an instruction selector for the AArch64 target. -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64InstrInfo.h" -#include "AArch64Subtarget.h" -#include "AArch64TargetMachine.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/ADT/APSInt.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "aarch64-isel" - -//===--------------------------------------------------------------------===// -/// AArch64 specific code to select AArch64 machine instructions for -/// SelectionDAG operations. -/// -namespace { - -class AArch64DAGToDAGISel : public SelectionDAGISel { - AArch64TargetMachine &TM; - - /// Keep a pointer to the AArch64Subtarget around so that we can - /// make the right decision when generating code for different targets. 
- const AArch64Subtarget *Subtarget; - -public: - explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, - CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), TM(tm), - Subtarget(&TM.getSubtarget()) { - } - - const char *getPassName() const override { - return "AArch64 Instruction Selection"; - } - - // Include the pieces autogenerated from the target description. -#include "AArch64GenDAGISel.inc" - - template - bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) { - const ConstantSDNode *CN = dyn_cast(N); - if (!CN || CN->getZExtValue() % MemSize != 0 - || CN->getZExtValue() / MemSize > 0xfff) - return false; - - UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64); - return true; - } - - template - bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { - return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); - } - - /// Used for pre-lowered address-reference nodes, so we already know - /// the fields match. This operand's job is simply to add an - /// appropriate shift operand to the MOVZ/MOVK instruction. - template - bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) { - Imm = N; - Shift = CurDAG->getTargetConstant(LogShift, MVT::i32); - return true; - } - - bool SelectFPZeroOperand(SDValue N, SDValue &Dummy); - - bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, - unsigned RegWidth); - - bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector &OutOps) override; - - bool SelectLogicalImm(SDValue N, SDValue &Imm); - - template - bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) { - return SelectTSTBOperand(N, FixedPos, RegWidth); - } - - bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth); - - SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, - unsigned Op64); - - /// Put the given constant into a pool and return a DAG which will give its - /// address. - SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV); - - SDNode *TrySelectToMoveImm(SDNode *N); - SDNode *LowerToFPLitPool(SDNode *Node); - SDNode *SelectToLitPool(SDNode *N); - - SDNode* Select(SDNode*) override; -private: - /// Get the opcode for table lookup instruction - unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec); - - /// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4. - /// IsExt is to indicate if the result will be extended with an argument. - SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt); - - /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4. - SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *Opcode); - - /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4. - SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *Opcodes); - - /// Form sequences of consecutive 64/128-bit registers for use in NEON - /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have - /// between 1 and 4 elements. If it contains a single element that is returned - /// unchanged; otherwise a REG_SEQUENCE value is returned. - SDValue createDTuple(ArrayRef Vecs); - SDValue createQTuple(ArrayRef Vecs); - - /// Generic helper for the createDTuple/createQTuple - /// functions. Those should almost always be called instead. - SDValue createTuple(ArrayRef Vecs, unsigned RegClassIDs[], - unsigned SubRegs[]); - - /// Select NEON load-duplicate intrinsics. NumVecs should be 2, 3 or 4. - /// The opcode array specifies the instructions used for load. 
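-  /// The array is expected to hold eight opcodes: one per element size
-  /// (8/16/32/64 bits) for the 64-bit vector forms, followed by the same four
-  /// element sizes for the 128-bit forms, matching the OpcodeIndex
-  /// computation in the implementation.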
- SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *Opcodes); - - /// Select NEON load/store lane intrinsics. NumVecs should be 2, 3 or 4. - /// The opcode arrays specify the instructions used for load/store. - SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, - unsigned NumVecs, const uint16_t *Opcodes); - - SDValue getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD, - SDValue Operand); -}; -} - -bool -AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, - unsigned RegWidth) { - const ConstantFPSDNode *CN = dyn_cast(N); - if (!CN) return false; - - // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits - // is between 1 and 32 for a destination w-register, or 1 and 64 for an - // x-register. - // - // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we - // want THIS_NODE to be 2^fbits. This is much easier to deal with using - // integers. - bool IsExact; - - // fbits is between 1 and 64 in the worst-case, which means the fmul - // could have 2^64 as an actual operand. Need 65 bits of precision. - APSInt IntVal(65, true); - CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); - - // N.b. isPowerOf2 also checks for > 0. - if (!IsExact || !IntVal.isPowerOf2()) return false; - unsigned FBits = IntVal.logBase2(); - - // Checks above should have guaranteed that we haven't lost information in - // finding FBits, but it must still be in range. - if (FBits == 0 || FBits > RegWidth) return false; - - FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32); - return true; -} - -bool -AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector &OutOps) { - switch (ConstraintCode) { - default: llvm_unreachable("Unrecognised AArch64 memory constraint"); - case 'm': - // FIXME: more freedom is actually permitted for 'm'. We can go - // hunting for a base and an offset if we want. Of course, since - // we don't really know how the operand is going to be used we're - // probably restricted to the load/store pair's simm7 as an offset - // range anyway. - case 'Q': - OutOps.push_back(Op); - } - - return false; -} - -bool -AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) { - ConstantFPSDNode *Imm = dyn_cast(N); - if (!Imm || !Imm->getValueAPF().isPosZero()) - return false; - - // Doesn't actually carry any information, but keeps TableGen quiet. - Dummy = CurDAG->getTargetConstant(0, MVT::i32); - return true; -} - -bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) { - uint32_t Bits; - uint32_t RegWidth = N.getValueType().getSizeInBits(); - - ConstantSDNode *CN = dyn_cast(N); - if (!CN) return false; - - if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits)) - return false; - - Imm = CurDAG->getTargetConstant(Bits, MVT::i32); - return true; -} - -SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) { - SDNode *ResNode; - SDLoc dl(Node); - EVT DestType = Node->getValueType(0); - unsigned DestWidth = DestType.getSizeInBits(); - - unsigned MOVOpcode; - EVT MOVType; - int UImm16, Shift; - uint32_t LogicalBits; - - uint64_t BitPat = cast(Node)->getZExtValue(); - if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) { - MOVType = DestType; - MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii; - } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) { - MOVType = DestType; - MOVOpcode = DestWidth == 64 ? 
AArch64::MOVNxii : AArch64::MOVNwii; - } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) { - // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can - // use a 32-bit instruction: "movn w0, 0xedbc". - MOVType = MVT::i32; - MOVOpcode = AArch64::MOVNwii; - } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) { - MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi; - uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR; - - return CurDAG->getMachineNode(MOVOpcode, dl, DestType, - CurDAG->getRegister(ZR, DestType), - CurDAG->getTargetConstant(LogicalBits, MVT::i32)); - } else { - // Can't handle it in one instruction. There's scope for permitting two (or - // more) instructions, but that'll need more thought. - return nullptr; - } - - ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType, - CurDAG->getTargetConstant(UImm16, MVT::i32), - CurDAG->getTargetConstant(Shift, MVT::i32)); - - if (MOVType != DestType) { - ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, - MVT::i64, MVT::i32, MVT::Other, - CurDAG->getTargetConstant(0, MVT::i64), - SDValue(ResNode, 0), - CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32)); - } - - return ResNode; -} - -SDValue -AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL, - const Constant *CV) { - EVT PtrVT = getTargetLowering()->getPointerTy(); - - switch (getTargetLowering()->getTargetMachine().getCodeModel()) { - case CodeModel::Small: { - unsigned Alignment = - getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType()); - return CurDAG->getNode( - AArch64ISD::WrapperSmall, DL, PtrVT, - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG), - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12), - CurDAG->getConstant(Alignment, MVT::i32)); - } - case CodeModel::Large: { - SDNode *LitAddr; - LitAddr = CurDAG->getMachineNode( - AArch64::MOVZxii, DL, PtrVT, - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3), - CurDAG->getTargetConstant(3, MVT::i32)); - LitAddr = CurDAG->getMachineNode( - AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC), - CurDAG->getTargetConstant(2, MVT::i32)); - LitAddr = CurDAG->getMachineNode( - AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC), - CurDAG->getTargetConstant(1, MVT::i32)); - LitAddr = CurDAG->getMachineNode( - AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC), - CurDAG->getTargetConstant(0, MVT::i32)); - return SDValue(LitAddr, 0); - } - default: - llvm_unreachable("Only small and large code models supported now"); - } -} - -SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { - SDLoc DL(Node); - uint64_t UnsignedVal = cast(Node)->getZExtValue(); - int64_t SignedVal = cast(Node)->getSExtValue(); - EVT DestType = Node->getValueType(0); - - // Since we may end up loading a 64-bit constant from a 32-bit entry the - // constant in the pool may have a different type to the eventual node. 
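-  // (For example 0x0000000080000000 fits an unsigned 32-bit entry and is
-  // zero-extended, 0xffffffff80000000 fits a signed 32-bit entry and is
-  // sign-extended, and anything that fits neither gets a full 64-bit entry.)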
- ISD::LoadExtType Extension; - EVT MemType; - - assert((DestType == MVT::i64 || DestType == MVT::i32) - && "Only expect integer constants at the moment"); - - if (DestType == MVT::i32) { - Extension = ISD::NON_EXTLOAD; - MemType = MVT::i32; - } else if (UnsignedVal <= UINT32_MAX) { - Extension = ISD::ZEXTLOAD; - MemType = MVT::i32; - } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) { - Extension = ISD::SEXTLOAD; - MemType = MVT::i32; - } else { - Extension = ISD::NON_EXTLOAD; - MemType = MVT::i64; - } - - Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(), - MemType.getSizeInBits()), - UnsignedVal); - SDValue PoolAddr = getConstantPoolItemAddress(DL, CV); - unsigned Alignment = - getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType()); - - return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(), - PoolAddr, - MachinePointerInfo::getConstantPool(), MemType, - /* isVolatile = */ false, - /* isNonTemporal = */ false, - Alignment).getNode(); -} - -SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) { - SDLoc DL(Node); - const ConstantFP *FV = cast(Node)->getConstantFPValue(); - EVT DestType = Node->getValueType(0); - - unsigned Alignment = - getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType()); - SDValue PoolAddr = getConstantPoolItemAddress(DL, FV); - - return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr, - MachinePointerInfo::getConstantPool(), - /* isVolatile = */ false, - /* isNonTemporal = */ false, - /* isInvariant = */ true, - Alignment).getNode(); -} - -bool -AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos, - unsigned RegWidth) { - const ConstantSDNode *CN = dyn_cast(N); - if (!CN) return false; - - uint64_t Val = CN->getZExtValue(); - - if (!isPowerOf2_64(Val)) return false; - - unsigned TestedBit = Log2_64(Val); - // Checks above should have guaranteed that we haven't lost information in - // finding TestedBit, but it must still be in range. - if (TestedBit >= RegWidth) return false; - - FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64); - return true; -} - -SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, - unsigned Op16,unsigned Op32, - unsigned Op64) { - // Mostly direct translation to the given operations, except that we preserve - // the AtomicOrdering for use later on. 
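-  // (The ordering is pushed as an extra operand so that whatever later
-  // expands these atomic pseudo-instructions can still choose suitably
-  // acquire/release-flavoured sequences.)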
- AtomicSDNode *AN = cast(Node); - EVT VT = AN->getMemoryVT(); - - unsigned Op; - if (VT == MVT::i8) - Op = Op8; - else if (VT == MVT::i16) - Op = Op16; - else if (VT == MVT::i32) - Op = Op32; - else if (VT == MVT::i64) - Op = Op64; - else - llvm_unreachable("Unexpected atomic operation"); - - SmallVector Ops; - for (unsigned i = 1; i < AN->getNumOperands(); ++i) - Ops.push_back(AN->getOperand(i)); - - Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); - Ops.push_back(AN->getOperand(0)); // Chain moves to the end - - return CurDAG->SelectNodeTo(Node, Op, AN->getValueType(0), MVT::Other, Ops); -} - -SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef Regs) { - static unsigned RegClassIDs[] = { AArch64::DPairRegClassID, - AArch64::DTripleRegClassID, - AArch64::DQuadRegClassID }; - static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1, - AArch64::dsub_2, AArch64::dsub_3 }; - - return createTuple(Regs, RegClassIDs, SubRegs); -} - -SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef Regs) { - static unsigned RegClassIDs[] = { AArch64::QPairRegClassID, - AArch64::QTripleRegClassID, - AArch64::QQuadRegClassID }; - static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1, - AArch64::qsub_2, AArch64::qsub_3 }; - - return createTuple(Regs, RegClassIDs, SubRegs); -} - -SDValue AArch64DAGToDAGISel::createTuple(ArrayRef Regs, - unsigned RegClassIDs[], - unsigned SubRegs[]) { - // There's no special register-class for a vector-list of 1 element: it's just - // a vector. - if (Regs.size() == 1) - return Regs[0]; - - assert(Regs.size() >= 2 && Regs.size() <= 4); - - SDLoc DL(Regs[0].getNode()); - - SmallVector Ops; - - // First operand of REG_SEQUENCE is the desired RegClass. - Ops.push_back( - CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32)); - - // Then we get pairs of source & subregister-position for the components. - for (unsigned i = 0; i < Regs.size(); ++i) { - Ops.push_back(Regs[i]); - Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32)); - } - - SDNode *N = - CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); - return SDValue(N, 0); -} - - -// Get the register stride update opcode of a VLD/VST instruction that -// is otherwise equivalent to the given fixed stride updating instruction. 
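-// The "fixed" forms take an immediate post-increment equal to the transfer
-// size; when the increment is instead supplied in a register, the callers
-// below swap in the corresponding "_register" opcode.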
-static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { - switch (Opc) { - default: break; - case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register; - case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register; - case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register; - case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register; - case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register; - case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register; - case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register; - case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register; - - case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register; - case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register; - case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register; - case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register; - case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register; - case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register; - case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register; - - case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register; - case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register; - case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register; - case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register; - case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register; - case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register; - case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register; - - case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register; - case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register; - case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register; - case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register; - case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register; - case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register; - case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register; - - case AArch64::LD1x2WB_8B_fixed: return AArch64::LD1x2WB_8B_register; - case AArch64::LD1x2WB_4H_fixed: return AArch64::LD1x2WB_4H_register; - case AArch64::LD1x2WB_2S_fixed: return AArch64::LD1x2WB_2S_register; - case AArch64::LD1x2WB_1D_fixed: return AArch64::LD1x2WB_1D_register; - case AArch64::LD1x2WB_16B_fixed: return AArch64::LD1x2WB_16B_register; - case AArch64::LD1x2WB_8H_fixed: return AArch64::LD1x2WB_8H_register; - case AArch64::LD1x2WB_4S_fixed: return AArch64::LD1x2WB_4S_register; - case AArch64::LD1x2WB_2D_fixed: return AArch64::LD1x2WB_2D_register; - - case AArch64::LD1x3WB_8B_fixed: return AArch64::LD1x3WB_8B_register; - case AArch64::LD1x3WB_4H_fixed: return AArch64::LD1x3WB_4H_register; - case AArch64::LD1x3WB_2S_fixed: return AArch64::LD1x3WB_2S_register; - case AArch64::LD1x3WB_1D_fixed: return AArch64::LD1x3WB_1D_register; - case AArch64::LD1x3WB_16B_fixed: return AArch64::LD1x3WB_16B_register; - case AArch64::LD1x3WB_8H_fixed: return AArch64::LD1x3WB_8H_register; - case AArch64::LD1x3WB_4S_fixed: return AArch64::LD1x3WB_4S_register; - case AArch64::LD1x3WB_2D_fixed: return AArch64::LD1x3WB_2D_register; - - case AArch64::LD1x4WB_8B_fixed: return AArch64::LD1x4WB_8B_register; - case AArch64::LD1x4WB_4H_fixed: return AArch64::LD1x4WB_4H_register; - case AArch64::LD1x4WB_2S_fixed: return AArch64::LD1x4WB_2S_register; - case AArch64::LD1x4WB_1D_fixed: return AArch64::LD1x4WB_1D_register; - case AArch64::LD1x4WB_16B_fixed: return 
AArch64::LD1x4WB_16B_register; - case AArch64::LD1x4WB_8H_fixed: return AArch64::LD1x4WB_8H_register; - case AArch64::LD1x4WB_4S_fixed: return AArch64::LD1x4WB_4S_register; - case AArch64::LD1x4WB_2D_fixed: return AArch64::LD1x4WB_2D_register; - - case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register; - case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register; - case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register; - case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register; - case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register; - case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register; - case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register; - case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register; - - case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register; - case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register; - case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register; - case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register; - case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register; - case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register; - case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register; - - case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register; - case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register; - case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register; - case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register; - case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register; - case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register; - case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register; - - case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register; - case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register; - case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register; - case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register; - case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register; - case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register; - case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register; - - case AArch64::ST1x2WB_8B_fixed: return AArch64::ST1x2WB_8B_register; - case AArch64::ST1x2WB_4H_fixed: return AArch64::ST1x2WB_4H_register; - case AArch64::ST1x2WB_2S_fixed: return AArch64::ST1x2WB_2S_register; - case AArch64::ST1x2WB_1D_fixed: return AArch64::ST1x2WB_1D_register; - case AArch64::ST1x2WB_16B_fixed: return AArch64::ST1x2WB_16B_register; - case AArch64::ST1x2WB_8H_fixed: return AArch64::ST1x2WB_8H_register; - case AArch64::ST1x2WB_4S_fixed: return AArch64::ST1x2WB_4S_register; - case AArch64::ST1x2WB_2D_fixed: return AArch64::ST1x2WB_2D_register; - - case AArch64::ST1x3WB_8B_fixed: return AArch64::ST1x3WB_8B_register; - case AArch64::ST1x3WB_4H_fixed: return AArch64::ST1x3WB_4H_register; - case AArch64::ST1x3WB_2S_fixed: return AArch64::ST1x3WB_2S_register; - case AArch64::ST1x3WB_1D_fixed: return AArch64::ST1x3WB_1D_register; - case AArch64::ST1x3WB_16B_fixed: return AArch64::ST1x3WB_16B_register; - case AArch64::ST1x3WB_8H_fixed: return AArch64::ST1x3WB_8H_register; - case AArch64::ST1x3WB_4S_fixed: return AArch64::ST1x3WB_4S_register; - case AArch64::ST1x3WB_2D_fixed: return AArch64::ST1x3WB_2D_register; - - case AArch64::ST1x4WB_8B_fixed: return AArch64::ST1x4WB_8B_register; - case AArch64::ST1x4WB_4H_fixed: return AArch64::ST1x4WB_4H_register; - case AArch64::ST1x4WB_2S_fixed: return 
AArch64::ST1x4WB_2S_register; - case AArch64::ST1x4WB_1D_fixed: return AArch64::ST1x4WB_1D_register; - case AArch64::ST1x4WB_16B_fixed: return AArch64::ST1x4WB_16B_register; - case AArch64::ST1x4WB_8H_fixed: return AArch64::ST1x4WB_8H_register; - case AArch64::ST1x4WB_4S_fixed: return AArch64::ST1x4WB_4S_register; - case AArch64::ST1x4WB_2D_fixed: return AArch64::ST1x4WB_2D_register; - - // Post-index of duplicate loads - case AArch64::LD2R_WB_8B_fixed: return AArch64::LD2R_WB_8B_register; - case AArch64::LD2R_WB_4H_fixed: return AArch64::LD2R_WB_4H_register; - case AArch64::LD2R_WB_2S_fixed: return AArch64::LD2R_WB_2S_register; - case AArch64::LD2R_WB_1D_fixed: return AArch64::LD2R_WB_1D_register; - case AArch64::LD2R_WB_16B_fixed: return AArch64::LD2R_WB_16B_register; - case AArch64::LD2R_WB_8H_fixed: return AArch64::LD2R_WB_8H_register; - case AArch64::LD2R_WB_4S_fixed: return AArch64::LD2R_WB_4S_register; - case AArch64::LD2R_WB_2D_fixed: return AArch64::LD2R_WB_2D_register; - - case AArch64::LD3R_WB_8B_fixed: return AArch64::LD3R_WB_8B_register; - case AArch64::LD3R_WB_4H_fixed: return AArch64::LD3R_WB_4H_register; - case AArch64::LD3R_WB_2S_fixed: return AArch64::LD3R_WB_2S_register; - case AArch64::LD3R_WB_1D_fixed: return AArch64::LD3R_WB_1D_register; - case AArch64::LD3R_WB_16B_fixed: return AArch64::LD3R_WB_16B_register; - case AArch64::LD3R_WB_8H_fixed: return AArch64::LD3R_WB_8H_register; - case AArch64::LD3R_WB_4S_fixed: return AArch64::LD3R_WB_4S_register; - case AArch64::LD3R_WB_2D_fixed: return AArch64::LD3R_WB_2D_register; - - case AArch64::LD4R_WB_8B_fixed: return AArch64::LD4R_WB_8B_register; - case AArch64::LD4R_WB_4H_fixed: return AArch64::LD4R_WB_4H_register; - case AArch64::LD4R_WB_2S_fixed: return AArch64::LD4R_WB_2S_register; - case AArch64::LD4R_WB_1D_fixed: return AArch64::LD4R_WB_1D_register; - case AArch64::LD4R_WB_16B_fixed: return AArch64::LD4R_WB_16B_register; - case AArch64::LD4R_WB_8H_fixed: return AArch64::LD4R_WB_8H_register; - case AArch64::LD4R_WB_4S_fixed: return AArch64::LD4R_WB_4S_register; - case AArch64::LD4R_WB_2D_fixed: return AArch64::LD4R_WB_2D_register; - - // Post-index of lane loads - case AArch64::LD2LN_WB_B_fixed: return AArch64::LD2LN_WB_B_register; - case AArch64::LD2LN_WB_H_fixed: return AArch64::LD2LN_WB_H_register; - case AArch64::LD2LN_WB_S_fixed: return AArch64::LD2LN_WB_S_register; - case AArch64::LD2LN_WB_D_fixed: return AArch64::LD2LN_WB_D_register; - - case AArch64::LD3LN_WB_B_fixed: return AArch64::LD3LN_WB_B_register; - case AArch64::LD3LN_WB_H_fixed: return AArch64::LD3LN_WB_H_register; - case AArch64::LD3LN_WB_S_fixed: return AArch64::LD3LN_WB_S_register; - case AArch64::LD3LN_WB_D_fixed: return AArch64::LD3LN_WB_D_register; - - case AArch64::LD4LN_WB_B_fixed: return AArch64::LD4LN_WB_B_register; - case AArch64::LD4LN_WB_H_fixed: return AArch64::LD4LN_WB_H_register; - case AArch64::LD4LN_WB_S_fixed: return AArch64::LD4LN_WB_S_register; - case AArch64::LD4LN_WB_D_fixed: return AArch64::LD4LN_WB_D_register; - - // Post-index of lane stores - case AArch64::ST2LN_WB_B_fixed: return AArch64::ST2LN_WB_B_register; - case AArch64::ST2LN_WB_H_fixed: return AArch64::ST2LN_WB_H_register; - case AArch64::ST2LN_WB_S_fixed: return AArch64::ST2LN_WB_S_register; - case AArch64::ST2LN_WB_D_fixed: return AArch64::ST2LN_WB_D_register; - - case AArch64::ST3LN_WB_B_fixed: return AArch64::ST3LN_WB_B_register; - case AArch64::ST3LN_WB_H_fixed: return AArch64::ST3LN_WB_H_register; - case AArch64::ST3LN_WB_S_fixed: return 
AArch64::ST3LN_WB_S_register; - case AArch64::ST3LN_WB_D_fixed: return AArch64::ST3LN_WB_D_register; - - case AArch64::ST4LN_WB_B_fixed: return AArch64::ST4LN_WB_B_register; - case AArch64::ST4LN_WB_H_fixed: return AArch64::ST4LN_WB_H_register; - case AArch64::ST4LN_WB_S_fixed: return AArch64::ST4LN_WB_S_register; - case AArch64::ST4LN_WB_D_fixed: return AArch64::ST4LN_WB_D_register; - } - return Opc; // If not one we handle, return it unchanged. -} - -SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, - unsigned NumVecs, - const uint16_t *Opcodes) { - assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); - - EVT VT = N->getValueType(0); - unsigned OpcodeIndex; - bool is64BitVector = VT.is64BitVector(); - switch (VT.getScalarType().getSizeInBits()) { - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; - case 16: OpcodeIndex = is64BitVector ? 1 : 5; break; - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; - case 64: OpcodeIndex = is64BitVector ? 3 : 7; break; - default: llvm_unreachable("unhandled vector load type"); - } - unsigned Opc = Opcodes[OpcodeIndex]; - - SmallVector Ops; - unsigned AddrOpIdx = isUpdating ? 1 : 2; - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address - - if (isUpdating) { - SDValue Inc = N->getOperand(AddrOpIdx + 1); - if (!isa(Inc.getNode())) // Increment in Register - Opc = getVLDSTRegisterUpdateOpcode(Opc); - Ops.push_back(Inc); - } - - Ops.push_back(N->getOperand(0)); // Push back the Chain - - SmallVector ResTys; - // Push back the type of return super register - if (NumVecs == 1) - ResTys.push_back(VT); - else if (NumVecs == 3) - ResTys.push_back(MVT::Untyped); - else { - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, - is64BitVector ? NumVecs : NumVecs * 2); - ResTys.push_back(ResTy); - } - - if (isUpdating) - ResTys.push_back(MVT::i64); // Type of the updated register - ResTys.push_back(MVT::Other); // Type of the Chain - SDLoc dl(N); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - - // Transfer memoperands. - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast(N)->getMemOperand(); - cast(VLd)->setMemRefs(MemOp, MemOp + 1); - - if (NumVecs == 1) - return VLd; - - // If NumVecs > 1, the return result is a super register containing 2-4 - // consecutive vector registers. - SDValue SuperReg = SDValue(VLd, 0); - - unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - ReplaceUses(SDValue(N, Vec), - CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); - // Update users of the Chain - ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); - if (isUpdating) - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); - - return nullptr; -} - -SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, - unsigned NumVecs, - const uint16_t *Opcodes) { - assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); - SDLoc dl(N); - - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast(N)->getMemOperand(); - - unsigned AddrOpIdx = isUpdating ? 1 : 2; - unsigned Vec0Idx = 3; - EVT VT = N->getOperand(Vec0Idx).getValueType(); - unsigned OpcodeIndex; - bool is64BitVector = VT.is64BitVector(); - switch (VT.getScalarType().getSizeInBits()) { - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; - case 16: OpcodeIndex = is64BitVector ? 1 : 5; break; - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; - case 64: OpcodeIndex = is64BitVector ? 
3 : 7; break; - default: llvm_unreachable("unhandled vector store type"); - } - unsigned Opc = Opcodes[OpcodeIndex]; - - SmallVector ResTys; - if (isUpdating) - ResTys.push_back(MVT::i64); - ResTys.push_back(MVT::Other); // Type for the Chain - - SmallVector Ops; - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address - - if (isUpdating) { - SDValue Inc = N->getOperand(AddrOpIdx + 1); - if (!isa(Inc.getNode())) // Increment in Register - Opc = getVLDSTRegisterUpdateOpcode(Opc); - Ops.push_back(Inc); - } - - SmallVector Regs(N->op_begin() + Vec0Idx, - N->op_begin() + Vec0Idx + NumVecs); - SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs); - Ops.push_back(SrcReg); - - // Push back the Chain - Ops.push_back(N->getOperand(0)); - - // Transfer memoperands. - SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - cast(VSt)->setMemRefs(MemOp, MemOp + 1); - - return VSt; -} - -SDValue -AArch64DAGToDAGISel::getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD, - SDValue Operand) { - SDNode *Reg = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, DL, - VT, VTD, MVT::Other, - CurDAG->getTargetConstant(0, MVT::i64), - Operand, - CurDAG->getTargetConstant(AArch64::sub_64, MVT::i32)); - return SDValue(Reg, 0); -} - -SDNode *AArch64DAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, - unsigned NumVecs, - const uint16_t *Opcodes) { - assert(NumVecs >=2 && NumVecs <= 4 && "Load Dup NumVecs out-of-range"); - SDLoc dl(N); - - EVT VT = N->getValueType(0); - unsigned OpcodeIndex; - bool is64BitVector = VT.is64BitVector(); - switch (VT.getScalarType().getSizeInBits()) { - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; - case 16: OpcodeIndex = is64BitVector ? 1 : 5; break; - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; - case 64: OpcodeIndex = is64BitVector ? 3 : 7; break; - default: llvm_unreachable("unhandled vector duplicate lane load type"); - } - unsigned Opc = Opcodes[OpcodeIndex]; - - SDValue SuperReg; - SmallVector Ops; - Ops.push_back(N->getOperand(1)); // Push back the Memory Address - if (isUpdating) { - SDValue Inc = N->getOperand(2); - if (!isa(Inc.getNode())) // Increment in Register - Opc = getVLDSTRegisterUpdateOpcode(Opc); - Ops.push_back(Inc); - } - Ops.push_back(N->getOperand(0)); // Push back the Chain - - SmallVector ResTys; - // Push back the type of return super register - if (NumVecs == 3) - ResTys.push_back(MVT::Untyped); - else { - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, - is64BitVector ? NumVecs : NumVecs * 2); - ResTys.push_back(ResTy); - } - if (isUpdating) - ResTys.push_back(MVT::i64); // Type of the updated register - ResTys.push_back(MVT::Other); // Type of the Chain - SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - - // Transfer memoperands. - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast(N)->getMemOperand(); - cast(VLdDup)->setMemRefs(MemOp, MemOp + 1); - - SuperReg = SDValue(VLdDup, 0); - unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0; - // Update uses of each registers in super register - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - ReplaceUses(SDValue(N, Vec), - CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); - // Update uses of the Chain - ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); - if (isUpdating) - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); - return nullptr; -} - -// We only have 128-bit vector type of load/store lane instructions. 
-// If it is 64-bit vector, we also select it to the 128-bit instructions. -// Just use SUBREG_TO_REG to adapt the input to 128-bit vector and -// EXTRACT_SUBREG to get the 64-bit vector from the 128-bit vector output. -SDNode *AArch64DAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, - bool isUpdating, unsigned NumVecs, - const uint16_t *Opcodes) { - assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); - SDLoc dl(N); - unsigned AddrOpIdx = isUpdating ? 1 : 2; - unsigned Vec0Idx = 3; - - SDValue Chain = N->getOperand(0); - unsigned Lane = - cast(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); - EVT VT = N->getOperand(Vec0Idx).getValueType(); - bool is64BitVector = VT.is64BitVector(); - EVT VT64; // 64-bit Vector Type - - if (is64BitVector) { - VT64 = VT; - VT = EVT::getVectorVT(*CurDAG->getContext(), VT.getVectorElementType(), - VT.getVectorNumElements() * 2); - } - - unsigned OpcodeIndex; - switch (VT.getScalarType().getSizeInBits()) { - case 8: OpcodeIndex = 0; break; - case 16: OpcodeIndex = 1; break; - case 32: OpcodeIndex = 2; break; - case 64: OpcodeIndex = 3; break; - default: llvm_unreachable("unhandled vector lane load/store type"); - } - unsigned Opc = Opcodes[OpcodeIndex]; - - SmallVector ResTys; - if (IsLoad) { - // Push back the type of return super register - if (NumVecs == 3) - ResTys.push_back(MVT::Untyped); - else { - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, - is64BitVector ? NumVecs : NumVecs * 2); - ResTys.push_back(ResTy); - } - } - if (isUpdating) - ResTys.push_back(MVT::i64); // Type of the updated register - ResTys.push_back(MVT::Other); // Type of Chain - SmallVector Ops; - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address - if (isUpdating) { - SDValue Inc = N->getOperand(AddrOpIdx + 1); - if (!isa(Inc.getNode())) // Increment in Register - Opc = getVLDSTRegisterUpdateOpcode(Opc); - Ops.push_back(Inc); - } - - SmallVector Regs(N->op_begin() + Vec0Idx, - N->op_begin() + Vec0Idx + NumVecs); - if (is64BitVector) - for (unsigned i = 0; i < Regs.size(); i++) - Regs[i] = getTargetSubregToReg(AArch64::sub_64, dl, VT, VT64, Regs[i]); - SDValue SuperReg = createQTuple(Regs); - - Ops.push_back(SuperReg); // Source Reg - SDValue LaneValue = CurDAG->getTargetConstant(Lane, MVT::i32); - Ops.push_back(LaneValue); - Ops.push_back(Chain); // Push back the Chain - - SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast(N)->getMemOperand(); - cast(VLdLn)->setMemRefs(MemOp, MemOp + 1); - if (!IsLoad) - return VLdLn; - - // Extract the subregisters. - SuperReg = SDValue(VLdLn, 0); - unsigned Sub0 = AArch64::qsub_0; - // Update uses of each registers in super register - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue SUB0 = CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg); - if (is64BitVector) { - SUB0 = CurDAG->getTargetExtractSubreg(AArch64::sub_64, dl, VT64, SUB0); - } - ReplaceUses(SDValue(N, Vec), SUB0); - } - ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); - if (isUpdating) - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); - return nullptr; -} - -unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit, - unsigned NumOfVec) { - assert(NumOfVec >= 1 && NumOfVec <= 4 && "VST NumVecs out-of-range"); - - unsigned Opc = 0; - switch (NumOfVec) { - default: - break; - case 1: - if (IsExt) - Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b; - else - Opc = Is64Bit ? 
AArch64::TBL1_8b : AArch64::TBL1_16b; - break; - case 2: - if (IsExt) - Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b; - else - Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b; - break; - case 3: - if (IsExt) - Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b; - else - Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b; - break; - case 4: - if (IsExt) - Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b; - else - Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b; - break; - } - - return Opc; -} - -SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs, - bool IsExt) { - assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); - SDLoc dl(N); - - // Check the element of look up table is 64-bit or not - unsigned Vec0Idx = IsExt ? 2 : 1; - assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() && - "The element of lookup table for vtbl and vtbx must be 128-bit"); - - // Check the return value type is 64-bit or not - EVT ResVT = N->getValueType(0); - bool is64BitRes = ResVT.is64BitVector(); - - // Create new SDValue for vector list - SmallVector Regs(N->op_begin() + Vec0Idx, - N->op_begin() + Vec0Idx + NumVecs); - SDValue TblReg = createQTuple(Regs); - unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs); - - SmallVector Ops; - if (IsExt) - Ops.push_back(N->getOperand(1)); - Ops.push_back(TblReg); - Ops.push_back(N->getOperand(Vec0Idx + NumVecs)); - return CurDAG->getMachineNode(Opc, dl, ResVT, Ops); -} - -SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { - // Dump information about the Node being selected - DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n"); - - if (Node->isMachineOpcode()) { - DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); - Node->setNodeId(-1); - return nullptr; - } - - switch (Node->getOpcode()) { - case ISD::ATOMIC_LOAD_ADD: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_ADD_I8, - AArch64::ATOMIC_LOAD_ADD_I16, - AArch64::ATOMIC_LOAD_ADD_I32, - AArch64::ATOMIC_LOAD_ADD_I64); - case ISD::ATOMIC_LOAD_SUB: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_SUB_I8, - AArch64::ATOMIC_LOAD_SUB_I16, - AArch64::ATOMIC_LOAD_SUB_I32, - AArch64::ATOMIC_LOAD_SUB_I64); - case ISD::ATOMIC_LOAD_AND: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_AND_I8, - AArch64::ATOMIC_LOAD_AND_I16, - AArch64::ATOMIC_LOAD_AND_I32, - AArch64::ATOMIC_LOAD_AND_I64); - case ISD::ATOMIC_LOAD_OR: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_OR_I8, - AArch64::ATOMIC_LOAD_OR_I16, - AArch64::ATOMIC_LOAD_OR_I32, - AArch64::ATOMIC_LOAD_OR_I64); - case ISD::ATOMIC_LOAD_XOR: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_XOR_I8, - AArch64::ATOMIC_LOAD_XOR_I16, - AArch64::ATOMIC_LOAD_XOR_I32, - AArch64::ATOMIC_LOAD_XOR_I64); - case ISD::ATOMIC_LOAD_NAND: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_NAND_I8, - AArch64::ATOMIC_LOAD_NAND_I16, - AArch64::ATOMIC_LOAD_NAND_I32, - AArch64::ATOMIC_LOAD_NAND_I64); - case ISD::ATOMIC_LOAD_MIN: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_MIN_I8, - AArch64::ATOMIC_LOAD_MIN_I16, - AArch64::ATOMIC_LOAD_MIN_I32, - AArch64::ATOMIC_LOAD_MIN_I64); - case ISD::ATOMIC_LOAD_MAX: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_MAX_I8, - AArch64::ATOMIC_LOAD_MAX_I16, - AArch64::ATOMIC_LOAD_MAX_I32, - AArch64::ATOMIC_LOAD_MAX_I64); - case ISD::ATOMIC_LOAD_UMIN: - return SelectAtomic(Node, - AArch64::ATOMIC_LOAD_UMIN_I8, - AArch64::ATOMIC_LOAD_UMIN_I16, - AArch64::ATOMIC_LOAD_UMIN_I32, - AArch64::ATOMIC_LOAD_UMIN_I64); - case ISD::ATOMIC_LOAD_UMAX: - return 
SelectAtomic(Node, - AArch64::ATOMIC_LOAD_UMAX_I8, - AArch64::ATOMIC_LOAD_UMAX_I16, - AArch64::ATOMIC_LOAD_UMAX_I32, - AArch64::ATOMIC_LOAD_UMAX_I64); - case ISD::ATOMIC_SWAP: - return SelectAtomic(Node, - AArch64::ATOMIC_SWAP_I8, - AArch64::ATOMIC_SWAP_I16, - AArch64::ATOMIC_SWAP_I32, - AArch64::ATOMIC_SWAP_I64); - case ISD::ATOMIC_CMP_SWAP: - return SelectAtomic(Node, - AArch64::ATOMIC_CMP_SWAP_I8, - AArch64::ATOMIC_CMP_SWAP_I16, - AArch64::ATOMIC_CMP_SWAP_I32, - AArch64::ATOMIC_CMP_SWAP_I64); - case ISD::FrameIndex: { - int FI = cast(Node)->getIndex(); - EVT PtrTy = getTargetLowering()->getPointerTy(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy); - return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy, - TFI, CurDAG->getTargetConstant(0, PtrTy)); - } - case ISD::Constant: { - SDNode *ResNode = nullptr; - if (cast(Node)->getZExtValue() == 0) { - // XZR and WZR are probably even better than an actual move: most of the - // time they can be folded into another instruction with *no* cost. - - EVT Ty = Node->getValueType(0); - assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type"); - uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR; - ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), - SDLoc(Node), - Register, Ty).getNode(); - } - - // Next best option is a move-immediate, see if we can do that. - if (!ResNode) { - ResNode = TrySelectToMoveImm(Node); - } - - if (ResNode) - return ResNode; - - // If even that fails we fall back to a lit-pool entry at the moment. Future - // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions. - ResNode = SelectToLitPool(Node); - assert(ResNode && "We need *some* way to materialise a constant"); - - // We want to continue selection at this point since the litpool access - // generated used generic nodes for simplicity. - ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); - Node = ResNode; - break; - } - case ISD::ConstantFP: { - if (A64Imms::isFPImm(cast(Node)->getValueAPF())) { - // FMOV will take care of it from TableGen - break; - } - - SDNode *ResNode = LowerToFPLitPool(Node); - ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); - - // We want to continue selection at this point since the litpool access - // generated used generic nodes for simplicity. 
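-    // (ReplaceUses has already rewired the constant's users to the new load,
-    // and breaking out of the switch lets the remaining, table-generated
-    // selection run over that generic load node.)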
- Node = ResNode; - break; - } - case AArch64ISD::NEON_LD1_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD1WB_8B_fixed, AArch64::LD1WB_4H_fixed, - AArch64::LD1WB_2S_fixed, AArch64::LD1WB_1D_fixed, - AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed, - AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed - }; - return SelectVLD(Node, true, 1, Opcodes); - } - case AArch64ISD::NEON_LD2_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD2WB_8B_fixed, AArch64::LD2WB_4H_fixed, - AArch64::LD2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed, - AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed, - AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed - }; - return SelectVLD(Node, true, 2, Opcodes); - } - case AArch64ISD::NEON_LD3_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD3WB_8B_fixed, AArch64::LD3WB_4H_fixed, - AArch64::LD3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed, - AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed, - AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed - }; - return SelectVLD(Node, true, 3, Opcodes); - } - case AArch64ISD::NEON_LD4_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD4WB_8B_fixed, AArch64::LD4WB_4H_fixed, - AArch64::LD4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed, - AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed, - AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed - }; - return SelectVLD(Node, true, 4, Opcodes); - } - case AArch64ISD::NEON_LD1x2_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD1x2WB_8B_fixed, AArch64::LD1x2WB_4H_fixed, - AArch64::LD1x2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed, - AArch64::LD1x2WB_16B_fixed, AArch64::LD1x2WB_8H_fixed, - AArch64::LD1x2WB_4S_fixed, AArch64::LD1x2WB_2D_fixed - }; - return SelectVLD(Node, true, 2, Opcodes); - } - case AArch64ISD::NEON_LD1x3_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD1x3WB_8B_fixed, AArch64::LD1x3WB_4H_fixed, - AArch64::LD1x3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed, - AArch64::LD1x3WB_16B_fixed, AArch64::LD1x3WB_8H_fixed, - AArch64::LD1x3WB_4S_fixed, AArch64::LD1x3WB_2D_fixed - }; - return SelectVLD(Node, true, 3, Opcodes); - } - case AArch64ISD::NEON_LD1x4_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD1x4WB_8B_fixed, AArch64::LD1x4WB_4H_fixed, - AArch64::LD1x4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed, - AArch64::LD1x4WB_16B_fixed, AArch64::LD1x4WB_8H_fixed, - AArch64::LD1x4WB_4S_fixed, AArch64::LD1x4WB_2D_fixed - }; - return SelectVLD(Node, true, 4, Opcodes); - } - case AArch64ISD::NEON_ST1_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST1WB_8B_fixed, AArch64::ST1WB_4H_fixed, - AArch64::ST1WB_2S_fixed, AArch64::ST1WB_1D_fixed, - AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed, - AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed - }; - return SelectVST(Node, true, 1, Opcodes); - } - case AArch64ISD::NEON_ST2_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST2WB_8B_fixed, AArch64::ST2WB_4H_fixed, - AArch64::ST2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed, - AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed, - AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed - }; - return SelectVST(Node, true, 2, Opcodes); - } - case AArch64ISD::NEON_ST3_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST3WB_8B_fixed, AArch64::ST3WB_4H_fixed, - AArch64::ST3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed, - AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed, - AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed - }; - return SelectVST(Node, true, 3, Opcodes); - } - case AArch64ISD::NEON_ST4_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST4WB_8B_fixed, 
AArch64::ST4WB_4H_fixed, - AArch64::ST4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed, - AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed, - AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed - }; - return SelectVST(Node, true, 4, Opcodes); - } - case AArch64ISD::NEON_LD2DUP: { - static const uint16_t Opcodes[] = { - AArch64::LD2R_8B, AArch64::LD2R_4H, AArch64::LD2R_2S, - AArch64::LD2R_1D, AArch64::LD2R_16B, AArch64::LD2R_8H, - AArch64::LD2R_4S, AArch64::LD2R_2D - }; - return SelectVLDDup(Node, false, 2, Opcodes); - } - case AArch64ISD::NEON_LD3DUP: { - static const uint16_t Opcodes[] = { - AArch64::LD3R_8B, AArch64::LD3R_4H, AArch64::LD3R_2S, - AArch64::LD3R_1D, AArch64::LD3R_16B, AArch64::LD3R_8H, - AArch64::LD3R_4S, AArch64::LD3R_2D - }; - return SelectVLDDup(Node, false, 3, Opcodes); - } - case AArch64ISD::NEON_LD4DUP: { - static const uint16_t Opcodes[] = { - AArch64::LD4R_8B, AArch64::LD4R_4H, AArch64::LD4R_2S, - AArch64::LD4R_1D, AArch64::LD4R_16B, AArch64::LD4R_8H, - AArch64::LD4R_4S, AArch64::LD4R_2D - }; - return SelectVLDDup(Node, false, 4, Opcodes); - } - case AArch64ISD::NEON_LD2DUP_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD2R_WB_8B_fixed, AArch64::LD2R_WB_4H_fixed, - AArch64::LD2R_WB_2S_fixed, AArch64::LD2R_WB_1D_fixed, - AArch64::LD2R_WB_16B_fixed, AArch64::LD2R_WB_8H_fixed, - AArch64::LD2R_WB_4S_fixed, AArch64::LD2R_WB_2D_fixed - }; - return SelectVLDDup(Node, true, 2, Opcodes); - } - case AArch64ISD::NEON_LD3DUP_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD3R_WB_8B_fixed, AArch64::LD3R_WB_4H_fixed, - AArch64::LD3R_WB_2S_fixed, AArch64::LD3R_WB_1D_fixed, - AArch64::LD3R_WB_16B_fixed, AArch64::LD3R_WB_8H_fixed, - AArch64::LD3R_WB_4S_fixed, AArch64::LD3R_WB_2D_fixed - }; - return SelectVLDDup(Node, true, 3, Opcodes); - } - case AArch64ISD::NEON_LD4DUP_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD4R_WB_8B_fixed, AArch64::LD4R_WB_4H_fixed, - AArch64::LD4R_WB_2S_fixed, AArch64::LD4R_WB_1D_fixed, - AArch64::LD4R_WB_16B_fixed, AArch64::LD4R_WB_8H_fixed, - AArch64::LD4R_WB_4S_fixed, AArch64::LD4R_WB_2D_fixed - }; - return SelectVLDDup(Node, true, 4, Opcodes); - } - case AArch64ISD::NEON_LD2LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD2LN_WB_B_fixed, AArch64::LD2LN_WB_H_fixed, - AArch64::LD2LN_WB_S_fixed, AArch64::LD2LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, true, true, 2, Opcodes); - } - case AArch64ISD::NEON_LD3LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD3LN_WB_B_fixed, AArch64::LD3LN_WB_H_fixed, - AArch64::LD3LN_WB_S_fixed, AArch64::LD3LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, true, true, 3, Opcodes); - } - case AArch64ISD::NEON_LD4LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD4LN_WB_B_fixed, AArch64::LD4LN_WB_H_fixed, - AArch64::LD4LN_WB_S_fixed, AArch64::LD4LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, true, true, 4, Opcodes); - } - case AArch64ISD::NEON_ST2LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST2LN_WB_B_fixed, AArch64::ST2LN_WB_H_fixed, - AArch64::ST2LN_WB_S_fixed, AArch64::ST2LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, false, true, 2, Opcodes); - } - case AArch64ISD::NEON_ST3LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST3LN_WB_B_fixed, AArch64::ST3LN_WB_H_fixed, - AArch64::ST3LN_WB_S_fixed, AArch64::ST3LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, false, true, 3, Opcodes); - } - case AArch64ISD::NEON_ST4LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST4LN_WB_B_fixed, AArch64::ST4LN_WB_H_fixed, - 
AArch64::ST4LN_WB_S_fixed, AArch64::ST4LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, false, true, 4, Opcodes); - } - case AArch64ISD::NEON_ST1x2_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST1x2WB_8B_fixed, AArch64::ST1x2WB_4H_fixed, - AArch64::ST1x2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed, - AArch64::ST1x2WB_16B_fixed, AArch64::ST1x2WB_8H_fixed, - AArch64::ST1x2WB_4S_fixed, AArch64::ST1x2WB_2D_fixed - }; - return SelectVST(Node, true, 2, Opcodes); - } - case AArch64ISD::NEON_ST1x3_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST1x3WB_8B_fixed, AArch64::ST1x3WB_4H_fixed, - AArch64::ST1x3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed, - AArch64::ST1x3WB_16B_fixed, AArch64::ST1x3WB_8H_fixed, - AArch64::ST1x3WB_4S_fixed, AArch64::ST1x3WB_2D_fixed - }; - return SelectVST(Node, true, 3, Opcodes); - } - case AArch64ISD::NEON_ST1x4_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST1x4WB_8B_fixed, AArch64::ST1x4WB_4H_fixed, - AArch64::ST1x4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed, - AArch64::ST1x4WB_16B_fixed, AArch64::ST1x4WB_8H_fixed, - AArch64::ST1x4WB_4S_fixed, AArch64::ST1x4WB_2D_fixed - }; - return SelectVST(Node, true, 4, Opcodes); - } - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast(Node->getOperand(0))->getZExtValue(); - bool IsExt = false; - switch (IntNo) { - default: - break; - case Intrinsic::aarch64_neon_vtbx1: - IsExt = true; - case Intrinsic::aarch64_neon_vtbl1: - return SelectVTBL(Node, 1, IsExt); - case Intrinsic::aarch64_neon_vtbx2: - IsExt = true; - case Intrinsic::aarch64_neon_vtbl2: - return SelectVTBL(Node, 2, IsExt); - case Intrinsic::aarch64_neon_vtbx3: - IsExt = true; - case Intrinsic::aarch64_neon_vtbl3: - return SelectVTBL(Node, 3, IsExt); - case Intrinsic::aarch64_neon_vtbx4: - IsExt = true; - case Intrinsic::aarch64_neon_vtbl4: - return SelectVTBL(Node, 4, IsExt); - } - break; - } - case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); - switch (IntNo) { - default: - break; - case Intrinsic::arm_neon_vld1: { - static const uint16_t Opcodes[] = { - AArch64::LD1_8B, AArch64::LD1_4H, AArch64::LD1_2S, AArch64::LD1_1D, - AArch64::LD1_16B, AArch64::LD1_8H, AArch64::LD1_4S, AArch64::LD1_2D - }; - return SelectVLD(Node, false, 1, Opcodes); - } - case Intrinsic::arm_neon_vld2: { - static const uint16_t Opcodes[] = { - AArch64::LD2_8B, AArch64::LD2_4H, AArch64::LD2_2S, AArch64::LD1x2_1D, - AArch64::LD2_16B, AArch64::LD2_8H, AArch64::LD2_4S, AArch64::LD2_2D - }; - return SelectVLD(Node, false, 2, Opcodes); - } - case Intrinsic::arm_neon_vld3: { - static const uint16_t Opcodes[] = { - AArch64::LD3_8B, AArch64::LD3_4H, AArch64::LD3_2S, AArch64::LD1x3_1D, - AArch64::LD3_16B, AArch64::LD3_8H, AArch64::LD3_4S, AArch64::LD3_2D - }; - return SelectVLD(Node, false, 3, Opcodes); - } - case Intrinsic::arm_neon_vld4: { - static const uint16_t Opcodes[] = { - AArch64::LD4_8B, AArch64::LD4_4H, AArch64::LD4_2S, AArch64::LD1x4_1D, - AArch64::LD4_16B, AArch64::LD4_8H, AArch64::LD4_4S, AArch64::LD4_2D - }; - return SelectVLD(Node, false, 4, Opcodes); - } - case Intrinsic::aarch64_neon_vld1x2: { - static const uint16_t Opcodes[] = { - AArch64::LD1x2_8B, AArch64::LD1x2_4H, AArch64::LD1x2_2S, - AArch64::LD1x2_1D, AArch64::LD1x2_16B, AArch64::LD1x2_8H, - AArch64::LD1x2_4S, AArch64::LD1x2_2D - }; - return SelectVLD(Node, false, 2, Opcodes); - } - case Intrinsic::aarch64_neon_vld1x3: { - static const uint16_t Opcodes[] = { - AArch64::LD1x3_8B, AArch64::LD1x3_4H, AArch64::LD1x3_2S, - AArch64::LD1x3_1D, 
AArch64::LD1x3_16B, AArch64::LD1x3_8H, - AArch64::LD1x3_4S, AArch64::LD1x3_2D - }; - return SelectVLD(Node, false, 3, Opcodes); - } - case Intrinsic::aarch64_neon_vld1x4: { - static const uint16_t Opcodes[] = { - AArch64::LD1x4_8B, AArch64::LD1x4_4H, AArch64::LD1x4_2S, - AArch64::LD1x4_1D, AArch64::LD1x4_16B, AArch64::LD1x4_8H, - AArch64::LD1x4_4S, AArch64::LD1x4_2D - }; - return SelectVLD(Node, false, 4, Opcodes); - } - case Intrinsic::arm_neon_vst1: { - static const uint16_t Opcodes[] = { - AArch64::ST1_8B, AArch64::ST1_4H, AArch64::ST1_2S, AArch64::ST1_1D, - AArch64::ST1_16B, AArch64::ST1_8H, AArch64::ST1_4S, AArch64::ST1_2D - }; - return SelectVST(Node, false, 1, Opcodes); - } - case Intrinsic::arm_neon_vst2: { - static const uint16_t Opcodes[] = { - AArch64::ST2_8B, AArch64::ST2_4H, AArch64::ST2_2S, AArch64::ST1x2_1D, - AArch64::ST2_16B, AArch64::ST2_8H, AArch64::ST2_4S, AArch64::ST2_2D - }; - return SelectVST(Node, false, 2, Opcodes); - } - case Intrinsic::arm_neon_vst3: { - static const uint16_t Opcodes[] = { - AArch64::ST3_8B, AArch64::ST3_4H, AArch64::ST3_2S, AArch64::ST1x3_1D, - AArch64::ST3_16B, AArch64::ST3_8H, AArch64::ST3_4S, AArch64::ST3_2D - }; - return SelectVST(Node, false, 3, Opcodes); - } - case Intrinsic::arm_neon_vst4: { - static const uint16_t Opcodes[] = { - AArch64::ST4_8B, AArch64::ST4_4H, AArch64::ST4_2S, AArch64::ST1x4_1D, - AArch64::ST4_16B, AArch64::ST4_8H, AArch64::ST4_4S, AArch64::ST4_2D - }; - return SelectVST(Node, false, 4, Opcodes); - } - case Intrinsic::aarch64_neon_vst1x2: { - static const uint16_t Opcodes[] = { - AArch64::ST1x2_8B, AArch64::ST1x2_4H, AArch64::ST1x2_2S, - AArch64::ST1x2_1D, AArch64::ST1x2_16B, AArch64::ST1x2_8H, - AArch64::ST1x2_4S, AArch64::ST1x2_2D - }; - return SelectVST(Node, false, 2, Opcodes); - } - case Intrinsic::aarch64_neon_vst1x3: { - static const uint16_t Opcodes[] = { - AArch64::ST1x3_8B, AArch64::ST1x3_4H, AArch64::ST1x3_2S, - AArch64::ST1x3_1D, AArch64::ST1x3_16B, AArch64::ST1x3_8H, - AArch64::ST1x3_4S, AArch64::ST1x3_2D - }; - return SelectVST(Node, false, 3, Opcodes); - } - case Intrinsic::aarch64_neon_vst1x4: { - static const uint16_t Opcodes[] = { - AArch64::ST1x4_8B, AArch64::ST1x4_4H, AArch64::ST1x4_2S, - AArch64::ST1x4_1D, AArch64::ST1x4_16B, AArch64::ST1x4_8H, - AArch64::ST1x4_4S, AArch64::ST1x4_2D - }; - return SelectVST(Node, false, 4, Opcodes); - } - case Intrinsic::arm_neon_vld2lane: { - static const uint16_t Opcodes[] = { - AArch64::LD2LN_B, AArch64::LD2LN_H, AArch64::LD2LN_S, AArch64::LD2LN_D - }; - return SelectVLDSTLane(Node, true, false, 2, Opcodes); - } - case Intrinsic::arm_neon_vld3lane: { - static const uint16_t Opcodes[] = { - AArch64::LD3LN_B, AArch64::LD3LN_H, AArch64::LD3LN_S, AArch64::LD3LN_D - }; - return SelectVLDSTLane(Node, true, false, 3, Opcodes); - } - case Intrinsic::arm_neon_vld4lane: { - static const uint16_t Opcodes[] = { - AArch64::LD4LN_B, AArch64::LD4LN_H, AArch64::LD4LN_S, AArch64::LD4LN_D - }; - return SelectVLDSTLane(Node, true, false, 4, Opcodes); - } - case Intrinsic::arm_neon_vst2lane: { - static const uint16_t Opcodes[] = { - AArch64::ST2LN_B, AArch64::ST2LN_H, AArch64::ST2LN_S, AArch64::ST2LN_D - }; - return SelectVLDSTLane(Node, false, false, 2, Opcodes); - } - case Intrinsic::arm_neon_vst3lane: { - static const uint16_t Opcodes[] = { - AArch64::ST3LN_B, AArch64::ST3LN_H, AArch64::ST3LN_S, AArch64::ST3LN_D - }; - return SelectVLDSTLane(Node, false, false, 3, Opcodes); - } - case Intrinsic::arm_neon_vst4lane: { - static const uint16_t Opcodes[] = { - AArch64::ST4LN_B, 
AArch64::ST4LN_H, AArch64::ST4LN_S, AArch64::ST4LN_D - }; - return SelectVLDSTLane(Node, false, false, 4, Opcodes); - } - } // End of switch IntNo - break; - } // End of case ISD::INTRINSIC_VOID and :ISD::INTRINSIC_W_CHAIN - default: - break; // Let generic code handle it - } - - SDNode *ResNode = SelectCode(Node); - - DEBUG(dbgs() << "=> "; - if (ResNode == nullptr || ResNode == Node) - Node->dump(CurDAG); - else - ResNode->dump(CurDAG); - dbgs() << "\n"); - - return ResNode; -} - -/// This pass converts a legalized DAG into a AArch64-specific DAG, ready for -/// instruction scheduling. -FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM, - CodeGenOpt::Level OptLevel) { - return new AArch64DAGToDAGISel(TM, OptLevel); -} diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp deleted file mode 100644 index d02a03ccb2a7..000000000000 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ /dev/null @@ -1,5564 +0,0 @@ -//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that AArch64 uses to lower LLVM code into a -// selection DAG. -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64ISelLowering.h" -#include "AArch64MachineFunctionInfo.h" -#include "AArch64Subtarget.h" -#include "AArch64TargetMachine.h" -#include "AArch64TargetObjectFile.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/Support/MathExtras.h" - -using namespace llvm; - -#define DEBUG_TYPE "aarch64-isel" - -static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) { - assert (TM.getSubtarget().isTargetELF() && - "unknown subtarget type"); - return new AArch64ElfTargetObjectFile(); -} - -AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) { - - const AArch64Subtarget *Subtarget = &TM.getSubtarget(); - - // SIMD compares set the entire lane's bits to 1 - setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - - // Scalar register <-> type mapping - addRegisterClass(MVT::i32, &AArch64::GPR32RegClass); - addRegisterClass(MVT::i64, &AArch64::GPR64RegClass); - - if (Subtarget->hasFPARMv8()) { - addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); - addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); - addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); - } - - if (Subtarget->hasNEON()) { - // And the vectors - addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass); - addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass); - addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass); - addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass); - 
addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass); - } - - computeRegisterProperties(); - - // We combine OR nodes for bitfield and NEON BSL operations. - setTargetDAGCombine(ISD::OR); - - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::SHL); - - setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - setTargetDAGCombine(ISD::INTRINSIC_VOID); - setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); - - // AArch64 does not have i1 loads, or much of anything for i1 really. - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); - - setStackPointerRegisterToSaveRestore(AArch64::XSP); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); - - // We'll lower globals to wrappers for selection. - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); - - // A64 instructions have the comparison predicate attached to the user of the - // result, but having a separate comparison is valuable for matching. - setOperationAction(ISD::BR_CC, MVT::i32, Custom); - setOperationAction(ISD::BR_CC, MVT::i64, Custom); - setOperationAction(ISD::BR_CC, MVT::f32, Custom); - setOperationAction(ISD::BR_CC, MVT::f64, Custom); - - setOperationAction(ISD::SELECT, MVT::i32, Custom); - setOperationAction(ISD::SELECT, MVT::i64, Custom); - setOperationAction(ISD::SELECT, MVT::f32, Custom); - setOperationAction(ISD::SELECT, MVT::f64, Custom); - - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); - - setOperationAction(ISD::BRCOND, MVT::Other, Custom); - - setOperationAction(ISD::SETCC, MVT::i32, Custom); - setOperationAction(ISD::SETCC, MVT::i64, Custom); - setOperationAction(ISD::SETCC, MVT::f32, Custom); - setOperationAction(ISD::SETCC, MVT::f64, Custom); - - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::JumpTable, MVT::i32, Custom); - setOperationAction(ISD::JumpTable, MVT::i64, Custom); - - setOperationAction(ISD::VASTART, MVT::Other, Custom); - setOperationAction(ISD::VACOPY, MVT::Other, Custom); - setOperationAction(ISD::VAEND, MVT::Other, Expand); - setOperationAction(ISD::VAARG, MVT::Other, Expand); - - setOperationAction(ISD::BlockAddress, MVT::i64, Custom); - setOperationAction(ISD::ConstantPool, MVT::i64, Custom); - - setOperationAction(ISD::ROTL, MVT::i32, Expand); - setOperationAction(ISD::ROTL, MVT::i64, Expand); - - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i64, Expand); - - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, 
MVT::i64, Expand); - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - - setOperationAction(ISD::CTPOP, MVT::i32, Expand); - setOperationAction(ISD::CTPOP, MVT::i64, Expand); - - // Legal floating-point operations. - setOperationAction(ISD::FABS, MVT::f32, Legal); - setOperationAction(ISD::FABS, MVT::f64, Legal); - - setOperationAction(ISD::FCEIL, MVT::f32, Legal); - setOperationAction(ISD::FCEIL, MVT::f64, Legal); - - setOperationAction(ISD::FFLOOR, MVT::f32, Legal); - setOperationAction(ISD::FFLOOR, MVT::f64, Legal); - - setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); - - setOperationAction(ISD::FNEG, MVT::f32, Legal); - setOperationAction(ISD::FNEG, MVT::f64, Legal); - - setOperationAction(ISD::FRINT, MVT::f32, Legal); - setOperationAction(ISD::FRINT, MVT::f64, Legal); - - setOperationAction(ISD::FSQRT, MVT::f32, Legal); - setOperationAction(ISD::FSQRT, MVT::f64, Legal); - - setOperationAction(ISD::FTRUNC, MVT::f32, Legal); - setOperationAction(ISD::FTRUNC, MVT::f64, Legal); - - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - setOperationAction(ISD::ConstantFP, MVT::f128, Legal); - - // Illegal floating-point operations. - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - - setOperationAction(ISD::FCOS, MVT::f32, Expand); - setOperationAction(ISD::FCOS, MVT::f64, Expand); - - setOperationAction(ISD::FEXP, MVT::f32, Expand); - setOperationAction(ISD::FEXP, MVT::f64, Expand); - - setOperationAction(ISD::FEXP2, MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - - setOperationAction(ISD::FLOG, MVT::f32, Expand); - setOperationAction(ISD::FLOG, MVT::f64, Expand); - - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - - setOperationAction(ISD::FLOG10, MVT::f32, Expand); - setOperationAction(ISD::FLOG10, MVT::f64, Expand); - - setOperationAction(ISD::FPOW, MVT::f32, Expand); - setOperationAction(ISD::FPOW, MVT::f64, Expand); - - setOperationAction(ISD::FPOWI, MVT::f32, Expand); - setOperationAction(ISD::FPOWI, MVT::f64, Expand); - - setOperationAction(ISD::FREM, MVT::f32, Expand); - setOperationAction(ISD::FREM, MVT::f64, Expand); - - setOperationAction(ISD::FSIN, MVT::f32, Expand); - setOperationAction(ISD::FSIN, MVT::f64, Expand); - - setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - setOperationAction(ISD::FSINCOS, MVT::f64, Expand); - - // Virtually no operation on f128 is legal, but LLVM can't expand them when - // there's a valid register class, so we need custom operations in most cases. 
- setOperationAction(ISD::FABS, MVT::f128, Expand); - setOperationAction(ISD::FADD, MVT::f128, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); - setOperationAction(ISD::FCOS, MVT::f128, Expand); - setOperationAction(ISD::FDIV, MVT::f128, Custom); - setOperationAction(ISD::FMA, MVT::f128, Expand); - setOperationAction(ISD::FMUL, MVT::f128, Custom); - setOperationAction(ISD::FNEG, MVT::f128, Expand); - setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand); - setOperationAction(ISD::FP_ROUND, MVT::f128, Expand); - setOperationAction(ISD::FPOW, MVT::f128, Expand); - setOperationAction(ISD::FREM, MVT::f128, Expand); - setOperationAction(ISD::FRINT, MVT::f128, Expand); - setOperationAction(ISD::FSIN, MVT::f128, Expand); - setOperationAction(ISD::FSINCOS, MVT::f128, Expand); - setOperationAction(ISD::FSQRT, MVT::f128, Expand); - setOperationAction(ISD::FSUB, MVT::f128, Custom); - setOperationAction(ISD::FTRUNC, MVT::f128, Expand); - setOperationAction(ISD::SETCC, MVT::f128, Custom); - setOperationAction(ISD::BR_CC, MVT::f128, Custom); - setOperationAction(ISD::SELECT, MVT::f128, Expand); - setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); - setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); - - // Lowering for many of the conversions is actually specified by the non-f128 - // type. The LowerXXX function will be trivial when f128 isn't involved. - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); - - // i128 shift operation support - setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); - setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); - setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); - - // This prevents LLVM trying to compress double constants into a floating - // constant-pool entry and trying to load from there. It's of doubtful benefit - // for A64: we'd need LDR followed by FCVT, I believe. 
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); - - setTruncStoreAction(MVT::f128, MVT::f64, Expand); - setTruncStoreAction(MVT::f128, MVT::f32, Expand); - setTruncStoreAction(MVT::f128, MVT::f16, Expand); - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - setTruncStoreAction(MVT::f64, MVT::f16, Expand); - setTruncStoreAction(MVT::f32, MVT::f16, Expand); - - setExceptionPointerRegister(AArch64::X0); - setExceptionSelectorRegister(AArch64::X1); - - if (Subtarget->hasNEON()) { - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v1i64, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v16i8, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Expand); - - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); - - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); - - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Legal); - - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); - - 
setOperationAction(ISD::SETCC, MVT::v8i8, Custom); - setOperationAction(ISD::SETCC, MVT::v16i8, Custom); - setOperationAction(ISD::SETCC, MVT::v4i16, Custom); - setOperationAction(ISD::SETCC, MVT::v8i16, Custom); - setOperationAction(ISD::SETCC, MVT::v2i32, Custom); - setOperationAction(ISD::SETCC, MVT::v4i32, Custom); - setOperationAction(ISD::SETCC, MVT::v1i64, Custom); - setOperationAction(ISD::SETCC, MVT::v2i64, Custom); - setOperationAction(ISD::SETCC, MVT::v2f32, Custom); - setOperationAction(ISD::SETCC, MVT::v4f32, Custom); - setOperationAction(ISD::SETCC, MVT::v1f64, Custom); - setOperationAction(ISD::SETCC, MVT::v2f64, Custom); - - setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal); - setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); - setOperationAction(ISD::FFLOOR, MVT::v1f64, Legal); - setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); - - setOperationAction(ISD::FCEIL, MVT::v2f32, Legal); - setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); - setOperationAction(ISD::FCEIL, MVT::v1f64, Legal); - setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); - - setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal); - setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); - setOperationAction(ISD::FTRUNC, MVT::v1f64, Legal); - setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); - - setOperationAction(ISD::FRINT, MVT::v2f32, Legal); - setOperationAction(ISD::FRINT, MVT::v4f32, Legal); - setOperationAction(ISD::FRINT, MVT::v1f64, Legal); - setOperationAction(ISD::FRINT, MVT::v2f64, Legal); - - setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); - - setOperationAction(ISD::FROUND, MVT::v2f32, Legal); - setOperationAction(ISD::FROUND, MVT::v4f32, Legal); - setOperationAction(ISD::FROUND, MVT::v1f64, Legal); - setOperationAction(ISD::FROUND, MVT::v2f64, Legal); - - setOperationAction(ISD::SINT_TO_FP, MVT::v1i8, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v1i16, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v1i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); - - setOperationAction(ISD::UINT_TO_FP, MVT::v1i8, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v1i16, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v1i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); - - setOperationAction(ISD::FP_TO_SINT, MVT::v1i8, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v1i16, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v1i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Custom); - - setOperationAction(ISD::FP_TO_UINT, MVT::v1i8, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v1i16, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v1i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Custom); - - // Neon does not support vector divide/remainder operations except - // floating-point divide. 
- setOperationAction(ISD::SDIV, MVT::v1i8, Expand); - setOperationAction(ISD::SDIV, MVT::v8i8, Expand); - setOperationAction(ISD::SDIV, MVT::v16i8, Expand); - setOperationAction(ISD::SDIV, MVT::v1i16, Expand); - setOperationAction(ISD::SDIV, MVT::v4i16, Expand); - setOperationAction(ISD::SDIV, MVT::v8i16, Expand); - setOperationAction(ISD::SDIV, MVT::v1i32, Expand); - setOperationAction(ISD::SDIV, MVT::v2i32, Expand); - setOperationAction(ISD::SDIV, MVT::v4i32, Expand); - setOperationAction(ISD::SDIV, MVT::v1i64, Expand); - setOperationAction(ISD::SDIV, MVT::v2i64, Expand); - - setOperationAction(ISD::UDIV, MVT::v1i8, Expand); - setOperationAction(ISD::UDIV, MVT::v8i8, Expand); - setOperationAction(ISD::UDIV, MVT::v16i8, Expand); - setOperationAction(ISD::UDIV, MVT::v1i16, Expand); - setOperationAction(ISD::UDIV, MVT::v4i16, Expand); - setOperationAction(ISD::UDIV, MVT::v8i16, Expand); - setOperationAction(ISD::UDIV, MVT::v1i32, Expand); - setOperationAction(ISD::UDIV, MVT::v2i32, Expand); - setOperationAction(ISD::UDIV, MVT::v4i32, Expand); - setOperationAction(ISD::UDIV, MVT::v1i64, Expand); - setOperationAction(ISD::UDIV, MVT::v2i64, Expand); - - setOperationAction(ISD::SREM, MVT::v1i8, Expand); - setOperationAction(ISD::SREM, MVT::v8i8, Expand); - setOperationAction(ISD::SREM, MVT::v16i8, Expand); - setOperationAction(ISD::SREM, MVT::v1i16, Expand); - setOperationAction(ISD::SREM, MVT::v4i16, Expand); - setOperationAction(ISD::SREM, MVT::v8i16, Expand); - setOperationAction(ISD::SREM, MVT::v1i32, Expand); - setOperationAction(ISD::SREM, MVT::v2i32, Expand); - setOperationAction(ISD::SREM, MVT::v4i32, Expand); - setOperationAction(ISD::SREM, MVT::v1i64, Expand); - setOperationAction(ISD::SREM, MVT::v2i64, Expand); - - setOperationAction(ISD::UREM, MVT::v1i8, Expand); - setOperationAction(ISD::UREM, MVT::v8i8, Expand); - setOperationAction(ISD::UREM, MVT::v16i8, Expand); - setOperationAction(ISD::UREM, MVT::v1i16, Expand); - setOperationAction(ISD::UREM, MVT::v4i16, Expand); - setOperationAction(ISD::UREM, MVT::v8i16, Expand); - setOperationAction(ISD::UREM, MVT::v1i32, Expand); - setOperationAction(ISD::UREM, MVT::v2i32, Expand); - setOperationAction(ISD::UREM, MVT::v4i32, Expand); - setOperationAction(ISD::UREM, MVT::v1i64, Expand); - setOperationAction(ISD::UREM, MVT::v2i64, Expand); - - setOperationAction(ISD::FREM, MVT::v2f32, Expand); - setOperationAction(ISD::FREM, MVT::v4f32, Expand); - setOperationAction(ISD::FREM, MVT::v1f64, Expand); - setOperationAction(ISD::FREM, MVT::v2f64, Expand); - - setOperationAction(ISD::SELECT, MVT::v8i8, Expand); - setOperationAction(ISD::SELECT, MVT::v16i8, Expand); - setOperationAction(ISD::SELECT, MVT::v4i16, Expand); - setOperationAction(ISD::SELECT, MVT::v8i16, Expand); - setOperationAction(ISD::SELECT, MVT::v2i32, Expand); - setOperationAction(ISD::SELECT, MVT::v4i32, Expand); - setOperationAction(ISD::SELECT, MVT::v1i64, Expand); - setOperationAction(ISD::SELECT, MVT::v2i64, Expand); - setOperationAction(ISD::SELECT, MVT::v2f32, Expand); - setOperationAction(ISD::SELECT, MVT::v4f32, Expand); - setOperationAction(ISD::SELECT, MVT::v1f64, Expand); - setOperationAction(ISD::SELECT, MVT::v2f64, Expand); - - setOperationAction(ISD::SELECT_CC, MVT::v8i8, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v16i8, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v4i16, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v8i16, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v2i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v4i32, Custom); 
- setOperationAction(ISD::SELECT_CC, MVT::v1i64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v2i64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v2f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v4f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v1f64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v2f64, Custom); - - // Vector ExtLoad and TruncStore are expanded. - for (unsigned I = MVT::FIRST_VECTOR_VALUETYPE; - I <= MVT::LAST_VECTOR_VALUETYPE; ++I) { - MVT VT = (MVT::SimpleValueType) I; - setLoadExtAction(ISD::SEXTLOAD, VT, Expand); - setLoadExtAction(ISD::ZEXTLOAD, VT, Expand); - setLoadExtAction(ISD::EXTLOAD, VT, Expand); - for (unsigned II = MVT::FIRST_VECTOR_VALUETYPE; - II <= MVT::LAST_VECTOR_VALUETYPE; ++II) { - MVT VT1 = (MVT::SimpleValueType) II; - // A TruncStore has two vector types of the same number of elements - // and different element sizes. - if (VT.getVectorNumElements() == VT1.getVectorNumElements() && - VT.getVectorElementType().getSizeInBits() - > VT1.getVectorElementType().getSizeInBits()) - setTruncStoreAction(VT, VT1, Expand); - } - - setOperationAction(ISD::MULHS, VT, Expand); - setOperationAction(ISD::SMUL_LOHI, VT, Expand); - setOperationAction(ISD::MULHU, VT, Expand); - setOperationAction(ISD::UMUL_LOHI, VT, Expand); - - setOperationAction(ISD::BSWAP, VT, Expand); - } - - // There is no v1i64/v2i64 multiply, expand v1i64/v2i64 to GPR i64 multiply. - // FIXME: For a v2i64 multiply, we copy VPR to GPR and do 2 i64 multiplies, - // and then copy back to VPR. This solution may be optimized by Following 3 - // NEON instructions: - // pmull v2.1q, v0.1d, v1.1d - // pmull2 v3.1q, v0.2d, v1.2d - // ins v2.d[1], v3.d[0] - // As currently we can't verify the correctness of such assumption, we can - // do such optimization in the future. - setOperationAction(ISD::MUL, MVT::v1i64, Expand); - setOperationAction(ISD::MUL, MVT::v2i64, Expand); - - setOperationAction(ISD::FCOS, MVT::v2f64, Expand); - setOperationAction(ISD::FCOS, MVT::v4f32, Expand); - setOperationAction(ISD::FCOS, MVT::v2f32, Expand); - setOperationAction(ISD::FSIN, MVT::v2f64, Expand); - setOperationAction(ISD::FSIN, MVT::v4f32, Expand); - setOperationAction(ISD::FSIN, MVT::v2f32, Expand); - setOperationAction(ISD::FPOW, MVT::v2f64, Expand); - setOperationAction(ISD::FPOW, MVT::v4f32, Expand); - setOperationAction(ISD::FPOW, MVT::v2f32, Expand); - } - - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::VSELECT); - - MaskAndBranchFoldingIsLegal = true; -} - -EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - // It's reasonably important that this value matches the "natural" legal - // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself - // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64). 
- if (!VT.isVector()) return MVT::i32; - return VT.changeVectorElementTypeToInteger(); -} - -static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, - unsigned &LdrOpc, - unsigned &StrOpc) { - static const unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword, - AArch64::LDXR_word, AArch64::LDXR_dword}; - static const unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword, - AArch64::LDAXR_word, AArch64::LDAXR_dword}; - static const unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword, - AArch64::STXR_word, AArch64::STXR_dword}; - static const unsigned StoreRels[] = {AArch64::STLXR_byte,AArch64::STLXR_hword, - AArch64::STLXR_word, AArch64::STLXR_dword}; - - const unsigned *LoadOps, *StoreOps; - if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) - LoadOps = LoadAcqs; - else - LoadOps = LoadBares; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - StoreOps = StoreRels; - else - StoreOps = StoreBares; - - assert(isPowerOf2_32(Size) && Size <= 8 && - "unsupported size for atomic binary op!"); - - LdrOpc = LoadOps[Log2_32(Size)]; - StrOpc = StoreOps[Log2_32(Size)]; -} - -// FIXME: AArch64::DTripleRegClass and AArch64::QTripleRegClass don't really -// have value type mapped, and they are both being defined as MVT::untyped. -// Without knowing the MVT type, MachineLICM::getRegisterClassIDAndCost -// would fail to figure out the register pressure correctly. -std::pair -AArch64TargetLowering::findRepresentativeClass(MVT VT) const{ - const TargetRegisterClass *RRC = nullptr; - uint8_t Cost = 1; - switch (VT.SimpleTy) { - default: - return TargetLowering::findRepresentativeClass(VT); - case MVT::v4i64: - RRC = &AArch64::QPairRegClass; - Cost = 2; - break; - case MVT::v8i64: - RRC = &AArch64::QQuadRegClass; - Cost = 4; - break; - } - return std::make_pair(RRC, Cost); -} - -MachineBasicBlock * -AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, - unsigned BinOpcode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC - = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; - unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... 
- // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldxr dest, ptr - // scratch, dest, incr - // stxr stxr_status, scratch, ptr - // cbnz stxr_status, loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (BinOpcode) { - // All arithmetic operations we'll be creating are designed to take an extra - // shift or extend operand, which we can conveniently set to zero. - - // Operand order needs to go the other way for NAND. - if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl) - BuildMI(BB, dl, TII->get(BinOpcode), scratch) - .addReg(incr).addReg(dest).addImm(0); - else - BuildMI(BB, dl, TII->get(BinOpcode), scratch) - .addReg(dest).addReg(incr).addImm(0); - } - - // From the stxr, the register is GPR32; from the cmp it's GPR32wsp - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); - - BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr); - BuildMI(BB, dl, TII->get(AArch64::CBNZw)) - .addReg(stxr_status).addMBB(loopMBB); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size, - unsigned CmpOp, - A64CC::CondCodes Cond) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); - - unsigned oldval = dest; - DebugLoc dl = MI->getDebugLoc(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - const TargetRegisterClass *TRC, *TRCsp; - if (Size == 8) { - TRC = &AArch64::GPR64RegClass; - TRCsp = &AArch64::GPR64xspRegClass; - } else { - TRC = &AArch64::GPR32RegClass; - TRCsp = &AArch64::GPR32wspRegClass; - } - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - unsigned scratch = MRI.createVirtualRegister(TRC); - MRI.constrainRegClass(scratch, TRCsp); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldxr dest, ptr - // cmp incr, dest (, sign extend if necessary) - // csel scratch, dest, incr, cond - // stxr stxr_status, scratch, ptr - // cbnz stxr_status, loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - - // Build compare and cmov instructions. - MRI.constrainRegClass(incr, TRCsp); - BuildMI(BB, dl, TII->get(CmpOp)) - .addReg(incr).addReg(oldval).addImm(0); - - BuildMI(BB, dl, TII->get(Size == 8 ? 
AArch64::CSELxxxc : AArch64::CSELwwwc), - scratch) - .addReg(oldval).addReg(incr).addImm(Cond); - - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); - - BuildMI(BB, dl, TII->get(strOpc), stxr_status) - .addReg(scratch).addReg(ptr); - BuildMI(BB, dl, TII->get(AArch64::CBNZw)) - .addReg(stxr_status).addMBB(loopMBB); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size) const { - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned oldval = MI->getOperand(2).getReg(); - unsigned newval = MI->getOperand(3).getReg(); - AtomicOrdering Ord = static_cast(MI->getOperand(4).getImm()); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - const TargetRegisterClass *TRCsp; - TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass; - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // thisMBB: - // ... - // fallthrough --> loop1MBB - BB->addSuccessor(loop1MBB); - - // loop1MBB: - // ldxr dest, [ptr] - // cmp dest, oldval - // b.ne exitMBB - BB = loop1MBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - - unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl; - MRI.constrainRegClass(dest, TRCsp); - BuildMI(BB, dl, TII->get(CmpOp)) - .addReg(dest).addReg(oldval).addImm(0); - BuildMI(BB, dl, TII->get(AArch64::Bcc)) - .addImm(A64CC::NE).addMBB(exitMBB); - BB->addSuccessor(loop2MBB); - BB->addSuccessor(exitMBB); - - // loop2MBB: - // strex stxr_status, newval, [ptr] - // cbnz stxr_status, loop1MBB - BB = loop2MBB; - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); - - BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr); - BuildMI(BB, dl, TII->get(AArch64::CBNZw)) - .addReg(stxr_status).addMBB(loop1MBB); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
- - return BB; -} - -MachineBasicBlock * -AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, - MachineBasicBlock *MBB) const { - // We materialise the F128CSEL pseudo-instruction using conditional branches - // and loads, giving an instruciton sequence like: - // str q0, [sp] - // b.ne IfTrue - // b Finish - // IfTrue: - // str q1, [sp] - // Finish: - // ldr q0, [sp] - // - // Using virtual registers would probably not be beneficial since COPY - // instructions are expensive for f128 (there's no actual instruction to - // implement them). - // - // An alternative would be to do an integer-CSEL on some address. E.g.: - // mov x0, sp - // add x1, sp, #16 - // str q0, [x0] - // str q1, [x1] - // csel x0, x0, x1, ne - // ldr q0, [x0] - // - // It's unclear which approach is actually optimal. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - MachineFunction *MF = MBB->getParent(); - const BasicBlock *LLVM_BB = MBB->getBasicBlock(); - DebugLoc DL = MI->getDebugLoc(); - MachineFunction::iterator It = MBB; - ++It; - - unsigned DestReg = MI->getOperand(0).getReg(); - unsigned IfTrueReg = MI->getOperand(1).getReg(); - unsigned IfFalseReg = MI->getOperand(2).getReg(); - unsigned CondCode = MI->getOperand(3).getImm(); - bool NZCVKilled = MI->getOperand(4).isKill(); - - MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, TrueBB); - MF->insert(It, EndBB); - - // Transfer rest of current basic-block to EndBB - EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), - MBB->end()); - EndBB->transferSuccessorsAndUpdatePHIs(MBB); - - // We need somewhere to store the f128 value needed. - int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16); - - // [... start of incoming MBB ...] - // str qIFFALSE, [sp] - // b.cc IfTrue - // b Done - BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR)) - .addReg(IfFalseReg) - .addFrameIndex(ScratchFI) - .addImm(0); - BuildMI(MBB, DL, TII->get(AArch64::Bcc)) - .addImm(CondCode) - .addMBB(TrueBB); - BuildMI(MBB, DL, TII->get(AArch64::Bimm)) - .addMBB(EndBB); - MBB->addSuccessor(TrueBB); - MBB->addSuccessor(EndBB); - - if (!NZCVKilled) { - // NZCV is live-through TrueBB. - TrueBB->addLiveIn(AArch64::NZCV); - EndBB->addLiveIn(AArch64::NZCV); - } - - // IfTrue: - // str qIFTRUE, [sp] - BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR)) - .addReg(IfTrueReg) - .addFrameIndex(ScratchFI) - .addImm(0); - - // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the - // blocks. - TrueBB->addSuccessor(EndBB); - - // Done: - // ldr qDEST, [sp] - // [... rest of incoming MBB ...] 
- MachineInstr *StartOfEnd = EndBB->begin(); - BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg) - .addFrameIndex(ScratchFI) - .addImm(0); - - MI->eraseFromParent(); - return EndBB; -} - -MachineBasicBlock * -AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const { - switch (MI->getOpcode()) { - default: llvm_unreachable("Unhandled instruction with custom inserter"); - case AArch64::F128CSEL: - return EmitF128CSEL(MI, MBB); - case AArch64::ATOMIC_LOAD_ADD_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl); - case AArch64::ATOMIC_LOAD_ADD_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl); - case AArch64::ATOMIC_LOAD_ADD_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl); - case AArch64::ATOMIC_LOAD_ADD_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl); - - case AArch64::ATOMIC_LOAD_SUB_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl); - case AArch64::ATOMIC_LOAD_SUB_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl); - case AArch64::ATOMIC_LOAD_SUB_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl); - case AArch64::ATOMIC_LOAD_SUB_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl); - - case AArch64::ATOMIC_LOAD_AND_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl); - case AArch64::ATOMIC_LOAD_AND_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl); - case AArch64::ATOMIC_LOAD_AND_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl); - case AArch64::ATOMIC_LOAD_AND_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl); - - case AArch64::ATOMIC_LOAD_OR_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl); - case AArch64::ATOMIC_LOAD_OR_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl); - case AArch64::ATOMIC_LOAD_OR_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl); - case AArch64::ATOMIC_LOAD_OR_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl); - - case AArch64::ATOMIC_LOAD_XOR_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl); - case AArch64::ATOMIC_LOAD_XOR_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl); - case AArch64::ATOMIC_LOAD_XOR_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl); - case AArch64::ATOMIC_LOAD_XOR_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl); - - case AArch64::ATOMIC_LOAD_NAND_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl); - case AArch64::ATOMIC_LOAD_NAND_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl); - case AArch64::ATOMIC_LOAD_NAND_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl); - case AArch64::ATOMIC_LOAD_NAND_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl); - - case AArch64::ATOMIC_LOAD_MIN_I8: - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT); - case AArch64::ATOMIC_LOAD_MIN_I16: - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT); - case AArch64::ATOMIC_LOAD_MIN_I32: - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT); - case AArch64::ATOMIC_LOAD_MIN_I64: - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT); - - case AArch64::ATOMIC_LOAD_MAX_I8: - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT); - case AArch64::ATOMIC_LOAD_MAX_I16: - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT); - case AArch64::ATOMIC_LOAD_MAX_I32: - 
return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT); - case AArch64::ATOMIC_LOAD_MAX_I64: - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT); - - case AArch64::ATOMIC_LOAD_UMIN_I8: - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI); - case AArch64::ATOMIC_LOAD_UMIN_I16: - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI); - case AArch64::ATOMIC_LOAD_UMIN_I32: - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI); - case AArch64::ATOMIC_LOAD_UMIN_I64: - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI); - - case AArch64::ATOMIC_LOAD_UMAX_I8: - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO); - case AArch64::ATOMIC_LOAD_UMAX_I16: - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO); - case AArch64::ATOMIC_LOAD_UMAX_I32: - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO); - case AArch64::ATOMIC_LOAD_UMAX_I64: - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO); - - case AArch64::ATOMIC_SWAP_I8: - return emitAtomicBinary(MI, MBB, 1, 0); - case AArch64::ATOMIC_SWAP_I16: - return emitAtomicBinary(MI, MBB, 2, 0); - case AArch64::ATOMIC_SWAP_I32: - return emitAtomicBinary(MI, MBB, 4, 0); - case AArch64::ATOMIC_SWAP_I64: - return emitAtomicBinary(MI, MBB, 8, 0); - - case AArch64::ATOMIC_CMP_SWAP_I8: - return emitAtomicCmpSwap(MI, MBB, 1); - case AArch64::ATOMIC_CMP_SWAP_I16: - return emitAtomicCmpSwap(MI, MBB, 2); - case AArch64::ATOMIC_CMP_SWAP_I32: - return emitAtomicCmpSwap(MI, MBB, 4); - case AArch64::ATOMIC_CMP_SWAP_I64: - return emitAtomicCmpSwap(MI, MBB, 8); - } -} - - -const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC"; - case AArch64ISD::Call: return "AArch64ISD::Call"; - case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV"; - case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad"; - case AArch64ISD::BFI: return "AArch64ISD::BFI"; - case AArch64ISD::EXTR: return "AArch64ISD::EXTR"; - case AArch64ISD::Ret: return "AArch64ISD::Ret"; - case AArch64ISD::SBFX: return "AArch64ISD::SBFX"; - case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC"; - case AArch64ISD::SETCC: return "AArch64ISD::SETCC"; - case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; - case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; - case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL"; - case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge"; - case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall"; - - case AArch64ISD::NEON_MOVIMM: - return "AArch64ISD::NEON_MOVIMM"; - case AArch64ISD::NEON_MVNIMM: - return "AArch64ISD::NEON_MVNIMM"; - case AArch64ISD::NEON_FMOVIMM: - return "AArch64ISD::NEON_FMOVIMM"; - case AArch64ISD::NEON_CMP: - return "AArch64ISD::NEON_CMP"; - case AArch64ISD::NEON_CMPZ: - return "AArch64ISD::NEON_CMPZ"; - case AArch64ISD::NEON_TST: - return "AArch64ISD::NEON_TST"; - case AArch64ISD::NEON_QSHLs: - return "AArch64ISD::NEON_QSHLs"; - case AArch64ISD::NEON_QSHLu: - return "AArch64ISD::NEON_QSHLu"; - case AArch64ISD::NEON_VDUP: - return "AArch64ISD::NEON_VDUP"; - case AArch64ISD::NEON_VDUPLANE: - return "AArch64ISD::NEON_VDUPLANE"; - case AArch64ISD::NEON_REV16: - return "AArch64ISD::NEON_REV16"; - case AArch64ISD::NEON_REV32: - return "AArch64ISD::NEON_REV32"; - case AArch64ISD::NEON_REV64: - return 
"AArch64ISD::NEON_REV64"; - case AArch64ISD::NEON_UZP1: - return "AArch64ISD::NEON_UZP1"; - case AArch64ISD::NEON_UZP2: - return "AArch64ISD::NEON_UZP2"; - case AArch64ISD::NEON_ZIP1: - return "AArch64ISD::NEON_ZIP1"; - case AArch64ISD::NEON_ZIP2: - return "AArch64ISD::NEON_ZIP2"; - case AArch64ISD::NEON_TRN1: - return "AArch64ISD::NEON_TRN1"; - case AArch64ISD::NEON_TRN2: - return "AArch64ISD::NEON_TRN2"; - case AArch64ISD::NEON_LD1_UPD: - return "AArch64ISD::NEON_LD1_UPD"; - case AArch64ISD::NEON_LD2_UPD: - return "AArch64ISD::NEON_LD2_UPD"; - case AArch64ISD::NEON_LD3_UPD: - return "AArch64ISD::NEON_LD3_UPD"; - case AArch64ISD::NEON_LD4_UPD: - return "AArch64ISD::NEON_LD4_UPD"; - case AArch64ISD::NEON_ST1_UPD: - return "AArch64ISD::NEON_ST1_UPD"; - case AArch64ISD::NEON_ST2_UPD: - return "AArch64ISD::NEON_ST2_UPD"; - case AArch64ISD::NEON_ST3_UPD: - return "AArch64ISD::NEON_ST3_UPD"; - case AArch64ISD::NEON_ST4_UPD: - return "AArch64ISD::NEON_ST4_UPD"; - case AArch64ISD::NEON_LD1x2_UPD: - return "AArch64ISD::NEON_LD1x2_UPD"; - case AArch64ISD::NEON_LD1x3_UPD: - return "AArch64ISD::NEON_LD1x3_UPD"; - case AArch64ISD::NEON_LD1x4_UPD: - return "AArch64ISD::NEON_LD1x4_UPD"; - case AArch64ISD::NEON_ST1x2_UPD: - return "AArch64ISD::NEON_ST1x2_UPD"; - case AArch64ISD::NEON_ST1x3_UPD: - return "AArch64ISD::NEON_ST1x3_UPD"; - case AArch64ISD::NEON_ST1x4_UPD: - return "AArch64ISD::NEON_ST1x4_UPD"; - case AArch64ISD::NEON_LD2DUP: - return "AArch64ISD::NEON_LD2DUP"; - case AArch64ISD::NEON_LD3DUP: - return "AArch64ISD::NEON_LD3DUP"; - case AArch64ISD::NEON_LD4DUP: - return "AArch64ISD::NEON_LD4DUP"; - case AArch64ISD::NEON_LD2DUP_UPD: - return "AArch64ISD::NEON_LD2DUP_UPD"; - case AArch64ISD::NEON_LD3DUP_UPD: - return "AArch64ISD::NEON_LD3DUP_UPD"; - case AArch64ISD::NEON_LD4DUP_UPD: - return "AArch64ISD::NEON_LD4DUP_UPD"; - case AArch64ISD::NEON_LD2LN_UPD: - return "AArch64ISD::NEON_LD2LN_UPD"; - case AArch64ISD::NEON_LD3LN_UPD: - return "AArch64ISD::NEON_LD3LN_UPD"; - case AArch64ISD::NEON_LD4LN_UPD: - return "AArch64ISD::NEON_LD4LN_UPD"; - case AArch64ISD::NEON_ST2LN_UPD: - return "AArch64ISD::NEON_ST2LN_UPD"; - case AArch64ISD::NEON_ST3LN_UPD: - return "AArch64ISD::NEON_ST3LN_UPD"; - case AArch64ISD::NEON_ST4LN_UPD: - return "AArch64ISD::NEON_ST4LN_UPD"; - case AArch64ISD::NEON_VEXTRACT: - return "AArch64ISD::NEON_VEXTRACT"; - default: - return nullptr; - } -} - -static const MCPhysReg AArch64FPRArgRegs[] = { - AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, - AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7 -}; -static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs); - -static const MCPhysReg AArch64ArgRegs[] = { - AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, - AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7 -}; -static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs); - -static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - // Mark all remaining general purpose registers as allocated. We don't - // backtrack: if (for example) an i128 gets put on the stack, no subsequent - // i64 will go in registers (C.11). 
- for (unsigned i = 0; i < NumArgRegs; ++i) - State.AllocateReg(AArch64ArgRegs[i]); - - return false; -} - -#include "AArch64GenCallingConv.inc" - -CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { - - switch(CC) { - default: llvm_unreachable("Unsupported calling convention"); - case CallingConv::Fast: - case CallingConv::C: - return CC_A64_APCS; - } -} - -void -AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, - SDLoc DL, SDValue &Chain) const { - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo(); - - SmallVector MemOps; - - unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs, - NumArgRegs); - unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs, - NumFPRArgRegs); - - unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR); - int GPRIdx = 0; - if (GPRSaveSize != 0) { - GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false); - - SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy()); - - for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass); - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); - SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 8), - false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(8, getPointerTy())); - } - } - - if (getSubtarget()->hasFPARMv8()) { - unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); - int FPRIdx = 0; - // According to the AArch64 Procedure Call Standard, section B.1/B.3, we - // can omit a register save area if we know we'll never use registers of - // that class. 
- if (FPRSaveSize != 0) { - FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); - - SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); - - for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i], - &AArch64::FPR128RegClass); - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); - SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 16), - false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(16, getPointerTy())); - } - } - FuncInfo->setVariadicFPRIdx(FPRIdx); - FuncInfo->setVariadicFPRSize(FPRSaveSize); - } - - unsigned StackOffset = RoundUpToAlignment(CCInfo.getNextStackOffset(), 8); - int StackIdx = MFI->CreateFixedObject(8, StackOffset, true); - - FuncInfo->setVariadicStackIdx(StackIdx); - FuncInfo->setVariadicGPRIdx(GPRIdx); - FuncInfo->setVariadicGPRSize(GPRSaveSize); - - if (!MemOps.empty()) { - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); - } -} - - -SDValue -AArch64TargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); - AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; - - SmallVector ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv)); - - SmallVector ArgValues; - - SDValue ArgValue; - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - ISD::ArgFlagsTy Flags = Ins[i].Flags; - - if (Flags.isByVal()) { - // Byval is used for small structs and HFAs in the PCS, but the system - // should work in a non-compliant manner for larger structs. 
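// A minimal standalone sketch of the save-area sizing logic above, kept apart
// from the patch: any of the eight X registers / eight Q registers not consumed
// by named arguments are spilled so va_arg can find them later. The helper name
// and struct are illustrative only, not part of the backend.
#include <cassert>

struct VarArgSaveAreas {
  unsigned GPRSaveSize; // bytes for unallocated X registers, 8 bytes each
  unsigned FPRSaveSize; // bytes for unallocated Q registers, 16 bytes each
};

static VarArgSaveAreas computeSaveAreas(unsigned FirstVariadicGPR,
                                        unsigned FirstVariadicFPR,
                                        bool HasFPARMv8) {
  const unsigned NumArgRegs = 8, NumFPRArgRegs = 8;
  assert(FirstVariadicGPR <= NumArgRegs && FirstVariadicFPR <= NumFPRArgRegs);
  VarArgSaveAreas A;
  A.GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR);
  // Per AAPCS64 B.1/B.3 the FPR area may be omitted entirely when FP registers
  // are known to be unusable.
  A.FPRSaveSize = HasFPARMv8 ? 16 * (NumFPRArgRegs - FirstVariadicFPR) : 0;
  return A;
}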
- EVT PtrTy = getPointerTy(); - int Size = Flags.getByValSize(); - unsigned NumRegs = (Size + 7) / 8; - - uint32_t BEAlign = 0; - if (Size < 8 && !getSubtarget()->isLittle()) - BEAlign = 8-Size; - unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs, - VA.getLocMemOffset() + BEAlign, - false); - SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy); - InVals.push_back(FrameIdxN); - - continue; - } else if (VA.isRegLoc()) { - MVT RegVT = VA.getLocVT(); - const TargetRegisterClass *RC = getRegClassFor(RegVT); - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); - - ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); - } else { // VA.isRegLoc() - assert(VA.isMemLoc()); - - int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, - VA.getLocMemOffset(), true); - - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); - - - } - - switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: break; - case CCValAssign::BCvt: - ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue); - break; - case CCValAssign::SExt: - case CCValAssign::ZExt: - case CCValAssign::AExt: - case CCValAssign::FPExt: { - unsigned DestSize = VA.getValVT().getSizeInBits(); - unsigned DestSubReg; - - switch (DestSize) { - case 8: DestSubReg = AArch64::sub_8; break; - case 16: DestSubReg = AArch64::sub_16; break; - case 32: DestSubReg = AArch64::sub_32; break; - case 64: DestSubReg = AArch64::sub_64; break; - default: llvm_unreachable("Unexpected argument promotion"); - } - - ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, - VA.getValVT(), ArgValue, - DAG.getTargetConstant(DestSubReg, MVT::i32)), - 0); - break; - } - } - - InVals.push_back(ArgValue); - } - - if (isVarArg) - SaveVarArgRegisters(CCInfo, DAG, dl, Chain); - - unsigned StackArgSize = CCInfo.getNextStackOffset(); - if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) { - // This is a non-standard ABI so by fiat I say we're allowed to make full - // use of the stack area to be popped, which must be aligned to 16 bytes in - // any case: - StackArgSize = RoundUpToAlignment(StackArgSize, 16); - - // If we're expected to restore the stack (e.g. fastcc) then we'll be adding - // a multiple of 16. - FuncInfo->setArgumentStackToRestore(StackArgSize); - - // This realignment carries over to the available bytes below. Our own - // callers will guarantee the space is free by giving an aligned value to - // CALLSEQ_START. - } - // Even if we're not expected to free up the space, it's useful to know how - // much is there while considering tail calls (because we can reuse it). - FuncInfo->setBytesInStackArgArea(StackArgSize); - - return Chain; -} - -SDValue -AArch64TargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - SDLoc dl, SelectionDAG &DAG) const { - // CCValAssign - represent the assignment of the return value to a location. - SmallVector RVLocs; - - // CCState - Info about the registers and stack slots. - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - - // Analyze outgoing return values. 
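// A standalone sketch of the byval slot sizing just above: the argument is
// padded up to whole 8-byte registers, and on big-endian targets a sub-8-byte
// value sits at the high end of its slot, hence the extra offset. Helper name
// is illustrative.
static unsigned byValFrameOffset(unsigned Size, unsigned LocMemOffset,
                                 bool IsLittleEndian, unsigned &NumRegs) {
  NumRegs = (Size + 7) / 8;      // round up to whole 8-byte slots
  unsigned BEAlign = 0;
  if (Size < 8 && !IsLittleEndian)
    BEAlign = 8 - Size;          // value lives in the high bytes of the slot
  return LocMemOffset + BEAlign; // offset of the fixed stack object
}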
- CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv)); - - SDValue Flag; - SmallVector RetOps(1, Chain); - - for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { - // PCS: "If the type, T, of the result of a function is such that - // void func(T arg) would require that arg be passed as a value in a - // register (or set of registers) according to the rules in 5.4, then the - // result is returned in the same registers as would be used for such an - // argument. - // - // Otherwise, the caller shall reserve a block of memory of sufficient - // size and alignment to hold the result. The address of the memory block - // shall be passed as an additional argument to the function in x8." - // - // This is implemented in two places. The register-return values are dealt - // with here, more complex returns are passed as an sret parameter, which - // means we don't have to worry about it during actual return. - CCValAssign &VA = RVLocs[i]; - assert(VA.isRegLoc() && "Only register-returns should be created by PCS"); - - - SDValue Arg = OutVals[i]; - - // There's no convenient note in the ABI about this as there is for normal - // arguments, but it says return values are passed in the same registers as - // an argument would be. I believe that includes the comments about - // unspecified higher bits, putting the burden of widening on the *caller* - // for return values. - switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info"); - case CCValAssign::Full: break; - case CCValAssign::SExt: - case CCValAssign::ZExt: - case CCValAssign::AExt: - // Floating-point values should only be extended when they're going into - // memory, which can't happen here so an integer extend is acceptable. - Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); - break; - case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); - break; - } - - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); - Flag = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); - } - - RetOps[0] = Chain; // Update chain. - - // Add the flag if we have it. - if (Flag.getNode()) - RetOps.push_back(Flag); - - return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, RetOps); -} - -unsigned AArch64TargetLowering::getByValTypeAlignment(Type *Ty) const { - // This is a new backend. For anything more precise than this a FE should - // set an explicit alignment. 
- return 4; -} - -SDValue -AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const { - SelectionDAG &DAG = CLI.DAG; - SDLoc &dl = CLI.DL; - SmallVectorImpl &Outs = CLI.Outs; - SmallVectorImpl &OutVals = CLI.OutVals; - SmallVectorImpl &Ins = CLI.Ins; - SDValue Chain = CLI.Chain; - SDValue Callee = CLI.Callee; - bool &IsTailCall = CLI.IsTailCall; - CallingConv::ID CallConv = CLI.CallConv; - bool IsVarArg = CLI.IsVarArg; - - MachineFunction &MF = DAG.getMachineFunction(); - AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo(); - bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; - bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet(); - bool IsSibCall = false; - - if (IsTailCall) { - IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), - Outs, OutVals, Ins, DAG); - - if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall()) - report_fatal_error("failed to perform tail call elimination on a call " - "site marked musttail"); - - // A sibling call is one where we're under the usual C ABI and not planning - // to change that but can still do a tail call: - if (!TailCallOpt && IsTailCall) - IsSibCall = true; - } - - SmallVector ArgLocs; - CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv)); - - // On AArch64 (and all other architectures I'm aware of) the most this has to - // do is adjust the stack pointer. - unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16); - if (IsSibCall) { - // Since we're not changing the ABI to make this a tail call, the memory - // operands are already available in the caller's incoming argument space. - NumBytes = 0; - } - - // FPDiff is the byte offset of the call's argument area from the callee's. - // Stores to callee stack arguments will be placed in FixedStackSlots offset - // by this amount for a tail call. In a sibling call it must be 0 because the - // caller will deallocate the entire stack and the callee still expects its - // arguments to begin at SP+0. Completely unused for non-tail calls. - int FPDiff = 0; - - if (IsTailCall && !IsSibCall) { - unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); - - // FPDiff will be negative if this tail call requires more space than we - // would automatically have in our incoming argument space. Positive if we - // can actually shrink the stack. - FPDiff = NumReusableBytes - NumBytes; - - // The stack pointer must be 16-byte aligned at all times it's used for a - // memory operation, which in practice means at *all* times and in - // particular across call boundaries. Therefore our own arguments started at - // a 16-byte aligned SP and the delta applied for the tail call should - // satisfy the same constraint. - assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); - } - - if (!IsSibCall) - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), - dl); - - SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP, - getPointerTy()); - - SmallVector MemOpChains; - SmallVector, 8> RegsToPass; - - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - ISD::ArgFlagsTy Flags = Outs[i].Flags; - SDValue Arg = OutVals[i]; - - // Callee does the actual widening, so all extensions just use an implicit - // definition of the rest of the Loc. 
Aesthetically, this would be nicer as - // an ANY_EXTEND, but that isn't valid for floating-point types and this - // alternative works on integer types too. - switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: break; - case CCValAssign::SExt: - case CCValAssign::ZExt: - case CCValAssign::AExt: - case CCValAssign::FPExt: { - unsigned SrcSize = VA.getValVT().getSizeInBits(); - unsigned SrcSubReg; - - switch (SrcSize) { - case 8: SrcSubReg = AArch64::sub_8; break; - case 16: SrcSubReg = AArch64::sub_16; break; - case 32: SrcSubReg = AArch64::sub_32; break; - case 64: SrcSubReg = AArch64::sub_64; break; - default: llvm_unreachable("Unexpected argument promotion"); - } - - Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, - VA.getLocVT(), - DAG.getUNDEF(VA.getLocVT()), - Arg, - DAG.getTargetConstant(SrcSubReg, MVT::i32)), - 0); - - break; - } - case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); - break; - } - - if (VA.isRegLoc()) { - // A normal register (sub-) argument. For now we just note it down because - // we want to copy things into registers as late as possible to avoid - // register-pressure (and possibly worse). - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - continue; - } - - assert(VA.isMemLoc() && "unexpected argument location"); - - SDValue DstAddr; - MachinePointerInfo DstInfo; - if (IsTailCall) { - uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() : - VA.getLocVT().getSizeInBits(); - OpSize = (OpSize + 7) / 8; - int32_t Offset = VA.getLocMemOffset() + FPDiff; - int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); - - DstAddr = DAG.getFrameIndex(FI, getPointerTy()); - DstInfo = MachinePointerInfo::getFixedStack(FI); - - // Make sure any stack arguments overlapping with where we're storing are - // loaded before this eventual operation. Otherwise they'll be clobbered. - Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI); - } else { - uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize()*8 : - VA.getLocVT().getSizeInBits(); - OpSize = (OpSize + 7) / 8; - uint32_t BEAlign = 0; - if (OpSize < 8 && !getSubtarget()->isLittle()) - BEAlign = 8-OpSize; - SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + BEAlign); - - DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); - DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset()); - } - - if (Flags.isByVal()) { - SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64); - SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode, - Flags.getByValAlign(), - /*isVolatile = */ false, - /*alwaysInline = */ false, - DstInfo, MachinePointerInfo()); - MemOpChains.push_back(Cpy); - } else { - // Normal stack argument, put it where it's needed. - SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo, - false, false, 0); - MemOpChains.push_back(Store); - } - } - - // The loads and stores generated above shouldn't clash with each - // other. Combining them with this TokenFactor notes that fact for the rest of - // the backend. - if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); - - // Most of the rest of the instructions need to be glued together; we don't - // want assignments to actual registers used by a call to be rearranged by a - // well-meaning scheduler. 
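// A standalone sketch of the tail-call stack bookkeeping described above: the
// outgoing argument area is rounded to 16 bytes (SP must stay 16-byte aligned
// across calls), and FPDiff measures how the callee's argument area compares
// with the bytes already available in this function's incoming argument area.
// The helper is illustrative only.
static int computeFPDiff(unsigned NextStackOffset, unsigned BytesInStackArgArea,
                         bool IsSibCall) {
  unsigned NumBytes = (NextStackOffset + 15) & ~15u; // round up to 16 bytes
  if (IsSibCall)
    return 0;                 // sibling call: the caller's frame is reused as-is
  // Negative when the tail call needs more argument space than we were given;
  // positive when the stack can actually shrink.
  return (int)BytesInStackArgArea - (int)NumBytes;
}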
- SDValue InFlag; - - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // The linker is responsible for inserting veneers when necessary to put a - // function call destination in range, so we don't need to bother with a - // wrapper here. - if (GlobalAddressSDNode *G = dyn_cast(Callee)) { - const GlobalValue *GV = G->getGlobal(); - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); - } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { - const char *Sym = S->getSymbol(); - Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy()); - } - - // We don't usually want to end the call-sequence here because we would tidy - // the frame up *after* the call, however in the ABI-changing tail-call case - // we've carefully laid out the parameters so that when sp is reset they'll be - // in the correct location. - if (IsTailCall && !IsSibCall) { - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag, dl); - InFlag = Chain.getValue(1); - } - - // We produce the following DAG scheme for the actual call instruction: - // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag? - // - // Most arguments aren't going to be used and just keep the values live as - // far as LLVM is concerned. It's expected to be selected as simply "bl - // callee" (for a direct, non-tail call). - std::vector Ops; - Ops.push_back(Chain); - Ops.push_back(Callee); - - if (IsTailCall) { - // Each tail call may have to adjust the stack by a different amount, so - // this information must travel along with the operation for eventual - // consumption by emitEpilogue. - Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32)); - } - - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - - // Add a register mask operand representing the call-preserved registers. This - // is used later in codegen to constrain register-allocation. - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); - - // If we needed glue, put it in as the last argument. - if (InFlag.getNode()) - Ops.push_back(InFlag); - - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - - if (IsTailCall) { - return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, Ops); - } - - Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, Ops); - InFlag = Chain.getValue(1); - - // Now we can reclaim the stack, just as well do it before working out where - // our return value is. - if (!IsSibCall) { - uint64_t CalleePopBytes - = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0; - - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(CalleePopBytes, true), - InFlag, dl); - InFlag = Chain.getValue(1); - } - - return LowerCallResult(Chain, InFlag, CallConv, - IsVarArg, Ins, dl, DAG, InVals); -} - -SDValue -AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool IsVarArg, - const SmallVectorImpl &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { - // Assign locations to each value returned by this call. 
- SmallVector RVLocs; - CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv)); - - for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign VA = RVLocs[i]; - - // Return values that are too big to fit into registers should use an sret - // pointer, so this can be a lot simpler than the main argument code. - assert(VA.isRegLoc() && "Memory locations not expected for call return"); - - SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), - InFlag); - Chain = Val.getValue(1); - InFlag = Val.getValue(2); - - switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: break; - case CCValAssign::BCvt: - Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); - break; - case CCValAssign::ZExt: - case CCValAssign::SExt: - case CCValAssign::AExt: - // Floating-point arguments only get extended/truncated if they're going - // in memory, so using the integer operation is acceptable here. - Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); - break; - } - - InVals.push_back(Val); - } - - return Chain; -} - -bool -AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool IsVarArg, - bool IsCalleeStructRet, - bool IsCallerStructRet, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - SelectionDAG& DAG) const { - - // For CallingConv::C this function knows whether the ABI needs - // changing. That's not true for other conventions so they will have to opt in - // manually. - if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C) - return false; - - const MachineFunction &MF = DAG.getMachineFunction(); - const Function *CallerF = MF.getFunction(); - CallingConv::ID CallerCC = CallerF->getCallingConv(); - bool CCMatch = CallerCC == CalleeCC; - - // Byval parameters hand the function a pointer directly into the stack area - // we want to reuse during a tail call. Working around this *is* possible (see - // X86) but less efficient and uglier in LowerCall. - for (Function::const_arg_iterator i = CallerF->arg_begin(), - e = CallerF->arg_end(); i != e; ++i) - if (i->hasByValAttr()) - return false; - - if (getTargetMachine().Options.GuaranteedTailCallOpt) { - if (IsTailCallConvention(CalleeCC) && CCMatch) - return true; - return false; - } - - // Now we search for cases where we can use a tail call without changing the - // ABI. Sibcall is used in some places (particularly gcc) to refer to this - // concept. - - // I want anyone implementing a new calling convention to think long and hard - // about this assert. - assert((!IsVarArg || CalleeCC == CallingConv::C) - && "Unexpected variadic calling convention"); - - if (IsVarArg && !Outs.empty()) { - // At least two cases here: if caller is fastcc then we can't have any - // memory arguments (we'd be expected to clean up the stack afterwards). If - // caller is C then we could potentially use its argument area. - - // FIXME: for now we take the most conservative of these in both cases: - // disallow all variadic memory operands. 
- SmallVector ArgLocs; - CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) - if (!ArgLocs[i].isRegLoc()) - return false; - } - - // If the calling conventions do not match, then we'd better make sure the - // results are returned in the same way as what the caller expects. - if (!CCMatch) { - SmallVector RVLocs1; - CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs1, *DAG.getContext()); - CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC)); - - SmallVector RVLocs2; - CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs2, *DAG.getContext()); - CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC)); - - if (RVLocs1.size() != RVLocs2.size()) - return false; - for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { - if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) - return false; - if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) - return false; - if (RVLocs1[i].isRegLoc()) { - if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) - return false; - } else { - if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) - return false; - } - } - } - - // Nothing more to check if the callee is taking no arguments - if (Outs.empty()) - return true; - - SmallVector ArgLocs; - CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); - - const AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo(); - - // If the stack arguments for this call would fit into our own save area then - // the call can be made tail. - return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea(); -} - -bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, - bool TailCallOpt) const { - return CallCC == CallingConv::Fast && TailCallOpt; -} - -bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const { - return CallCC == CallingConv::Fast; -} - -SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, - SelectionDAG &DAG, - MachineFrameInfo *MFI, - int ClobberedFI) const { - SmallVector ArgChains; - int64_t FirstByte = MFI->getObjectOffset(ClobberedFI); - int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1; - - // Include the original chain at the beginning of the list. When this is - // used by target LowerCall hooks, this helps legalize find the - // CALLSEQ_BEGIN node. - ArgChains.push_back(Chain); - - // Add a chain value for each stack argument corresponding - for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(), - UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U) - if (LoadSDNode *L = dyn_cast(*U)) - if (FrameIndexSDNode *FI = dyn_cast(L->getBasePtr())) - if (FI->getIndex() < 0) { - int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex()); - int64_t InLastByte = InFirstByte; - InLastByte += MFI->getObjectSize(FI->getIndex()) - 1; - - if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) || - (FirstByte <= InFirstByte && InFirstByte <= LastByte)) - ArgChains.push_back(SDValue(L, 1)); - } - - // Build a tokenfactor for all the chains. 
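// A standalone sketch of the overlap test used by addTokenForArgument above: a
// pending load from the incoming argument area must be chained before a
// tail-call store whenever the two closed byte ranges intersect, otherwise the
// store would clobber the bytes before they are read. Helper name illustrative.
static bool framePiecesOverlap(long FirstByte, long LastByte,
                               long InFirstByte, long InLastByte) {
  return (InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
         (FirstByte <= InFirstByte && InFirstByte <= LastByte);
}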
- return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); -} - -static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) { - switch (CC) { - case ISD::SETEQ: return A64CC::EQ; - case ISD::SETGT: return A64CC::GT; - case ISD::SETGE: return A64CC::GE; - case ISD::SETLT: return A64CC::LT; - case ISD::SETLE: return A64CC::LE; - case ISD::SETNE: return A64CC::NE; - case ISD::SETUGT: return A64CC::HI; - case ISD::SETUGE: return A64CC::HS; - case ISD::SETULT: return A64CC::LO; - case ISD::SETULE: return A64CC::LS; - default: llvm_unreachable("Unexpected condition code"); - } -} - -bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const { - // icmp is implemented using adds/subs immediate, which take an unsigned - // 12-bit immediate, optionally shifted left by 12 bits. - - // Symmetric by using adds/subs - if (Val < 0) - Val = -Val; - - return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0; -} - -SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS, - ISD::CondCode CC, SDValue &A64cc, - SelectionDAG &DAG, SDLoc &dl) const { - if (ConstantSDNode *RHSC = dyn_cast(RHS.getNode())) { - int64_t C = 0; - EVT VT = RHSC->getValueType(0); - bool knownInvalid = false; - - // I'm not convinced the rest of LLVM handles these edge cases properly, but - // we can at least get it right. - if (isSignedIntSetCC(CC)) { - C = RHSC->getSExtValue(); - } else if (RHSC->getZExtValue() > INT64_MAX) { - // A 64-bit constant not representable by a signed 64-bit integer is far - // too big to fit into a SUBS immediate anyway. - knownInvalid = true; - } else { - C = RHSC->getZExtValue(); - } - - if (!knownInvalid && !isLegalICmpImmediate(C)) { - // Constant does not fit, try adjusting it by one? - switch (CC) { - default: break; - case ISD::SETLT: - case ISD::SETGE: - if (isLegalICmpImmediate(C-1)) { - CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; - RHS = DAG.getConstant(C-1, VT); - } - break; - case ISD::SETULT: - case ISD::SETUGE: - if (isLegalICmpImmediate(C-1)) { - CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; - RHS = DAG.getConstant(C-1, VT); - } - break; - case ISD::SETLE: - case ISD::SETGT: - if (isLegalICmpImmediate(C+1)) { - CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; - RHS = DAG.getConstant(C+1, VT); - } - break; - case ISD::SETULE: - case ISD::SETUGT: - if (isLegalICmpImmediate(C+1)) { - CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; - RHS = DAG.getConstant(C+1, VT); - } - break; - } - } - } - - A64CC::CondCodes CondCode = IntCCToA64CC(CC); - A64cc = DAG.getConstant(CondCode, MVT::i32); - return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, - DAG.getCondCode(CC)); -} - -static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC, - A64CC::CondCodes &Alternative) { - A64CC::CondCodes CondCode = A64CC::Invalid; - Alternative = A64CC::Invalid; - - switch (CC) { - default: llvm_unreachable("Unknown FP condition!"); - case ISD::SETEQ: - case ISD::SETOEQ: CondCode = A64CC::EQ; break; - case ISD::SETGT: - case ISD::SETOGT: CondCode = A64CC::GT; break; - case ISD::SETGE: - case ISD::SETOGE: CondCode = A64CC::GE; break; - case ISD::SETOLT: CondCode = A64CC::MI; break; - case ISD::SETOLE: CondCode = A64CC::LS; break; - case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break; - case ISD::SETO: CondCode = A64CC::VC; break; - case ISD::SETUO: CondCode = A64CC::VS; break; - case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break; - case ISD::SETUGT: CondCode = A64CC::HI; break; - case ISD::SETUGE: CondCode = A64CC::PL; break; - case ISD::SETLT: - case ISD::SETULT: CondCode = A64CC::LT; break; - case ISD::SETLE: - case ISD::SETULE: CondCode = A64CC::LE; break; - case ISD::SETNE: - case ISD::SETUNE: CondCode = A64CC::NE; break; - } - return CondCode; -} - -SDValue -AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT PtrVT = getPointerTy(); - const BlockAddress *BA = cast(Op)->getBlockAddress(); - - switch(getTargetMachine().getCodeModel()) { - case CodeModel::Small: - // The most efficient code is PC-relative anyway for the small memory model, - // so we don't need to worry about relocation model. - return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, - AArch64II::MO_NO_FLAG), - DAG.getTargetBlockAddress(BA, PtrVT, 0, - AArch64II::MO_LO12), - DAG.getConstant(/*Alignment=*/ 4, MVT::i32)); - case CodeModel::Large: - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC)); - default: - llvm_unreachable("Only small and large code models supported now"); - } -} - - -// (BRCOND chain, val, dest) -SDValue -AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - SDValue Chain = Op.getOperand(0); - SDValue TheBit = Op.getOperand(1); - SDValue DestBB = Op.getOperand(2); - - // AArch64 BooleanContents is the default UndefinedBooleanContent, which means - // that as the consumer we are responsible for ignoring rubbish in higher - // bits. 
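// A standalone sketch of the immediate rules behind getSelectableIntSetCC and
// isLegalICmpImmediate above: ADDS/SUBS take a 12-bit unsigned immediate,
// optionally shifted left by 12, and an out-of-range constant can sometimes be
// nudged by one if the condition code is adjusted to match (for example
// "x < 4096" becomes "x <= 4095"). Helper names are illustrative only.
#include <cstdint>

static bool isLegalCmpImm(int64_t Val) {
  if (Val < 0)
    Val = -Val;                      // symmetric via adds/subs
  return (Val & ~0xfffLL) == 0 || (Val & ~0xfff000LL) == 0;
}

static bool tryAdjustByOne(int64_t C, bool WantLess, int64_t &NewC) {
  // "< C" can be re-expressed as "<= C-1", and "<= C" as "< C+1".
  NewC = WantLess ? C - 1 : C + 1;
  return isLegalCmpImm(NewC);
}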
- TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit, - DAG.getConstant(1, MVT::i32)); - - SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit, - DAG.getConstant(0, TheBit.getValueType()), - DAG.getCondCode(ISD::SETNE)); - - return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain, - A64CMP, DAG.getConstant(A64CC::NE, MVT::i32), - DestBB); -} - -// (BR_CC chain, condcode, lhs, rhs, dest) -SDValue -AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - SDValue Chain = Op.getOperand(0); - ISD::CondCode CC = cast(Op.getOperand(1))->get(); - SDValue LHS = Op.getOperand(2); - SDValue RHS = Op.getOperand(3); - SDValue DestBB = Op.getOperand(4); - - if (LHS.getValueType() == MVT::f128) { - // f128 comparisons are lowered to runtime calls by a routine which sets - // LHS, RHS and CC appropriately for the rest of this function to continue. - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); - - // If softenSetCCOperands returned a scalar, we need to compare the result - // against zero to select between true and false values. - if (!RHS.getNode()) { - RHS = DAG.getConstant(0, LHS.getValueType()); - CC = ISD::SETNE; - } - } - - if (LHS.getValueType().isInteger()) { - SDValue A64cc; - - // Integers are handled in a separate function because the combinations of - // immediates and tests can get hairy and we may want to fiddle things. - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); - - return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, - Chain, CmpOp, A64cc, DestBB); - } - - // Note that some LLVM floating-point CondCodes can't be lowered to a single - // conditional branch, hence FPCCToA64CC can set a second test, where either - // passing is sufficient. - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; - CondCode = FPCCToA64CC(CC, Alternative); - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); - SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, - DAG.getCondCode(CC)); - SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, - Chain, SetCC, A64cc, DestBB); - - if (Alternative != A64CC::Invalid) { - A64cc = DAG.getConstant(Alternative, MVT::i32); - A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, - A64BR_CC, SetCC, A64cc, DestBB); - - } - - return A64BR_CC; -} - -SDValue -AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG, - RTLIB::Libcall Call) const { - ArgListTy Args; - ArgListEntry Entry; - for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { - EVT ArgVT = Op.getOperand(i).getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy; - Entry.isSExt = false; - Entry.isZExt = false; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy()); - - Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext()); - - // By default, the input chain to this libcall is the entry node of the - // function. If the libcall is going to be emitted as a tail call then - // isUsedByReturnOnly will change it to the right chain if the return - // node which is being folded has a non-entry input chain. - SDValue InChain = DAG.getEntryNode(); - - // isTailCall may be true since the callee does not reference caller stack - // frame. Check if it's in the right position. 
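// A standalone sketch of the i1 handling in LowerBRCOND above: with the default
// UndefinedBooleanContent the producer may leave rubbish in the upper bits, so
// the consumer masks down to bit 0 before testing, i.e. "br (b)" turns into
// "tst = b & 1; cmp tst, #0; b.ne". Helper name is illustrative.
static bool consumeBoolean(unsigned RawBit) {
  unsigned TheBit = RawBit & 1; // ignore rubbish in the higher bits
  return TheBit != 0;           // the branch/select then tests against zero
}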
- SDValue TCChain = InChain; - bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain); - if (isTailCall) - InChain = TCChain; - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(SDLoc(Op)).setChain(InChain) - .setCallee(getLibcallCallingConv(Call), RetTy, Callee, &Args, 0) - .setTailCall(isTailCall); - - std::pair CallInfo = LowerCallTo(CLI); - - if (!CallInfo.second.getNode()) - // It's a tailcall, return the chain (which is the DAG root). - return DAG.getRoot(); - - return CallInfo.first; -} - -SDValue -AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { - if (Op.getOperand(0).getValueType() != MVT::f128) { - // It's legal except when f128 is involved - return Op; - } - - RTLIB::Libcall LC; - LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); - - SDValue SrcVal = Op.getOperand(0); - return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, - /*isSigned*/ false, SDLoc(Op)).first; -} - -SDValue -AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { - assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); - - RTLIB::Libcall LC; - LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); - - return LowerF128ToCall(Op, DAG, LC); -} - -static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG, - bool IsSigned) { - SDLoc dl(Op); - EVT VT = Op.getValueType(); - SDValue Vec = Op.getOperand(0); - EVT OpVT = Vec.getValueType(); - unsigned Opc = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT; - - if (VT.getVectorNumElements() == 1) { - assert(OpVT == MVT::v1f64 && "Unexpected vector type!"); - if (VT.getSizeInBits() == OpVT.getSizeInBits()) - return Op; - return DAG.UnrollVectorOp(Op.getNode()); - } - - if (VT.getSizeInBits() > OpVT.getSizeInBits()) { - assert(Vec.getValueType() == MVT::v2f32 && VT == MVT::v2i64 && - "Unexpected vector type!"); - Vec = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Vec); - return DAG.getNode(Opc, dl, VT, Vec); - } else if (VT.getSizeInBits() < OpVT.getSizeInBits()) { - EVT CastVT = EVT::getIntegerVT(*DAG.getContext(), - OpVT.getVectorElementType().getSizeInBits()); - CastVT = - EVT::getVectorVT(*DAG.getContext(), CastVT, VT.getVectorNumElements()); - Vec = DAG.getNode(Opc, dl, CastVT, Vec); - return DAG.getNode(ISD::TRUNCATE, dl, VT, Vec); - } - return DAG.getNode(Opc, dl, VT, Vec); -} - -static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { - // We custom lower concat_vectors with 4, 8, or 16 operands that are all the - // same operand and of type v1* using the DUP instruction. - unsigned NumOps = Op->getNumOperands(); - if (NumOps == 2) { - assert(Op.getValueType().getSizeInBits() == 128 && "unexpected concat"); - return Op; - } - - if (NumOps != 4 && NumOps != 8 && NumOps != 16) - return SDValue(); - - // Must be a single value for VDUP. - SDValue Op0 = Op.getOperand(0); - for (unsigned i = 1; i < NumOps; ++i) { - SDValue OpN = Op.getOperand(i); - if (Op0 != OpN) - return SDValue(); - } - - // Verify the value type. - EVT EltVT = Op0.getValueType(); - switch (NumOps) { - default: llvm_unreachable("Unexpected number of operands"); - case 4: - if (EltVT != MVT::v1i16 && EltVT != MVT::v1i32) - return SDValue(); - break; - case 8: - if (EltVT != MVT::v1i8 && EltVT != MVT::v1i16) - return SDValue(); - break; - case 16: - if (EltVT != MVT::v1i8) - return SDValue(); - break; - } - - SDLoc DL(Op); - EVT VT = Op.getValueType(); - // VDUP produces better code for constants. 
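// A standalone sketch of the size-matching strategy in LowerVectorFP_TO_INT
// above: when the integer result is wider than the FP source, the source is
// extended first (e.g. v2f32 -> v2f64 -> v2i64); when it is narrower, the
// conversion is done at the source width and then truncated; equal widths go
// straight through. The enum and helper are illustrative only.
enum class FpToIntPlan { Direct, ExtendSourceFirst, ConvertThenTruncate };

static FpToIntPlan planVectorFpToInt(unsigned ResultBits, unsigned SourceBits) {
  if (ResultBits > SourceBits)
    return FpToIntPlan::ExtendSourceFirst;
  if (ResultBits < SourceBits)
    return FpToIntPlan::ConvertThenTruncate;
  return FpToIntPlan::Direct;
}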
- if (Op0->getOpcode() == ISD::BUILD_VECTOR) - return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Op0->getOperand(0)); - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Op0, - DAG.getConstant(0, MVT::i64)); -} - -SDValue -AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - bool IsSigned) const { - if (Op.getValueType().isVector()) - return LowerVectorFP_TO_INT(Op, DAG, IsSigned); - if (Op.getOperand(0).getValueType() != MVT::f128) { - // It's legal except when f128 is involved - return Op; - } - - RTLIB::Libcall LC; - if (IsSigned) - LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType()); - else - LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); - - return LowerF128ToCall(Op, DAG, LC); -} - -SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->setReturnAddressIsTaken(true); - - if (verifyReturnAddressArgumentIsConstant(Op, DAG)) - return SDValue(); - - EVT VT = Op.getValueType(); - SDLoc dl(Op); - unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); - if (Depth) { - SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); - SDValue Offset = DAG.getConstant(8, MVT::i64); - return DAG.getLoad(VT, dl, DAG.getEntryNode(), - DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), - MachinePointerInfo(), false, false, false, 0); - } - - // Return X30, which contains the return address. Mark it an implicit live-in. - unsigned Reg = MF.addLiveIn(AArch64::X30, getRegClassFor(MVT::i64)); - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, MVT::i64); -} - - -SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) - const { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - MFI->setFrameAddressIsTaken(true); - - EVT VT = Op.getValueType(); - SDLoc dl(Op); - unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = AArch64::X29; - SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); - while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, - MachinePointerInfo(), - false, false, false, 0); - return FrameAddr; -} - -// FIXME? Maybe this could be a TableGen attribute on some registers and -// this table could be generated automatically from RegInfo. 
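// A standalone sketch of the frame-record layout LowerRETURNADDR/LowerFRAMEADDR
// above rely on: with X29 as the frame pointer each record is {previous FP,
// saved LR}, so FRAMEADDR walks the chain of saved FPs and RETURNADDR at depth
// N loads the slot 8 bytes past the Nth frame pointer (depth 0 simply reads
// X30). Helper names are illustrative, not backend code.
#include <cstdint>

static std::uintptr_t frameAddress(std::uintptr_t FP, unsigned Depth) {
  while (Depth--)
    FP = *reinterpret_cast<const std::uintptr_t *>(FP); // previous frame's FP
  return FP;
}

static std::uintptr_t returnAddress(std::uintptr_t FP, unsigned Depth) {
  // Assumes a frame record has been set up for the frame in question.
  return *reinterpret_cast<const std::uintptr_t *>(frameAddress(FP, Depth) + 8);
}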
-unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { - unsigned Reg = StringSwitch(RegName) - .Case("sp", AArch64::XSP) - .Default(0); - if (Reg) - return Reg; - report_fatal_error("Invalid register name global variable"); -} - -SDValue -AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op, - SelectionDAG &DAG) const { - assert(getTargetMachine().getCodeModel() == CodeModel::Large); - assert(getTargetMachine().getRelocationModel() == Reloc::Static); - - EVT PtrVT = getPointerTy(); - SDLoc dl(Op); - const GlobalAddressSDNode *GN = cast(Op); - const GlobalValue *GV = GN->getGlobal(); - - SDValue GlobalAddr = DAG.getNode( - AArch64ISD::WrapperLarge, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3), - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC), - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC), - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC)); - - if (GN->getOffset() != 0) - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr, - DAG.getConstant(GN->getOffset(), PtrVT)); - - return GlobalAddr; -} - -SDValue -AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op, - SelectionDAG &DAG) const { - assert(getTargetMachine().getCodeModel() == CodeModel::Small); - - EVT PtrVT = getPointerTy(); - SDLoc dl(Op); - const GlobalAddressSDNode *GN = cast(Op); - const GlobalValue *GV = GN->getGlobal(); - unsigned Alignment = GV->getAlignment(); - Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) { - // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate - // to zero when they remain undefined. In PIC mode the GOT can take care of - // this, but in absolute mode we use a constant pool load. - SDValue PoolAddr; - PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, - DAG.getTargetConstantPool(GV, PtrVT, 0, 0, - AArch64II::MO_NO_FLAG), - DAG.getTargetConstantPool(GV, PtrVT, 0, 0, - AArch64II::MO_LO12), - DAG.getConstant(8, MVT::i32)); - SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr, - MachinePointerInfo::getConstantPool(), - /*isVolatile=*/ false, - /*isNonTemporal=*/ true, - /*isInvariant=*/ true, 8); - if (GN->getOffset() != 0) - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr, - DAG.getConstant(GN->getOffset(), PtrVT)); - - return GlobalAddr; - } - - if (Alignment == 0) { - const PointerType *GVPtrTy = cast(GV->getType()); - if (GVPtrTy->getElementType()->isSized()) { - Alignment - = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType()); - } else { - // Be conservative if we can't guess, not that it really matters: - // functions and labels aren't valid for loads, and the methods used to - // actually calculate an address work with any alignment. - Alignment = 1; - } - } - - unsigned char HiFixup, LoFixup; - bool UseGOT = getSubtarget()->GVIsIndirectSymbol(GV, RelocM); - - if (UseGOT) { - HiFixup = AArch64II::MO_GOT; - LoFixup = AArch64II::MO_GOT_LO12; - Alignment = 8; - } else { - HiFixup = AArch64II::MO_NO_FLAG; - LoFixup = AArch64II::MO_LO12; - } - - // AArch64's small model demands the following sequence: - // ADRP x0, somewhere - // ADD x0, x0, #:lo12:somewhere ; (or LDR directly). 
- SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - HiFixup), - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - LoFixup), - DAG.getConstant(Alignment, MVT::i32)); - - if (UseGOT) { - GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(), - GlobalRef); - } - - if (GN->getOffset() != 0) - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef, - DAG.getConstant(GN->getOffset(), PtrVT)); - - return GlobalRef; -} - -SDValue -AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, - SelectionDAG &DAG) const { - // TableGen doesn't have easy access to the CodeModel or RelocationModel, so - // we make those distinctions here. - - switch (getTargetMachine().getCodeModel()) { - case CodeModel::Small: - return LowerGlobalAddressELFSmall(Op, DAG); - case CodeModel::Large: - return LowerGlobalAddressELFLarge(Op, DAG); - default: - llvm_unreachable("Only small and large code models supported now"); - } -} - -SDValue -AArch64TargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT PtrVT = getPointerTy(); - ConstantPoolSDNode *CN = cast(Op); - const Constant *C = CN->getConstVal(); - - switch(getTargetMachine().getCodeModel()) { - case CodeModel::Small: - // The most efficient code is PC-relative anyway for the small memory model, - // so we don't need to worry about relocation model. - return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - DAG.getTargetConstantPool(C, PtrVT, 0, 0, - AArch64II::MO_NO_FLAG), - DAG.getTargetConstantPool(C, PtrVT, 0, 0, - AArch64II::MO_LO12), - DAG.getConstant(CN->getAlignment(), MVT::i32)); - case CodeModel::Large: - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G3), - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC), - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC), - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC)); - default: - llvm_unreachable("Only small and large code models supported now"); - } -} - -SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr, - SDValue DescAddr, - SDLoc DL, - SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); - - // The function we need to call is simply the first entry in the GOT for this - // descriptor, load it in preparation. - SDValue Func, Chain; - Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(), - DescAddr); - - // The function takes only one argument: the address of the descriptor itself - // in X0. - SDValue Glue; - Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue); - Glue = Chain.getValue(1); - - // Finally, there's a special calling-convention which means that the lookup - // must preserve all registers (except X0, obviously). 
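// A standalone sketch of the small-code-model addressing used above: ADRP
// materialises the 4KB-aligned page of the symbol and the #:lo12: ADD (or the
// LDR offset) supplies the low 12 bits, so the pair recomposes the full
// address. The helper name is illustrative.
#include <cstdint>

static std::uint64_t composeSmallModelAddress(std::uint64_t SymbolAddr) {
  std::uint64_t Page = SymbolAddr & ~0xfffULL; // what ADRP produces
  std::uint64_t Lo12 = SymbolAddr & 0xfffULL;  // the #:lo12: fixup
  return Page + Lo12;                          // == SymbolAddr
}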
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const AArch64RegisterInfo *A64RI - = static_cast(TRI); - const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask(); - - // We're now ready to populate the argument list, as with a normal call: - std::vector Ops; - Ops.push_back(Chain); - Ops.push_back(Func); - Ops.push_back(SymAddr); - Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT)); - Ops.push_back(DAG.getRegisterMask(Mask)); - Ops.push_back(Glue); - - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, Ops); - Glue = Chain.getValue(1); - - // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it - // back to the generic handling code. - return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); -} - -SDValue -AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { - assert(getSubtarget()->isTargetELF() && - "TLS not implemented for non-ELF targets"); - assert(getTargetMachine().getCodeModel() == CodeModel::Small - && "TLS only supported in small memory model"); - const GlobalAddressSDNode *GA = cast(Op); - - TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); - - SDValue TPOff; - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - const GlobalValue *GV = GA->getGlobal(); - - SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT); - - if (Model == TLSModel::InitialExec) { - TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - AArch64II::MO_GOTTPREL), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - AArch64II::MO_GOTTPREL_LO12), - DAG.getConstant(8, MVT::i32)); - TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(), - TPOff); - } else if (Model == TLSModel::LocalExec) { - SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, - AArch64II::MO_TPREL_G1); - SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, - AArch64II::MO_TPREL_G0_NC); - - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, - DAG.getTargetConstant(1, MVT::i32)), 0); - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, - TPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), 0); - } else if (Model == TLSModel::GeneralDynamic) { - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. - SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - AArch64II::MO_TLSDESC); - SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - AArch64II::MO_TLSDESC_LO12); - SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - HiDesc, LoDesc, - DAG.getConstant(8, MVT::i32)); - SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0); - - TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); - } else if (Model == TLSModel::LocalDynamic) { - // Local-dynamic accesses proceed in two phases. A general-dynamic TLS - // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate - // the beginning of the module's TLS region, followed by a DTPREL offset - // calculation. - - // These accesses will need deduplicating if there's more than one. 
- AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction() - .getInfo(); - MFI->incNumLocalDynamicTLSAccesses(); - - - // Get the location of _TLS_MODULE_BASE_: - SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, - AArch64II::MO_TLSDESC); - SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, - AArch64II::MO_TLSDESC_LO12); - SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - HiDesc, LoDesc, - DAG.getConstant(8, MVT::i32)); - SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT); - - ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); - - // Get the variable's offset from _TLS_MODULE_BASE_ - SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, - AArch64II::MO_DTPREL_G1); - SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, - AArch64II::MO_DTPREL_G0_NC); - - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, - DAG.getTargetConstant(0, MVT::i32)), 0); - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, - TPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), 0); - } else - llvm_unreachable("Unsupported TLS access model"); - - - return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff); -} - -static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG, - bool IsSigned) { - SDLoc dl(Op); - EVT VT = Op.getValueType(); - SDValue Vec = Op.getOperand(0); - unsigned Opc = IsSigned ? ISD::SINT_TO_FP : ISD::UINT_TO_FP; - - if (VT.getVectorNumElements() == 1) { - assert(VT == MVT::v1f64 && "Unexpected vector type!"); - if (VT.getSizeInBits() == Vec.getValueSizeInBits()) - return Op; - return DAG.UnrollVectorOp(Op.getNode()); - } - - if (VT.getSizeInBits() < Vec.getValueSizeInBits()) { - assert(Vec.getValueType() == MVT::v2i64 && VT == MVT::v2f32 && - "Unexpected vector type!"); - Vec = DAG.getNode(Opc, dl, MVT::v2f64, Vec); - return DAG.getNode(ISD::FP_ROUND, dl, VT, Vec, DAG.getIntPtrConstant(0)); - } else if (VT.getSizeInBits() > Vec.getValueSizeInBits()) { - unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - EVT CastVT = EVT::getIntegerVT(*DAG.getContext(), - VT.getVectorElementType().getSizeInBits()); - CastVT = - EVT::getVectorVT(*DAG.getContext(), CastVT, VT.getVectorNumElements()); - Vec = DAG.getNode(CastOpc, dl, CastVT, Vec); - } - - return DAG.getNode(Opc, dl, VT, Vec); -} - -SDValue -AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, - bool IsSigned) const { - if (Op.getValueType().isVector()) - return LowerVectorINT_TO_FP(Op, DAG, IsSigned); - if (Op.getValueType() != MVT::f128) { - // Legal for everything except f128. - return Op; - } - - RTLIB::Libcall LC; - if (IsSigned) - LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); - else - LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); - - return LowerF128ToCall(Op, DAG, LC); -} - - -SDValue -AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { - JumpTableSDNode *JT = cast(Op); - SDLoc dl(JT); - EVT PtrVT = getPointerTy(); - - // When compiling PIC, jump tables get put in the code section so a static - // relocation-style is acceptable for both cases. 
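// A standalone sketch of the local-exec TLS sequence above: MOVZ/MOVK build a
// 32-bit TPREL offset from its G1 (bits 16-31) and G0 (bits 0-15) halves, and
// the variable's address is that offset added to the thread pointer
// (TPIDR_EL0). Helper name and parameters are illustrative only.
#include <cstdint>

static std::uint64_t localExecAddress(std::uint64_t ThreadBase,
                                      std::uint16_t TprelG1,
                                      std::uint16_t TprelG0) {
  std::uint64_t TPOff = (std::uint64_t(TprelG1) << 16) | TprelG0;
  return ThreadBase + TPOff;
}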
- switch (getTargetMachine().getCodeModel()) { - case CodeModel::Small: - return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, - DAG.getTargetJumpTable(JT->getIndex(), PtrVT), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - AArch64II::MO_LO12), - DAG.getConstant(1, MVT::i32)); - case CodeModel::Large: - return DAG.getNode( - AArch64ISD::WrapperLarge, dl, PtrVT, - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC)); - default: - llvm_unreachable("Only small and large code models supported now"); - } -} - -// (SELECT testbit, iftrue, iffalse) -SDValue -AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - SDValue TheBit = Op.getOperand(0); - SDValue IfTrue = Op.getOperand(1); - SDValue IfFalse = Op.getOperand(2); - - // AArch64 BooleanContents is the default UndefinedBooleanContent, which means - // that as the consumer we are responsible for ignoring rubbish in higher - // bits. - TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit, - DAG.getConstant(1, MVT::i32)); - SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit, - DAG.getConstant(0, TheBit.getValueType()), - DAG.getCondCode(ISD::SETNE)); - - return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), - A64CMP, IfTrue, IfFalse, - DAG.getConstant(A64CC::NE, MVT::i32)); -} - -static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) { - SDLoc DL(Op); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - ISD::CondCode CC = cast(Op.getOperand(2))->get(); - EVT VT = Op.getValueType(); - bool Invert = false; - SDValue Op0, Op1; - unsigned Opcode; - - if (LHS.getValueType().isInteger()) { - - // Attempt to use Vector Integer Compare Mask Test instruction. - // TST = icmp ne (and (op0, op1), zero). - if (CC == ISD::SETNE) { - if (((LHS.getOpcode() == ISD::AND) && - ISD::isBuildVectorAllZeros(RHS.getNode())) || - ((RHS.getOpcode() == ISD::AND) && - ISD::isBuildVectorAllZeros(LHS.getNode()))) { - - SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS; - SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0)); - SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1)); - return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS); - } - } - - // Attempt to use Vector Integer Compare Mask against Zero instr (Signed). - // Note: Compare against Zero does not support unsigned predicates. - if ((ISD::isBuildVectorAllZeros(RHS.getNode()) || - ISD::isBuildVectorAllZeros(LHS.getNode())) && - !isUnsignedIntSetCC(CC)) { - - // If LHS is the zero value, swap operands and CondCode. - if (ISD::isBuildVectorAllZeros(LHS.getNode())) { - CC = getSetCCSwappedOperands(CC); - Op0 = RHS; - } else - Op0 = LHS; - - // Ensure valid CondCode for Compare Mask against Zero instruction: - // EQ, GE, GT, LE, LT. - if (ISD::SETNE == CC) { - Invert = true; - CC = ISD::SETEQ; - } - - // Using constant type to differentiate integer and FP compares with zero. - Op1 = DAG.getConstant(0, MVT::i32); - Opcode = AArch64ISD::NEON_CMPZ; - - } else { - // Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned). - // Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT. 
- bool Swap = false; - switch (CC) { - default: - llvm_unreachable("Illegal integer comparison."); - case ISD::SETEQ: - case ISD::SETGT: - case ISD::SETGE: - case ISD::SETUGT: - case ISD::SETUGE: - break; - case ISD::SETNE: - Invert = true; - CC = ISD::SETEQ; - break; - case ISD::SETULT: - case ISD::SETULE: - case ISD::SETLT: - case ISD::SETLE: - Swap = true; - CC = getSetCCSwappedOperands(CC); - } - - if (Swap) - std::swap(LHS, RHS); - - Opcode = AArch64ISD::NEON_CMP; - Op0 = LHS; - Op1 = RHS; - } - - // Generate Compare Mask instr or Compare Mask against Zero instr. - SDValue NeonCmp = - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC)); - - if (Invert) - NeonCmp = DAG.getNOT(DL, NeonCmp, VT); - - return NeonCmp; - } - - // Now handle Floating Point cases. - // Attempt to use Vector Floating Point Compare Mask against Zero instruction. - if (ISD::isBuildVectorAllZeros(RHS.getNode()) || - ISD::isBuildVectorAllZeros(LHS.getNode())) { - - // If LHS is the zero value, swap operands and CondCode. - if (ISD::isBuildVectorAllZeros(LHS.getNode())) { - CC = getSetCCSwappedOperands(CC); - Op0 = RHS; - } else - Op0 = LHS; - - // Using constant type to differentiate integer and FP compares with zero. - Op1 = DAG.getConstantFP(0, MVT::f32); - Opcode = AArch64ISD::NEON_CMPZ; - } else { - // Attempt to use Vector Floating Point Compare Mask instruction. - Op0 = LHS; - Op1 = RHS; - Opcode = AArch64ISD::NEON_CMP; - } - - SDValue NeonCmpAlt; - // Some register compares have to be implemented with swapped CC and operands, - // e.g.: OLT implemented as OGT with swapped operands. - bool SwapIfRegArgs = false; - - // Ensure valid CondCode for FP Compare Mask against Zero instruction: - // EQ, GE, GT, LE, LT. - // And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT. - switch (CC) { - default: - llvm_unreachable("Illegal FP comparison"); - case ISD::SETUNE: - case ISD::SETNE: - Invert = true; // Fallthrough - case ISD::SETOEQ: - case ISD::SETEQ: - CC = ISD::SETEQ; - break; - case ISD::SETOLT: - case ISD::SETLT: - CC = ISD::SETLT; - SwapIfRegArgs = true; - break; - case ISD::SETOGT: - case ISD::SETGT: - CC = ISD::SETGT; - break; - case ISD::SETOLE: - case ISD::SETLE: - CC = ISD::SETLE; - SwapIfRegArgs = true; - break; - case ISD::SETOGE: - case ISD::SETGE: - CC = ISD::SETGE; - break; - case ISD::SETUGE: - Invert = true; - CC = ISD::SETLT; - SwapIfRegArgs = true; - break; - case ISD::SETULE: - Invert = true; - CC = ISD::SETGT; - break; - case ISD::SETUGT: - Invert = true; - CC = ISD::SETLE; - SwapIfRegArgs = true; - break; - case ISD::SETULT: - Invert = true; - CC = ISD::SETGE; - break; - case ISD::SETUEQ: - Invert = true; // Fallthrough - case ISD::SETONE: - // Expand this to (OGT |OLT). - NeonCmpAlt = - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT)); - CC = ISD::SETLT; - SwapIfRegArgs = true; - break; - case ISD::SETUO: - Invert = true; // Fallthrough - case ISD::SETO: - // Expand this to (OGE | OLT). 
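
// Standalone sketch (plain C++, not LLVM code) of the two expansions noted in
// the comments above: SETONE behaves as OGT || OLT and SETO as OGE || OLT,
// while the unordered forms (SETUEQ, SETUO) are just the inverted results,
// which is why Invert is set before falling through.
#include <cassert>
#include <cmath>

static bool ogt(double a, double b) { return a > b; }  // ordered greater-than
static bool olt(double a, double b) { return a < b; }  // ordered less-than
static bool oge(double a, double b) { return a >= b; } // ordered greater-or-equal

static bool setone(double a, double b) { return ogt(a, b) || olt(a, b); }
static bool seto(double a, double b)   { return oge(a, b) || olt(a, b); }

int main() {
  const double nan = std::nan("");
  assert(setone(1.0, 2.0) && !setone(2.0, 2.0));
  assert(!setone(nan, 2.0));                 // unordered -> false; SETUEQ is !SETONE
  assert(seto(1.0, 2.0) && !seto(nan, 2.0)); // SETUO is !SETO
  return 0;
}
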
- NeonCmpAlt = - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE)); - CC = ISD::SETLT; - SwapIfRegArgs = true; - break; - } - - if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) { - CC = getSetCCSwappedOperands(CC); - std::swap(Op0, Op1); - } - - // Generate FP Compare Mask instr or FP Compare Mask against Zero instr - SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC)); - - if (NeonCmpAlt.getNode()) - NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt); - - if (Invert) - NeonCmp = DAG.getNOT(DL, NeonCmp, VT); - - return NeonCmp; -} - -// (SETCC lhs, rhs, condcode) -SDValue -AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - ISD::CondCode CC = cast(Op.getOperand(2))->get(); - EVT VT = Op.getValueType(); - - if (VT.isVector()) - return LowerVectorSETCC(Op, DAG); - - if (LHS.getValueType() == MVT::f128) { - // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS - // for the rest of the function (some i32 or i64 values). - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); - - // If softenSetCCOperands returned a scalar, use it. - if (!RHS.getNode()) { - assert(LHS.getValueType() == Op.getValueType() && - "Unexpected setcc expansion!"); - return LHS; - } - } - - if (LHS.getValueType().isInteger()) { - SDValue A64cc; - - // Integers are handled in a separate function because the combinations of - // immediates and tests can get hairy and we may want to fiddle things. - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); - - return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, - CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT), - A64cc); - } - - // Note that some LLVM floating-point CondCodes can't be lowered to a single - // conditional branch, hence FPCCToA64CC can set a second test, where either - // passing is sufficient. - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; - CondCode = FPCCToA64CC(CC, Alternative); - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); - SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, - DAG.getCondCode(CC)); - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, - CmpOp, DAG.getConstant(1, VT), - DAG.getConstant(0, VT), A64cc); - - if (Alternative != A64CC::Invalid) { - A64cc = DAG.getConstant(Alternative, MVT::i32); - A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, - DAG.getConstant(1, VT), A64SELECT_CC, A64cc); - } - - return A64SELECT_CC; -} - -static SDValue LowerVectorSELECT_CC(SDValue Op, SelectionDAG &DAG) { - SDLoc dl(Op); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue IfTrue = Op.getOperand(2); - SDValue IfFalse = Op.getOperand(3); - EVT IfTrueVT = IfTrue.getValueType(); - EVT CondVT = IfTrueVT.changeVectorElementTypeToInteger(); - ISD::CondCode CC = cast(Op.getOperand(4))->get(); - - // If LHS & RHS are floating point and IfTrue & IfFalse are vectors, we will - // use NEON compare. - if ((LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64)) { - EVT EltVT = LHS.getValueType(); - unsigned EltNum = 128 / EltVT.getSizeInBits(); - EVT VT = EVT::getVectorVT(*DAG.getContext(), EltVT, EltNum); - unsigned SubConstant = - (LHS.getValueType() == MVT::f32) ? AArch64::sub_32 :AArch64::sub_64; - EVT CEltT = (LHS.getValueType() == MVT::f32) ? 
MVT::i32 : MVT::i64; - EVT CVT = EVT::getVectorVT(*DAG.getContext(), CEltT, EltNum); - - LHS - = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, - VT, DAG.getTargetConstant(0, MVT::i32), LHS, - DAG.getTargetConstant(SubConstant, MVT::i32)), 0); - RHS - = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, - VT, DAG.getTargetConstant(0, MVT::i32), RHS, - DAG.getTargetConstant(SubConstant, MVT::i32)), 0); - - SDValue VSetCC = DAG.getSetCC(dl, CVT, LHS, RHS, CC); - SDValue ResCC = LowerVectorSETCC(VSetCC, DAG); - if (CEltT.getSizeInBits() < IfTrueVT.getSizeInBits()) { - EVT DUPVT = - EVT::getVectorVT(*DAG.getContext(), CEltT, - IfTrueVT.getSizeInBits() / CEltT.getSizeInBits()); - ResCC = DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, DUPVT, ResCC, - DAG.getConstant(0, MVT::i64, false)); - - ResCC = DAG.getNode(ISD::BITCAST, dl, CondVT, ResCC); - } else { - // FIXME: If IfTrue & IfFalse hold v1i8, v1i16 or v1i32, this function - // can't handle them and will hit this assert. - assert(CEltT.getSizeInBits() == IfTrueVT.getSizeInBits() && - "Vector of IfTrue & IfFalse is too small."); - - unsigned ExEltNum = - EltNum * IfTrueVT.getSizeInBits() / ResCC.getValueSizeInBits(); - EVT ExVT = EVT::getVectorVT(*DAG.getContext(), CEltT, ExEltNum); - ResCC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExVT, ResCC, - DAG.getConstant(0, MVT::i64, false)); - ResCC = DAG.getNode(ISD::BITCAST, dl, CondVT, ResCC); - } - SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(), - ResCC, IfTrue, IfFalse); - return VSelect; - } - - // Here we handle the case that LHS & RHS are integer and IfTrue & IfFalse are - // vectors. - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; - CondCode = FPCCToA64CC(CC, Alternative); - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); - SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, - DAG.getCondCode(CC)); - EVT SEVT = MVT::i32; - if (IfTrue.getValueType().getVectorElementType().getSizeInBits() > 32) - SEVT = MVT::i64; - SDValue AllOne = DAG.getConstant(-1, SEVT); - SDValue AllZero = DAG.getConstant(0, SEVT); - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, SEVT, SetCC, - AllOne, AllZero, A64cc); - - if (Alternative != A64CC::Invalid) { - A64cc = DAG.getConstant(Alternative, MVT::i32); - A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), - SetCC, AllOne, A64SELECT_CC, A64cc); - } - SDValue VDup; - if (IfTrue.getValueType().getVectorNumElements() == 1) - VDup = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, CondVT, A64SELECT_CC); - else - VDup = DAG.getNode(AArch64ISD::NEON_VDUP, dl, CondVT, A64SELECT_CC); - SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(), - VDup, IfTrue, IfFalse); - return VSelect; -} - -// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode) -SDValue -AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue IfTrue = Op.getOperand(2); - SDValue IfFalse = Op.getOperand(3); - ISD::CondCode CC = cast(Op.getOperand(4))->get(); - - if (IfTrue.getValueType().isVector()) - return LowerVectorSELECT_CC(Op, DAG); - - if (LHS.getValueType() == MVT::f128) { - // f128 comparisons are lowered to libcalls, but slot in nicely here - // afterwards. - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); - - // If softenSetCCOperands returned a scalar, we need to compare the result - // against zero to select between true and false values. 
- if (!RHS.getNode()) { - RHS = DAG.getConstant(0, LHS.getValueType()); - CC = ISD::SETNE; - } - } - - if (LHS.getValueType().isInteger()) { - SDValue A64cc; - - // Integers are handled in a separate function because the combinations of - // immediates and tests can get hairy and we may want to fiddle things. - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); - - return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), CmpOp, - IfTrue, IfFalse, A64cc); - } - - // Note that some LLVM floating-point CondCodes can't be lowered to a single - // conditional branch, hence FPCCToA64CC can set a second test, where either - // passing is sufficient. - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; - CondCode = FPCCToA64CC(CC, Alternative); - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); - SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, - DAG.getCondCode(CC)); - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, - Op.getValueType(), - SetCC, IfTrue, IfFalse, A64cc); - - if (Alternative != A64CC::Invalid) { - A64cc = DAG.getConstant(Alternative, MVT::i32); - A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), - SetCC, IfTrue, A64SELECT_CC, A64cc); - - } - - return A64SELECT_CC; -} - -SDValue -AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { - const Value *DestSV = cast(Op.getOperand(3))->getValue(); - const Value *SrcSV = cast(Op.getOperand(4))->getValue(); - - // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes - // rather than just 8. - return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), - Op.getOperand(1), Op.getOperand(2), - DAG.getConstant(32, MVT::i32), 8, false, false, - MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); -} - -SDValue -AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { - // The layout of the va_list struct is specified in the AArch64 Procedure Call - // Standard, section B.3. 
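
// Standalone sketch (plain C++, not LLVM code) of the va_list layout that the
// stores below populate. Field names mirror the __stack/__gr_top/__vr_top/
// __gr_offs/__vr_offs members from AAPCS64 section B.3; the offsets (0, 8, 16,
// 24, 28) and the 32-byte total match the constants used in LowerVACOPY above.
// The checks assume an LP64 host so that the pointer fields are 8 bytes, as on
// the AArch64 target itself.
#include <cstddef>

struct AAPCS64VaList {
  void *stack;   // offset 0:  next stacked argument ("__stack")
  void *gr_top;  // offset 8:  end of the GPR save area ("__gr_top")
  void *vr_top;  // offset 16: end of the FP/SIMD save area ("__vr_top")
  int gr_offs;   // offset 24: negative offset of remaining GPR args ("__gr_offs")
  int vr_offs;   // offset 28: negative offset of remaining FPR args ("__vr_offs")
};

static_assert(offsetof(AAPCS64VaList, gr_top) == 8, "layout check");
static_assert(offsetof(AAPCS64VaList, vr_top) == 16, "layout check");
static_assert(offsetof(AAPCS64VaList, gr_offs) == 24, "layout check");
static_assert(offsetof(AAPCS64VaList, vr_offs) == 28, "layout check");
static_assert(sizeof(AAPCS64VaList) == 32, "8+8+8+4+4 bytes");
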
- MachineFunction &MF = DAG.getMachineFunction(); - AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo(); - SDLoc DL(Op); - - SDValue Chain = Op.getOperand(0); - SDValue VAList = Op.getOperand(1); - const Value *SV = cast(Op.getOperand(2))->getValue(); - SmallVector MemOps; - - // void *__stack at offset 0 - SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(), - getPointerTy()); - MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList, - MachinePointerInfo(SV), false, false, 0)); - - // void *__gr_top at offset 8 - int GPRSize = FuncInfo->getVariadicGPRSize(); - if (GPRSize > 0) { - SDValue GRTop, GRTopAddr; - - GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(8, getPointerTy())); - - GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy()); - GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop, - DAG.getConstant(GPRSize, getPointerTy())); - - MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, - MachinePointerInfo(SV, 8), - false, false, 0)); - } - - // void *__vr_top at offset 16 - int FPRSize = FuncInfo->getVariadicFPRSize(); - if (FPRSize > 0) { - SDValue VRTop, VRTopAddr; - VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(16, getPointerTy())); - - VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy()); - VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop, - DAG.getConstant(FPRSize, getPointerTy())); - - MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, - MachinePointerInfo(SV, 16), - false, false, 0)); - } - - // int __gr_offs at offset 24 - SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(24, getPointerTy())); - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32), - GROffsAddr, MachinePointerInfo(SV, 24), - false, false, 0)); - - // int __vr_offs at offset 28 - SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(28, getPointerTy())); - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32), - VROffsAddr, MachinePointerInfo(SV, 28), - false, false, 0)); - - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); -} - -SDValue -AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) { - default: llvm_unreachable("Don't know how to custom lower this!"); - case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128); - case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128); - case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128); - case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128); - case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true); - case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false); - case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true); - case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false); - case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); - case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); - case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); - case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - - case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); - case ISD::SRL_PARTS: - case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); - - case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); - case ISD::BRCOND: return LowerBRCOND(Op, DAG); - case ISD::BR_CC: return LowerBR_CC(Op, DAG); - case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG); - case ISD::ConstantPool: return 
LowerConstantPool(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); - case ISD::JumpTable: return LowerJumpTable(Op, DAG); - case ISD::SELECT: return LowerSELECT(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::VACOPY: return LowerVACOPY(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG); - case ISD::BUILD_VECTOR: - return LowerBUILD_VECTOR(Op, DAG, getSubtarget()); - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); - case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); - } - - return SDValue(); -} - -/// Check if the specified splat value corresponds to a valid vector constant -/// for a Neon instruction with a "modified immediate" operand (e.g., MOVI). If -/// so, return the encoded 8-bit immediate and the OpCmode instruction fields -/// values. -static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, - unsigned SplatBitSize, SelectionDAG &DAG, - bool is128Bits, NeonModImmType type, EVT &VT, - unsigned &Imm, unsigned &OpCmode) { - switch (SplatBitSize) { - default: - llvm_unreachable("unexpected size for isNeonModifiedImm"); - case 8: { - if (type != Neon_Mov_Imm) - return false; - assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); - // Neon movi per byte: Op=0, Cmode=1110. - OpCmode = 0xe; - Imm = SplatBits; - VT = is128Bits ? MVT::v16i8 : MVT::v8i8; - break; - } - case 16: { - // Neon move inst per halfword - VT = is128Bits ? MVT::v8i16 : MVT::v4i16; - if ((SplatBits & ~0xff) == 0) { - // Value = 0x00nn is 0x00nn LSL 0 - // movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000 - // bic: Op=1, Cmode=1001; orr: Op=0, Cmode=1001 - // Op=x, Cmode=100y - Imm = SplatBits; - OpCmode = 0x8; - break; - } - if ((SplatBits & ~0xff00) == 0) { - // Value = 0xnn00 is 0x00nn LSL 8 - // movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010 - // bic: Op=1, Cmode=1011; orr: Op=0, Cmode=1011 - // Op=x, Cmode=101x - Imm = SplatBits >> 8; - OpCmode = 0xa; - break; - } - // can't handle any other - return false; - } - - case 32: { - // First the LSL variants (MSL is unusable by some interested instructions). - - // Neon move instr per word, shift zeros - VT = is128Bits ? MVT::v4i32 : MVT::v2i32; - if ((SplatBits & ~0xff) == 0) { - // Value = 0x000000nn is 0x000000nn LSL 0 - // movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000 - // bic: Op=1, Cmode= 0001; orr: Op=0, Cmode= 0001 - // Op=x, Cmode=000x - Imm = SplatBits; - OpCmode = 0; - break; - } - if ((SplatBits & ~0xff00) == 0) { - // Value = 0x0000nn00 is 0x000000nn LSL 8 - // movi: Op=0, Cmode= 0010; mvni: Op=1, Cmode= 0010 - // bic: Op=1, Cmode= 0011; orr : Op=0, Cmode= 0011 - // Op=x, Cmode=001x - Imm = SplatBits >> 8; - OpCmode = 0x2; - break; - } - if ((SplatBits & ~0xff0000) == 0) { - // Value = 0x00nn0000 is 0x000000nn LSL 16 - // movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100 - // bic: Op=1, Cmode= 0101; orr: Op=0, Cmode= 0101 - // Op=x, Cmode=010x - Imm = SplatBits >> 16; - OpCmode = 0x4; - break; - } - if ((SplatBits & ~0xff000000) == 0) { - // Value = 0xnn000000 is 0x000000nn LSL 24 - // movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110 - // bic: Op=1, Cmode= 0111; orr: Op=0, Cmode= 0111 - // Op=x, Cmode=011x - Imm = SplatBits >> 24; - OpCmode = 0x6; - break; - } - - // Now the MSL immediates. 
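
// Before the MSL forms, a standalone sketch (plain C++, not LLVM code) of the
// 32-bit LSL cases enumerated above: a splat qualifies when all of its set
// bits sit in a single byte lane, Imm is that byte, and OpCmode selects the
// byte position (0x0, 0x2, 0x4, 0x6 for shifts of 0, 8, 16 and 24 bits).
#include <cstdint>

struct ModImm32 { bool valid; uint8_t imm; uint8_t opCmode; };

constexpr ModImm32 encodeMovi32LSL(uint32_t splat, unsigned byteShift = 0) {
  return byteShift == 4
             ? ModImm32{false, 0, 0}
             : (splat & ~(0xFFu << (8 * byteShift))) == 0
                   ? ModImm32{true,
                              static_cast<uint8_t>(splat >> (8 * byteShift)),
                              static_cast<uint8_t>(2 * byteShift)}
                   : encodeMovi32LSL(splat, byteShift + 1);
}

static_assert(encodeMovi32LSL(0x000000ABu).opCmode == 0x0, "LSL #0");
static_assert(encodeMovi32LSL(0x0000AB00u).opCmode == 0x2, "LSL #8");
static_assert(encodeMovi32LSL(0x00AB0000u).imm == 0xAB, "LSL #16");
static_assert(!encodeMovi32LSL(0x00ABCD00u).valid, "spans two bytes, no LSL form");
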
- - // Neon move instr per word, shift ones - if ((SplatBits & ~0xffff) == 0 && - ((SplatBits | SplatUndef) & 0xff) == 0xff) { - // Value = 0x0000nnff is 0x000000nn MSL 8 - // movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100 - // Op=x, Cmode=1100 - Imm = SplatBits >> 8; - OpCmode = 0xc; - break; - } - if ((SplatBits & ~0xffffff) == 0 && - ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { - // Value = 0x00nnffff is 0x000000nn MSL 16 - // movi: Op=1, Cmode= 1101; mvni: Op=1, Cmode= 1101 - // Op=x, Cmode=1101 - Imm = SplatBits >> 16; - OpCmode = 0xd; - break; - } - // can't handle any other - return false; - } - - case 64: { - if (type != Neon_Mov_Imm) - return false; - // Neon move instr bytemask, where each byte is either 0x00 or 0xff. - // movi Op=1, Cmode=1110. - OpCmode = 0x1e; - uint64_t BitMask = 0xff; - uint64_t Val = 0; - unsigned ImmMask = 1; - Imm = 0; - for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { - if (((SplatBits | SplatUndef) & BitMask) == BitMask) { - Val |= BitMask; - Imm |= ImmMask; - } else if ((SplatBits & BitMask) != 0) { - return false; - } - BitMask <<= 8; - ImmMask <<= 1; - } - SplatBits = Val; - VT = is128Bits ? MVT::v2i64 : MVT::v1i64; - break; - } - } - - return true; -} - -static SDValue PerformANDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - // We're looking for an SRA/SHL pair which form an SBFX. - - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); - - if (!isa(N->getOperand(1))) - return SDValue(); - - uint64_t TruncMask = N->getConstantOperandVal(1); - if (!isMask_64(TruncMask)) - return SDValue(); - - uint64_t Width = CountPopulation_64(TruncMask); - SDValue Shift = N->getOperand(0); - - if (Shift.getOpcode() != ISD::SRL) - return SDValue(); - - if (!isa(Shift->getOperand(1))) - return SDValue(); - uint64_t LSB = Shift->getConstantOperandVal(1); - - if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits()) - return SDValue(); - - return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0), - DAG.getConstant(LSB, MVT::i64), - DAG.getConstant(LSB + Width - 1, MVT::i64)); -} - -/// For a true bitfield insert, the bits getting into that contiguous mask -/// should come from the low part of an existing value: they must be formed from -/// a compatible SHL operation (unless they're already low). This function -/// checks that condition and returns the least-significant bit that's -/// intended. If the operation not a field preparation, -1 is returned. -static int32_t getLSBForBFI(SelectionDAG &DAG, SDLoc DL, EVT VT, - SDValue &MaskedVal, uint64_t Mask) { - if (!isShiftedMask_64(Mask)) - return -1; - - // Now we need to alter MaskedVal so that it is an appropriate input for a BFI - // instruction. BFI will do a left-shift by LSB before applying the mask we've - // spotted, so in general we should pre-emptively "undo" that by making sure - // the incoming bits have had a right-shift applied to them. - // - // This right shift, however, will combine with existing left/right shifts. In - // the simplest case of a completely straight bitfield operation, it will be - // expected to completely cancel out with an existing SHL. More complicated - // cases (e.g. bitfield to bitfield copy) may still need a real shift before - // the BFI. 
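
// Standalone sketch (plain C++, not LLVM code) of why the SHL can be peeled
// off here: masking a left-shifted value with a field at the same LSB uses
// only the low `width` bits of the original value, so the BFI can take the
// unshifted value directly and no real shift is needed in the simple case.
#include <cstdint>

constexpr uint64_t lowbits(unsigned width) { return (1ULL << width) - 1ULL; }

static_assert(((0xABCDull << 8) & (lowbits(8) << 8)) ==
                  ((0xABCDull & lowbits(8)) << 8),
              "shl-then-mask uses only the low bits of the source");
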
- - uint64_t LSB = countTrailingZeros(Mask); - int64_t ShiftRightRequired = LSB; - if (MaskedVal.getOpcode() == ISD::SHL && - isa(MaskedVal.getOperand(1))) { - ShiftRightRequired -= MaskedVal.getConstantOperandVal(1); - MaskedVal = MaskedVal.getOperand(0); - } else if (MaskedVal.getOpcode() == ISD::SRL && - isa(MaskedVal.getOperand(1))) { - ShiftRightRequired += MaskedVal.getConstantOperandVal(1); - MaskedVal = MaskedVal.getOperand(0); - } - - if (ShiftRightRequired > 0) - MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal, - DAG.getConstant(ShiftRightRequired, MVT::i64)); - else if (ShiftRightRequired < 0) { - // We could actually end up with a residual left shift, for example with - // "struc.bitfield = val << 1". - MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal, - DAG.getConstant(-ShiftRightRequired, MVT::i64)); - } - - return LSB; -} - -/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by -/// a mask and an extension. Returns true if a BFI was found and provides -/// information on its surroundings. -static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask, - bool &Extended) { - Extended = false; - if (N.getOpcode() == ISD::ZERO_EXTEND) { - Extended = true; - N = N.getOperand(0); - } - - if (N.getOpcode() == ISD::AND && isa(N.getOperand(1))) { - Mask = N->getConstantOperandVal(1); - N = N.getOperand(0); - } else { - // Mask is the whole width. - Mask = -1ULL >> (64 - N.getValueType().getSizeInBits()); - } - - if (N.getOpcode() == AArch64ISD::BFI) { - BFI = N; - return true; - } - - return false; -} - -/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which -/// is roughly equivalent to (and (BFI ...), mask). This form is used because it -/// can often be further combined with a larger mask. Ultimately, we want mask -/// to be 2^32-1 or 2^64-1 so the AND can be skipped. -static SDValue tryCombineToBFI(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *Subtarget) { - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - assert(N->getOpcode() == ISD::OR && "Unexpected root"); - - // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or - // abandon the effort. - SDValue LHS = N->getOperand(0); - if (LHS.getOpcode() != ISD::AND) - return SDValue(); - - uint64_t LHSMask; - if (isa(LHS.getOperand(1))) - LHSMask = LHS->getConstantOperandVal(1); - else - return SDValue(); - - // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask - // is or abandon the effort. - SDValue RHS = N->getOperand(1); - if (RHS.getOpcode() != ISD::AND) - return SDValue(); - - uint64_t RHSMask; - if (isa(RHS.getOperand(1))) - RHSMask = RHS->getConstantOperandVal(1); - else - return SDValue(); - - // Can't do anything if the masks are incompatible. - if (LHSMask & RHSMask) - return SDValue(); - - // Now we need one of the masks to be a contiguous field. Without loss of - // generality that should be the RHS one. - SDValue Bitfield = LHS.getOperand(0); - if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) { - // We know that LHS is a candidate new value, and RHS isn't already a better - // one. - std::swap(LHS, RHS); - std::swap(LHSMask, RHSMask); - } - - // We've done our best to put the right operands in the right places, all we - // can do now is check whether a BFI exists. 
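
// Standalone sketch (plain C++, not LLVM code) of the identity this combine
// relies on: when the two AND masks are disjoint and one of them is a single
// contiguous field, "(A & M1) | (B & M2)" is a bitfield insert of B into A.
// bfi() models the BFI instruction: insert `width` low bits of src at `lsb`;
// it assumes width < 64, which is enough for the illustration.
#include <cstdint>

constexpr uint64_t field(unsigned lsb, unsigned width) {   // contiguous mask
  return ((1ULL << width) - 1ULL) << lsb;
}
constexpr uint64_t bfi(uint64_t dst, uint64_t src, unsigned lsb, unsigned width) {
  return (dst & ~field(lsb, width)) | ((src << lsb) & field(lsb, width));
}

static_assert(
    ((0xAABBCCDDu & ~field(8, 8)) | ((0x55u << 8) & field(8, 8))) ==
        bfi(0xAABBCCDDu, 0x55u, 8, 8),
    "disjoint-mask OR is a bitfield insert");
static_assert(bfi(0xAABBCCDDu, 0x55u, 8, 8) == 0xAABB55DDu, "value check");
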
- Bitfield = RHS.getOperand(0); - int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask); - if (LSB == -1) - return SDValue(); - - uint32_t Width = CountPopulation_64(RHSMask); - assert(Width && "Expected non-zero bitfield width"); - - SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT, - LHS.getOperand(0), Bitfield, - DAG.getConstant(LSB, MVT::i64), - DAG.getConstant(Width, MVT::i64)); - - // Mask is trivial - if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits()))) - return BFI; - - return DAG.getNode(ISD::AND, DL, VT, BFI, - DAG.getConstant(LHSMask | RHSMask, VT)); -} - -/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its -/// original input. This is surprisingly common because SROA splits things up -/// into i8 chunks, so the originally detected MaskedBFI may actually only act -/// on the low (say) byte of a word. This is then orred into the rest of the -/// word afterwards. -/// -/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)). -/// -/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the -/// MaskedBFI. We can also deal with a certain amount of extend/truncate being -/// involved. -static SDValue tryCombineToLargerBFI(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *Subtarget) { - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - // First job is to hunt for a MaskedBFI on either the left or right. Swap - // operands if it's actually on the right. - SDValue BFI; - SDValue PossExtraMask; - uint64_t ExistingMask = 0; - bool Extended = false; - if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended)) - PossExtraMask = N->getOperand(1); - else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended)) - PossExtraMask = N->getOperand(0); - else - return SDValue(); - - // We can only combine a BFI with another compatible mask. - if (PossExtraMask.getOpcode() != ISD::AND || - !isa(PossExtraMask.getOperand(1))) - return SDValue(); - - uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1); - - // Masks must be compatible. - if (ExtraMask & ExistingMask) - return SDValue(); - - SDValue OldBFIVal = BFI.getOperand(0); - SDValue NewBFIVal = BFI.getOperand(1); - if (Extended) { - // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be - // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments - // need to be made compatible. - assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32 - && "Invalid types for BFI"); - OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal); - NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal); - } - - // We need the MaskedBFI to be combined with a mask of the *same* value. - if (PossExtraMask.getOperand(0) != OldBFIVal) - return SDValue(); - - BFI = DAG.getNode(AArch64ISD::BFI, DL, VT, - OldBFIVal, NewBFIVal, - BFI.getOperand(2), BFI.getOperand(3)); - - // If the masking is trivial, we don't need to create it. - if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits()))) - return BFI; - - return DAG.getNode(ISD::AND, DL, VT, BFI, - DAG.getConstant(ExtraMask | ExistingMask, VT)); -} - -/// An EXTR instruction is made up of two shifts, ORed together. This helper -/// searches for and classifies those shifts. 
-static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, - bool &FromHi) { - if (N.getOpcode() == ISD::SHL) - FromHi = false; - else if (N.getOpcode() == ISD::SRL) - FromHi = true; - else - return false; - - if (!isa(N.getOperand(1))) - return false; - - ShiftAmount = N->getConstantOperandVal(1); - Src = N->getOperand(0); - return true; -} - -/// EXTR instruction extracts a contiguous chunk of bits from two existing -/// registers viewed as a high/low pair. This function looks for the pattern: -/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an -/// EXTR. Can't quite be done in TableGen because the two immediates aren't -/// independent. -static SDValue tryCombineToEXTR(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - assert(N->getOpcode() == ISD::OR && "Unexpected root"); - - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); - - SDValue LHS; - uint32_t ShiftLHS = 0; - bool LHSFromHi = 0; - if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi)) - return SDValue(); - - SDValue RHS; - uint32_t ShiftRHS = 0; - bool RHSFromHi = 0; - if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi)) - return SDValue(); - - // If they're both trying to come from the high part of the register, they're - // not really an EXTR. - if (LHSFromHi == RHSFromHi) - return SDValue(); - - if (ShiftLHS + ShiftRHS != VT.getSizeInBits()) - return SDValue(); - - if (LHSFromHi) { - std::swap(LHS, RHS); - std::swap(ShiftLHS, ShiftRHS); - } - - return DAG.getNode(AArch64ISD::EXTR, DL, VT, - LHS, RHS, - DAG.getConstant(ShiftRHS, MVT::i64)); -} - -/// Target-specific dag combine xforms for ISD::OR -static SDValue PerformORCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *Subtarget) { - - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) - return SDValue(); - - // Attempt to recognise bitfield-insert operations. - SDValue Res = tryCombineToBFI(N, DCI, Subtarget); - if (Res.getNode()) - return Res; - - // Attempt to combine an existing MaskedBFI operation into one with a larger - // mask. - Res = tryCombineToLargerBFI(N, DCI, Subtarget); - if (Res.getNode()) - return Res; - - Res = tryCombineToEXTR(N, DCI); - if (Res.getNode()) - return Res; - - if (!Subtarget->hasNEON()) - return SDValue(); - - // Attempt to use vector immediate-form BSL - // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. 
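
// Standalone sketch (plain C++, not LLVM code) of the per-lane identity the
// BSL combine below exploits: with a constant mask A, "(B & A) | (C & ~A)"
// selects, bit for bit, between B and C, which is exactly what VBSL/VSELECT
// computes.
#include <cstdint>

constexpr uint8_t bsl_lane(uint8_t a, uint8_t b, uint8_t c) {
  return static_cast<uint8_t>((b & a) | (c & static_cast<uint8_t>(~a)));
}

static_assert(bsl_lane(0xFF, 0x12, 0x34) == 0x12, "all-ones mask picks B");
static_assert(bsl_lane(0x00, 0x12, 0x34) == 0x34, "all-zeros mask picks C");
static_assert(bsl_lane(0xF0, 0x12, 0x34) == 0x14, "mixed mask merges bits");
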
- - SDValue N0 = N->getOperand(0); - if (N0.getOpcode() != ISD::AND) - return SDValue(); - - SDValue N1 = N->getOperand(1); - if (N1.getOpcode() != ISD::AND) - return SDValue(); - - if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) { - APInt SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - BuildVectorSDNode *BVN0 = dyn_cast(N0->getOperand(1)); - APInt SplatBits0; - if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, - HasAnyUndefs) && - !HasAnyUndefs) { - BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(1)); - APInt SplatBits1; - if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs && - SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && - SplatBits0 == ~SplatBits1) { - - return DAG.getNode(ISD::VSELECT, DL, VT, N0->getOperand(1), - N0->getOperand(0), N1->getOperand(0)); - } - } - } - - return SDValue(); -} - -/// Target-specific dag combine xforms for ISD::SRA -static SDValue PerformSRACombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - // We're looking for an SRA/SHL pair which form an SBFX. - - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); - - if (!isa(N->getOperand(1))) - return SDValue(); - - uint64_t ExtraSignBits = N->getConstantOperandVal(1); - SDValue Shift = N->getOperand(0); - - if (Shift.getOpcode() != ISD::SHL) - return SDValue(); - - if (!isa(Shift->getOperand(1))) - return SDValue(); - - uint64_t BitsOnLeft = Shift->getConstantOperandVal(1); - uint64_t Width = VT.getSizeInBits() - ExtraSignBits; - uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft; - - if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits()) - return SDValue(); - - return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0), - DAG.getConstant(LSB, MVT::i64), - DAG.getConstant(LSB + Width - 1, MVT::i64)); -} - -/// Check if this is a valid build_vector for the immediate operand of -/// a vector shift operation, where all the elements of the build_vector -/// must have the same constant integer value. -static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { - // Ignore bit_converts. - while (Op.getOpcode() == ISD::BITCAST) - Op = Op.getOperand(0); - BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, - HasAnyUndefs, ElementBits) || - SplatBitSize > ElementBits) - return false; - Cnt = SplatBits.getSExtValue(); - return true; -} - -/// Check if this is a valid build_vector for the immediate operand of -/// a vector shift left operation. That value must be in the range: -/// 0 <= Value < ElementBits -static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) { - assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); - if (!getVShiftImm(Op, ElementBits, Cnt)) - return false; - return (Cnt >= 0 && Cnt < ElementBits); -} - -/// Check if this is a valid build_vector for the immediate operand of a -/// vector shift right operation. 
The value must be in the range: -/// 1 <= Value <= ElementBits -static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) { - assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); - if (!getVShiftImm(Op, ElementBits, Cnt)) - return false; - return (Cnt >= 1 && Cnt <= ElementBits); -} - -static SDValue GenForSextInreg(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - EVT SrcVT, EVT DestVT, EVT SubRegVT, - const int *Mask, SDValue Src) { - SelectionDAG &DAG = DCI.DAG; - SDValue Bitcast - = DAG.getNode(ISD::BITCAST, SDLoc(N), SrcVT, Src); - SDValue Sext - = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), DestVT, Bitcast); - SDValue ShuffleVec - = DAG.getVectorShuffle(DestVT, SDLoc(N), Sext, DAG.getUNDEF(DestVT), Mask); - SDValue ExtractSubreg - = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), - SubRegVT, ShuffleVec, - DAG.getTargetConstant(AArch64::sub_64, MVT::i32)), 0); - return ExtractSubreg; -} - -/// Checks for vector shifts and lowers them. -static SDValue PerformShiftCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *ST) { - SelectionDAG &DAG = DCI.DAG; - EVT VT = N->getValueType(0); - if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64)) - return PerformSRACombine(N, DCI); - - // We're looking for an SRA/SHL pair to help generating instruction - // sshll v0.8h, v0.8b, #0 - // The instruction STXL is also the alias of this instruction. - // - // For example, for DAG like below, - // v2i32 = sra (v2i32 (shl v2i32, 16)), 16 - // we can transform it into - // v2i32 = EXTRACT_SUBREG - // (v4i32 (suffle_vector - // (v4i32 (sext (v4i16 (bitcast v2i32))), - // undef, (0, 2, u, u)), - // sub_64 - // - // With this transformation we expect to generate "SSHLL + UZIP1" - // Sometimes UZIP1 can be optimized away by combining with other context. - int64_t ShrCnt, ShlCnt; - if (N->getOpcode() == ISD::SRA - && (VT == MVT::v2i32 || VT == MVT::v4i16) - && isVShiftRImm(N->getOperand(1), VT, ShrCnt) - && N->getOperand(0).getOpcode() == ISD::SHL - && isVShiftRImm(N->getOperand(0).getOperand(1), VT, ShlCnt)) { - SDValue Src = N->getOperand(0).getOperand(0); - if (VT == MVT::v2i32 && ShrCnt == 16 && ShlCnt == 16) { - // sext_inreg(v2i32, v2i16) - // We essentially only care the Mask {0, 2, u, u} - int Mask[4] = {0, 2, 4, 6}; - return GenForSextInreg(N, DCI, MVT::v4i16, MVT::v4i32, MVT::v2i32, - Mask, Src); - } - else if (VT == MVT::v2i32 && ShrCnt == 24 && ShlCnt == 24) { - // sext_inreg(v2i16, v2i8) - // We essentially only care the Mask {0, u, 4, u, u, u, u, u, u, u, u, u} - int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14}; - return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v2i32, - Mask, Src); - } - else if (VT == MVT::v4i16 && ShrCnt == 8 && ShlCnt == 8) { - // sext_inreg(v4i16, v4i8) - // We essentially only care the Mask {0, 2, 4, 6, u, u, u, u, u, u, u, u} - int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14}; - return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v4i16, - Mask, Src); - } - } - - // Nothing to be done for scalar shifts. 
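
// Standalone sketch (plain C++, not LLVM code; assumes the usual two's
// complement, arithmetic-shift behaviour) of the shl/sra pair matched above:
// "(sra (shl x, 16), 16)" on a 32-bit lane is a sign extension of the lane's
// low 16 bits, which is what lets the pair be rewritten as a widening
// sign-extend (SSHLL) of the narrower element type.
#include <cstdint>

constexpr int32_t shl_then_sra16(int32_t x) {
  return static_cast<int32_t>(static_cast<uint32_t>(x) << 16) >> 16;
}
constexpr int32_t sext_low16(int32_t x) {
  return static_cast<int16_t>(static_cast<uint16_t>(x));   // same result
}

static_assert(shl_then_sra16(0x00007FFF) == sext_low16(0x00007FFF), "positive half");
static_assert(shl_then_sra16(0x12348001) == -32767, "high bits dropped, sign kept");
static_assert(shl_then_sra16(0x12348001) == sext_low16(0x12348001), "pair == sext_inreg");
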
- const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!VT.isVector() || !TLI.isTypeLegal(VT)) - return SDValue(); - - assert(ST->hasNEON() && "unexpected vector shift"); - int64_t Cnt; - - switch (N->getOpcode()) { - default: - llvm_unreachable("unexpected shift opcode"); - - case ISD::SHL: - if (isVShiftLImm(N->getOperand(1), VT, Cnt)) { - SDValue RHS = - DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT, - DAG.getConstant(Cnt, MVT::i32)); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS); - } - break; - - case ISD::SRA: - case ISD::SRL: - if (isVShiftRImm(N->getOperand(1), VT, Cnt)) { - SDValue RHS = - DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT, - DAG.getConstant(Cnt, MVT::i32)); - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS); - } - break; - } - - return SDValue(); -} - -/// ARM-specific DAG combining for intrinsics. -static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { - unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); - - switch (IntNo) { - default: - // Don't do anything for most intrinsics. - break; - - case Intrinsic::arm_neon_vqshifts: - case Intrinsic::arm_neon_vqshiftu: - EVT VT = N->getOperand(1).getValueType(); - int64_t Cnt; - if (!isVShiftLImm(N->getOperand(2), VT, Cnt)) - break; - unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts) - ? AArch64ISD::NEON_QSHLs - : AArch64ISD::NEON_QSHLu; - return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), - N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); - } - - return SDValue(); -} - -/// Target-specific DAG combine function for NEON load/store intrinsics -/// to merge base address updates. -static SDValue CombineBaseUpdate(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) - return SDValue(); - - SelectionDAG &DAG = DCI.DAG; - bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || - N->getOpcode() == ISD::INTRINSIC_W_CHAIN); - unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); - SDValue Addr = N->getOperand(AddrOpIdx); - - // Search for a use of the address operand that is an increment. - for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), - UE = Addr.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - if (User->getOpcode() != ISD::ADD || - UI.getUse().getResNo() != Addr.getResNo()) - continue; - - // Check that the add is independent of the load/store. Otherwise, folding - // it would create a cycle. - if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) - continue; - - // Find the new opcode for the updating load/store. 
- bool isLoad = true; - bool isLaneOp = false; - unsigned NewOpc = 0; - unsigned NumVecs = 0; - if (isIntrinsic) { - unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); - switch (IntNo) { - default: llvm_unreachable("unexpected intrinsic for Neon base update"); - case Intrinsic::arm_neon_vld1: NewOpc = AArch64ISD::NEON_LD1_UPD; - NumVecs = 1; break; - case Intrinsic::arm_neon_vld2: NewOpc = AArch64ISD::NEON_LD2_UPD; - NumVecs = 2; break; - case Intrinsic::arm_neon_vld3: NewOpc = AArch64ISD::NEON_LD3_UPD; - NumVecs = 3; break; - case Intrinsic::arm_neon_vld4: NewOpc = AArch64ISD::NEON_LD4_UPD; - NumVecs = 4; break; - case Intrinsic::arm_neon_vst1: NewOpc = AArch64ISD::NEON_ST1_UPD; - NumVecs = 1; isLoad = false; break; - case Intrinsic::arm_neon_vst2: NewOpc = AArch64ISD::NEON_ST2_UPD; - NumVecs = 2; isLoad = false; break; - case Intrinsic::arm_neon_vst3: NewOpc = AArch64ISD::NEON_ST3_UPD; - NumVecs = 3; isLoad = false; break; - case Intrinsic::arm_neon_vst4: NewOpc = AArch64ISD::NEON_ST4_UPD; - NumVecs = 4; isLoad = false; break; - case Intrinsic::aarch64_neon_vld1x2: NewOpc = AArch64ISD::NEON_LD1x2_UPD; - NumVecs = 2; break; - case Intrinsic::aarch64_neon_vld1x3: NewOpc = AArch64ISD::NEON_LD1x3_UPD; - NumVecs = 3; break; - case Intrinsic::aarch64_neon_vld1x4: NewOpc = AArch64ISD::NEON_LD1x4_UPD; - NumVecs = 4; break; - case Intrinsic::aarch64_neon_vst1x2: NewOpc = AArch64ISD::NEON_ST1x2_UPD; - NumVecs = 2; isLoad = false; break; - case Intrinsic::aarch64_neon_vst1x3: NewOpc = AArch64ISD::NEON_ST1x3_UPD; - NumVecs = 3; isLoad = false; break; - case Intrinsic::aarch64_neon_vst1x4: NewOpc = AArch64ISD::NEON_ST1x4_UPD; - NumVecs = 4; isLoad = false; break; - case Intrinsic::arm_neon_vld2lane: NewOpc = AArch64ISD::NEON_LD2LN_UPD; - NumVecs = 2; isLaneOp = true; break; - case Intrinsic::arm_neon_vld3lane: NewOpc = AArch64ISD::NEON_LD3LN_UPD; - NumVecs = 3; isLaneOp = true; break; - case Intrinsic::arm_neon_vld4lane: NewOpc = AArch64ISD::NEON_LD4LN_UPD; - NumVecs = 4; isLaneOp = true; break; - case Intrinsic::arm_neon_vst2lane: NewOpc = AArch64ISD::NEON_ST2LN_UPD; - NumVecs = 2; isLoad = false; isLaneOp = true; break; - case Intrinsic::arm_neon_vst3lane: NewOpc = AArch64ISD::NEON_ST3LN_UPD; - NumVecs = 3; isLoad = false; isLaneOp = true; break; - case Intrinsic::arm_neon_vst4lane: NewOpc = AArch64ISD::NEON_ST4LN_UPD; - NumVecs = 4; isLoad = false; isLaneOp = true; break; - } - } else { - isLaneOp = true; - switch (N->getOpcode()) { - default: llvm_unreachable("unexpected opcode for Neon base update"); - case AArch64ISD::NEON_LD2DUP: NewOpc = AArch64ISD::NEON_LD2DUP_UPD; - NumVecs = 2; break; - case AArch64ISD::NEON_LD3DUP: NewOpc = AArch64ISD::NEON_LD3DUP_UPD; - NumVecs = 3; break; - case AArch64ISD::NEON_LD4DUP: NewOpc = AArch64ISD::NEON_LD4DUP_UPD; - NumVecs = 4; break; - } - } - - // Find the size of memory referenced by the load/store. - EVT VecTy; - if (isLoad) - VecTy = N->getValueType(0); - else - VecTy = N->getOperand(AddrOpIdx + 1).getValueType(); - unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; - if (isLaneOp) - NumBytes /= VecTy.getVectorNumElements(); - - // If the increment is a constant, it must match the memory ref size. - SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); - if (ConstantSDNode *CInc = dyn_cast(Inc.getNode())) { - uint32_t IncVal = CInc->getZExtValue(); - if (IncVal != NumBytes) - continue; - Inc = DAG.getTargetConstant(IncVal, MVT::i32); - } - - // Create the new updating load/store node. 
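
// Standalone sketch (plain C++, not LLVM code) of the size check just above:
// a post-increment can only be folded when the pointer is advanced by exactly
// the number of bytes the intrinsic touches, i.e. NumVecs * sizeof(vector),
// divided by the lane count for the single-lane variants.
#include <cstddef>

constexpr size_t expectedIncrement(unsigned numVecs, unsigned vecBits,
                                   bool laneOp, unsigned numLanes) {
  return laneOp ? (numVecs * vecBits / 8) / numLanes : numVecs * vecBits / 8;
}

static_assert(expectedIncrement(2, 128, false, 4) == 32, "vld2 of 4 x i32");
static_assert(expectedIncrement(2, 128, true, 4) == 8, "vld2lane of 4 x i32");
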
- EVT Tys[6]; - unsigned NumResultVecs = (isLoad ? NumVecs : 0); - unsigned n; - for (n = 0; n < NumResultVecs; ++n) - Tys[n] = VecTy; - Tys[n++] = MVT::i64; - Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2)); - SmallVector Ops; - Ops.push_back(N->getOperand(0)); // incoming chain - Ops.push_back(N->getOperand(AddrOpIdx)); - Ops.push_back(Inc); - for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { - Ops.push_back(N->getOperand(i)); - } - MemIntrinsicSDNode *MemInt = cast(N); - SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, - Ops, MemInt->getMemoryVT(), - MemInt->getMemOperand()); - - // Update the uses. - std::vector NewResults; - for (unsigned i = 0; i < NumResultVecs; ++i) { - NewResults.push_back(SDValue(UpdN.getNode(), i)); - } - NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain - DCI.CombineTo(N, NewResults); - DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); - - break; - } - return SDValue(); -} - -/// For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) -/// intrinsic, and if all the other uses of that intrinsic are also VDUPLANEs. -/// If so, combine them to a vldN-dup operation and return true. -static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { - SelectionDAG &DAG = DCI.DAG; - EVT VT = N->getValueType(0); - - // Check if the VDUPLANE operand is a vldN-dup intrinsic. - SDNode *VLD = N->getOperand(0).getNode(); - if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) - return SDValue(); - unsigned NumVecs = 0; - unsigned NewOpc = 0; - unsigned IntNo = cast(VLD->getOperand(1))->getZExtValue(); - if (IntNo == Intrinsic::arm_neon_vld2lane) { - NumVecs = 2; - NewOpc = AArch64ISD::NEON_LD2DUP; - } else if (IntNo == Intrinsic::arm_neon_vld3lane) { - NumVecs = 3; - NewOpc = AArch64ISD::NEON_LD3DUP; - } else if (IntNo == Intrinsic::arm_neon_vld4lane) { - NumVecs = 4; - NewOpc = AArch64ISD::NEON_LD4DUP; - } else { - return SDValue(); - } - - // First check that all the vldN-lane uses are VDUPLANEs and that the lane - // numbers match the load. - unsigned VLDLaneNo = - cast(VLD->getOperand(NumVecs + 3))->getZExtValue(); - for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); - UI != UE; ++UI) { - // Ignore uses of the chain result. - if (UI.getUse().getResNo() == NumVecs) - continue; - SDNode *User = *UI; - if (User->getOpcode() != AArch64ISD::NEON_VDUPLANE || - VLDLaneNo != cast(User->getOperand(1))->getZExtValue()) - return SDValue(); - } - - // Create the vldN-dup node. - EVT Tys[5]; - unsigned n; - for (n = 0; n < NumVecs; ++n) - Tys[n] = VT; - Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1)); - SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; - MemIntrinsicSDNode *VLDMemInt = cast(VLD); - SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops, - VLDMemInt->getMemoryVT(), - VLDMemInt->getMemOperand()); - - // Update the uses. - for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); - UI != UE; ++UI) { - unsigned ResNo = UI.getUse().getResNo(); - // Ignore uses of the chain result. - if (ResNo == NumVecs) - continue; - SDNode *User = *UI; - DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); - } - - // Now the vldN-lane intrinsic is dead except for its chain result. - // Update uses of the chain. 
- std::vector VLDDupResults; - for (unsigned n = 0; n < NumVecs; ++n) - VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); - VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); - DCI.CombineTo(VLD, VLDDupResults); - - return SDValue(N, 0); -} - -// vselect (v1i1 setcc) -> -// vselect (v1iXX setcc) (XX is the size of the compared operand type) -// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as -// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine -// such VSELECT. -static SDValue PerformVSelectCombine(SDNode *N, SelectionDAG &DAG) { - SDValue N0 = N->getOperand(0); - EVT CCVT = N0.getValueType(); - - if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 || - CCVT.getVectorElementType() != MVT::i1) - return SDValue(); - - EVT ResVT = N->getValueType(0); - EVT CmpVT = N0.getOperand(0).getValueType(); - // Only combine when the result type is of the same size as the compared - // operands. - if (ResVT.getSizeInBits() != CmpVT.getSizeInBits()) - return SDValue(); - - SDValue IfTrue = N->getOperand(1); - SDValue IfFalse = N->getOperand(2); - SDValue SetCC = - DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(), - N0.getOperand(0), N0.getOperand(1), - cast(N0.getOperand(2))->get()); - return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC, - IfTrue, IfFalse); -} - -// sign_extend (extract_vector_elt (v1i1 setcc)) -> -// extract_vector_elt (v1iXX setcc) -// (XX is the size of the compared operand type) -static SDValue PerformSignExtendCombine(SDNode *N, SelectionDAG &DAG) { - SDValue N0 = N->getOperand(0); - SDValue Vec = N0.getOperand(0); - - if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - Vec.getOpcode() != ISD::SETCC) - return SDValue(); - - EVT ResVT = N->getValueType(0); - EVT CmpVT = Vec.getOperand(0).getValueType(); - // Only optimize when the result type is of the same size as the element - // type of the compared operand. 
- if (ResVT.getSizeInBits() != CmpVT.getVectorElementType().getSizeInBits()) - return SDValue(); - - SDValue Lane = N0.getOperand(1); - SDValue SetCC = - DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(), - Vec.getOperand(0), Vec.getOperand(1), - cast(Vec.getOperand(2))->get()); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ResVT, - SetCC, Lane); -} - -SDValue -AArch64TargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { - switch (N->getOpcode()) { - default: break; - case ISD::AND: return PerformANDCombine(N, DCI); - case ISD::OR: return PerformORCombine(N, DCI, getSubtarget()); - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - return PerformShiftCombine(N, DCI, getSubtarget()); - case ISD::VSELECT: return PerformVSelectCombine(N, DCI.DAG); - case ISD::SIGN_EXTEND: return PerformSignExtendCombine(N, DCI.DAG); - case ISD::INTRINSIC_WO_CHAIN: - return PerformIntrinsicCombine(N, DCI.DAG); - case AArch64ISD::NEON_VDUPLANE: - return CombineVLDDUP(N, DCI); - case AArch64ISD::NEON_LD2DUP: - case AArch64ISD::NEON_LD3DUP: - case AArch64ISD::NEON_LD4DUP: - return CombineBaseUpdate(N, DCI); - case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: - switch (cast(N->getOperand(1))->getZExtValue()) { - case Intrinsic::arm_neon_vld1: - case Intrinsic::arm_neon_vld2: - case Intrinsic::arm_neon_vld3: - case Intrinsic::arm_neon_vld4: - case Intrinsic::arm_neon_vst1: - case Intrinsic::arm_neon_vst2: - case Intrinsic::arm_neon_vst3: - case Intrinsic::arm_neon_vst4: - case Intrinsic::arm_neon_vld2lane: - case Intrinsic::arm_neon_vld3lane: - case Intrinsic::arm_neon_vld4lane: - case Intrinsic::aarch64_neon_vld1x2: - case Intrinsic::aarch64_neon_vld1x3: - case Intrinsic::aarch64_neon_vld1x4: - case Intrinsic::aarch64_neon_vst1x2: - case Intrinsic::aarch64_neon_vst1x3: - case Intrinsic::aarch64_neon_vst1x4: - case Intrinsic::arm_neon_vst2lane: - case Intrinsic::arm_neon_vst3lane: - case Intrinsic::arm_neon_vst4lane: - return CombineBaseUpdate(N, DCI); - default: - break; - } - } - return SDValue(); -} - -bool -AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { - VT = VT.getScalarType(); - - if (!VT.isSimple()) - return false; - - switch (VT.getSimpleVT().SimpleTy) { - case MVT::f16: - case MVT::f32: - case MVT::f64: - return true; - case MVT::f128: - return false; - default: - break; - } - - return false; -} - -bool AArch64TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - bool *Fast) const { - const AArch64Subtarget *Subtarget = getSubtarget(); - // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus - bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); - - switch (VT.getSimpleVT().SimpleTy) { - default: - return false; - // Scalar types - case MVT::i8: case MVT::i16: - case MVT::i32: case MVT::i64: - case MVT::f32: case MVT::f64: { - // Unaligned access can use (for example) LRDB, LRDH, LDRW - if (AllowsUnaligned) { - if (Fast) - *Fast = true; - return true; - } - return false; - } - // 64-bit vector types - case MVT::v8i8: case MVT::v4i16: - case MVT::v2i32: case MVT::v1i64: - case MVT::v2f32: case MVT::v1f64: - // 128-bit vector types - case MVT::v16i8: case MVT::v8i16: - case MVT::v4i32: case MVT::v2i64: - case MVT::v4f32: case MVT::v2f64: { - // For any little-endian targets with neon, we can support unaligned - // load/store of V registers using ld1/st1. 
- // A big-endian target may also explicitly support unaligned accesses - if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) { - if (Fast) - *Fast = true; - return true; - } - return false; - } - } -} - -// Check whether a shuffle_vector could be presented as concat_vector. -bool AArch64TargetLowering::isConcatVector(SDValue Op, SelectionDAG &DAG, - SDValue V0, SDValue V1, - const int *Mask, - SDValue &Res) const { - SDLoc DL(Op); - EVT VT = Op.getValueType(); - if (VT.getSizeInBits() != 128) - return false; - if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() || - VT.getVectorElementType() != V1.getValueType().getVectorElementType()) - return false; - - unsigned NumElts = VT.getVectorNumElements(); - bool isContactVector = true; - bool splitV0 = false; - if (V0.getValueType().getSizeInBits() == 128) - splitV0 = true; - - for (int I = 0, E = NumElts / 2; I != E; I++) { - if (Mask[I] != I) { - isContactVector = false; - break; - } - } - - if (isContactVector) { - int offset = NumElts / 2; - for (int I = NumElts / 2, E = NumElts; I != E; I++) { - if (Mask[I] != I + splitV0 * offset) { - isContactVector = false; - break; - } - } - } - - if (isContactVector) { - EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - NumElts / 2); - if (splitV0) { - V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0, - DAG.getConstant(0, MVT::i64)); - } - if (V1.getValueType().getSizeInBits() == 128) { - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1, - DAG.getConstant(0, MVT::i64)); - } - Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1); - return true; - } - return false; -} - -// Check whether a Build Vector could be presented as Shuffle Vector. -// This Shuffle Vector maybe not legalized, so the length of its operand and -// the length of result may not equal. -bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, - SDValue &V0, SDValue &V1, - int *Mask) const { - SDLoc DL(Op); - EVT VT = Op.getValueType(); - unsigned NumElts = VT.getVectorNumElements(); - unsigned V0NumElts = 0; - - // Check if all elements are extracted from less than 3 vectors. - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Elt = Op.getOperand(i); - if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - Elt.getOperand(0).getValueType().getVectorElementType() != - VT.getVectorElementType()) - return false; - - if (!V0.getNode()) { - V0 = Elt.getOperand(0); - V0NumElts = V0.getValueType().getVectorNumElements(); - } - if (Elt.getOperand(0) == V0) { - Mask[i] = (cast(Elt->getOperand(1))->getZExtValue()); - continue; - } else if (!V1.getNode()) { - V1 = Elt.getOperand(0); - } - if (Elt.getOperand(0) == V1) { - unsigned Lane = cast(Elt->getOperand(1))->getZExtValue(); - Mask[i] = (Lane + V0NumElts); - continue; - } else { - return false; - } - } - return true; -} - -// LowerShiftRightParts - Lower SRL_PARTS and SRA_PARTS, which returns two -/// i64 values and take a 2 x i64 value to shift plus a shift amount. -SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getNumOperands() == 3 && "Not a quad-shift!"); - EVT VT = Op.getValueType(); - unsigned VTBits = VT.getSizeInBits(); - SDLoc dl(Op); - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? 
ISD::SRA : ISD::SRL; - - assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(VTBits, MVT::i64), ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, - DAG.getConstant(VTBits, MVT::i64)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); - SDValue Tmp3 = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); - - SDValue A64cc; - SDValue CmpOp = getSelectableIntSetCC(ExtraShAmt, - DAG.getConstant(0, MVT::i64), - ISD::SETGE, A64cc, - DAG, dl); - - SDValue Hi = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, - DAG.getConstant(0, Tmp3.getValueType()), Tmp3, - A64cc); - SDValue Lo = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, - TrueVal, FalseVal, A64cc); - - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, dl); -} - -/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two -/// i64 values and take a 2 x i64 value to shift plus a shift amount. -SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getNumOperands() == 3 && "Not a quad-shift!"); - EVT VT = Op.getValueType(); - unsigned VTBits = VT.getSizeInBits(); - SDLoc dl(Op); - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - - assert(Op.getOpcode() == ISD::SHL_PARTS); - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(VTBits, MVT::i64), ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, - DAG.getConstant(VTBits, MVT::i64)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); - SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue Tmp4 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); - - SDValue A64cc; - SDValue CmpOp = getSelectableIntSetCC(ExtraShAmt, - DAG.getConstant(0, MVT::i64), - ISD::SETGE, A64cc, - DAG, dl); - - SDValue Lo = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, - DAG.getConstant(0, Tmp4.getValueType()), Tmp4, - A64cc); - SDValue Hi = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, - Tmp3, FalseVal, A64cc); - - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, dl); -} - -// If this is a case we can't handle, return null and let the default -// expansion code take care of it. -SDValue -AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, - const AArch64Subtarget *ST) const { - - BuildVectorSDNode *BVN = cast(Op.getNode()); - SDLoc DL(Op); - EVT VT = Op.getValueType(); - - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - - unsigned UseNeonMov = VT.getSizeInBits() >= 64; - - // Note we favor lowering MOVI over MVNI. - // This has implications on the definition of patterns in TableGen to select - // BIC immediate instructions but not ORR immediate instructions. - // If this lowering order is changed, TableGen patterns for BIC immediate and - // ORR immediate instructions have to be updated. 
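
// Standalone sketch (plain C++, not LLVM code) of why a second query on the
// negated splat is enough to catch MVNI below: MVNI materialises the bitwise
// NOT of a MOVI-style immediate, so a 32-bit splat such as 0xFFFFFF00, which
// the single-byte MOVI LSL forms cannot encode, is still reachable as MVNI of
// 0x000000FF.
#include <cstdint>

constexpr bool hasMovi32ByteForm(uint32_t v) {      // one byte at LSL 0/8/16/24
  return (v & ~0x000000FFu) == 0 || (v & ~0x0000FF00u) == 0 ||
         (v & ~0x00FF0000u) == 0 || (v & ~0xFF000000u) == 0;
}

static_assert(!hasMovi32ByteForm(0xFFFFFF00u), "not a MOVI byte immediate");
static_assert(hasMovi32ByteForm(~0xFFFFFF00u), "but its NOT is, so MVNI works");
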
- if (UseNeonMov && - BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { - if (SplatBitSize <= 64) { - // First attempt to use vector immediate-form MOVI - EVT NeonMovVT; - unsigned Imm = 0; - unsigned OpCmode = 0; - - if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), - SplatBitSize, DAG, VT.is128BitVector(), - Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) { - SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32); - SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32); - - if (ImmVal.getNode() && OpCmodeVal.getNode()) { - SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT, - ImmVal, OpCmodeVal); - return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov); - } - } - - // Then attempt to use vector immediate-form MVNI - uint64_t NegatedImm = (~SplatBits).getZExtValue(); - if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, - DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT, - Imm, OpCmode)) { - SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32); - SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32); - if (ImmVal.getNode() && OpCmodeVal.getNode()) { - SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT, - ImmVal, OpCmodeVal); - return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov); - } - } - - // Attempt to use vector immediate-form FMOV - if (((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) || - (VT == MVT::v2f64 && SplatBitSize == 64)) { - APFloat RealVal( - SplatBitSize == 32 ? APFloat::IEEEsingle : APFloat::IEEEdouble, - SplatBits); - uint32_t ImmVal; - if (A64Imms::isFPImm(RealVal, ImmVal)) { - SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); - return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val); - } - } - } - } - - unsigned NumElts = VT.getVectorNumElements(); - bool isOnlyLowElement = true; - bool usesOnlyOneValue = true; - bool hasDominantValue = false; - bool isConstant = true; - - // Map of the number of times a particular SDValue appears in the - // element list. - DenseMap ValueCounts; - SDValue Value; - for (unsigned i = 0; i < NumElts; ++i) { - SDValue V = Op.getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - if (i > 0) - isOnlyLowElement = false; - if (!isa(V) && !isa(V)) - isConstant = false; - - ValueCounts.insert(std::make_pair(V, 0)); - unsigned &Count = ValueCounts[V]; - - // Is this value dominant? (takes up more than half of the lanes) - if (++Count > (NumElts / 2)) { - hasDominantValue = true; - Value = V; - } - } - if (ValueCounts.size() != 1) - usesOnlyOneValue = false; - if (!Value.getNode() && ValueCounts.size() > 0) - Value = ValueCounts.begin()->first; - - if (ValueCounts.size() == 0) - return DAG.getUNDEF(VT); - - if (isOnlyLowElement) - return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); - - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - if (hasDominantValue && EltSize <= 64) { - // Use VDUP for non-constant splats. - if (!isConstant) { - SDValue N; - - // If we are DUPing a value that comes directly from a vector, we could - // just use DUPLANE. We can only do this if the lane being extracted - // is at a constant index, as the DUP from lane instructions only have - // constant-index forms. - // - // If there is a TRUNCATE between EXTRACT_VECTOR_ELT and DUP, we can - // remove TRUNCATE for DUPLANE by apdating the source vector to - // appropriate vector type and lane index. 
- // - // FIXME: for now we have v1i8, v1i16, v1i32 legal vector types, if they - // are not legal any more, no need to check the type size in bits should - // be large than 64. - SDValue V = Value; - if (Value->getOpcode() == ISD::TRUNCATE) - V = Value->getOperand(0); - if (V->getOpcode() == ISD::EXTRACT_VECTOR_ELT && - isa(V->getOperand(1)) && - V->getOperand(0).getValueType().getSizeInBits() >= 64) { - - // If the element size of source vector is larger than DUPLANE - // element size, we can do transformation by, - // 1) bitcasting source register to smaller element vector - // 2) mutiplying the lane index by SrcEltSize/ResEltSize - // For example, we can lower - // "v8i16 vdup_lane(v4i32, 1)" - // to be - // "v8i16 vdup_lane(v8i16 bitcast(v4i32), 2)". - SDValue SrcVec = V->getOperand(0); - unsigned SrcEltSize = - SrcVec.getValueType().getVectorElementType().getSizeInBits(); - unsigned ResEltSize = VT.getVectorElementType().getSizeInBits(); - if (SrcEltSize > ResEltSize) { - assert((SrcEltSize % ResEltSize == 0) && "Invalid element size"); - SDValue BitCast; - unsigned SrcSize = SrcVec.getValueType().getSizeInBits(); - unsigned ResSize = VT.getSizeInBits(); - - if (SrcSize > ResSize) { - assert((SrcSize % ResSize == 0) && "Invalid vector size"); - EVT CastVT = - EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - SrcSize / ResEltSize); - BitCast = DAG.getNode(ISD::BITCAST, DL, CastVT, SrcVec); - } else { - assert((SrcSize == ResSize) && "Invalid vector size of source vec"); - BitCast = DAG.getNode(ISD::BITCAST, DL, VT, SrcVec); - } - - unsigned LaneIdx = V->getConstantOperandVal(1); - SDValue Lane = - DAG.getConstant((SrcEltSize / ResEltSize) * LaneIdx, MVT::i64); - N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, BitCast, Lane); - } else { - assert((SrcEltSize == ResEltSize) && - "Invalid element size of source vec"); - N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, V->getOperand(0), - V->getOperand(1)); - } - } else - N = DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value); - - if (!usesOnlyOneValue) { - // The dominant value was splatted as 'N', but we now have to insert - // all differing elements. - for (unsigned I = 0; I < NumElts; ++I) { - if (Op.getOperand(I) == Value) - continue; - SmallVector Ops; - Ops.push_back(N); - Ops.push_back(Op.getOperand(I)); - Ops.push_back(DAG.getConstant(I, MVT::i64)); - N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Ops); - } - } - return N; - } - if (usesOnlyOneValue && isConstant) { - return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value); - } - } - // If all elements are constants and the case above didn't get hit, fall back - // to the default expansion, which will generate a load from the constant - // pool. - if (isConstant) - return SDValue(); - - // Try to lower this in lowering ShuffleVector way. 
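The lane-index rewrite in the DUPLANE comment above ("v8i16 vdup_lane(v4i32, 1)" becoming "v8i16 vdup_lane(bitcast to v8i16, 2)") is just a multiplication of the lane index by the element-size ratio. A minimal standalone sketch of that arithmetic:

#include <cassert>
#include <cstdio>

// New lane index after bitcasting a wider-element source vector to the
// narrower result element type: (SrcEltBits / ResEltBits) * old lane.
static unsigned remapDupLane(unsigned SrcEltBits, unsigned ResEltBits,
                             unsigned LaneIdx) {
  assert(SrcEltBits % ResEltBits == 0 && "Invalid element size");
  return (SrcEltBits / ResEltBits) * LaneIdx;
}

int main() {
  // v8i16 vdup_lane(v4i32, 1)  ->  lane 2 of the v8i16 bitcast.
  std::printf("lane 1 of v4i32 -> lane %u of v8i16\n", remapDupLane(32, 16, 1));
  return 0;
}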
- SDValue V0, V1; - int Mask[16]; - if (isKnownShuffleVector(Op, DAG, V0, V1, Mask)) { - unsigned V0NumElts = V0.getValueType().getVectorNumElements(); - if (!V1.getNode() && V0NumElts == NumElts * 2) { - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, - DAG.getConstant(NumElts, MVT::i64)); - V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, - DAG.getConstant(0, MVT::i64)); - V0NumElts = V0.getValueType().getVectorNumElements(); - } - - if (V1.getNode() && NumElts == V0NumElts && - V0NumElts == V1.getValueType().getVectorNumElements()) { - SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask); - if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) - return Shuffle; - else - return LowerVECTOR_SHUFFLE(Shuffle, DAG); - } else { - SDValue Res; - if (isConcatVector(Op, DAG, V0, V1, Mask, Res)) - return Res; - } - } - - // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we - // know the default expansion would otherwise fall back on something even - // worse. For a vector with one or two non-undef values, that's - // scalar_to_vector for the elements followed by a shuffle (provided the - // shuffle is valid for the target) and materialization element by element - // on the stack followed by a load for everything else. - if (!isConstant && !usesOnlyOneValue) { - SDValue Vec = DAG.getUNDEF(VT); - for (unsigned i = 0 ; i < NumElts; ++i) { - SDValue V = Op.getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - SDValue LaneIdx = DAG.getConstant(i, MVT::i64); - Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx); - } - return Vec; - } - return SDValue(); -} - -/// isREVMask - Check if a vector shuffle corresponds to a REV -/// instruction with the specified blocksize. (The order of the elements -/// within each block of the vector is reversed.) -static bool isREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { - assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && - "Only possible block sizes for REV are: 16, 32, 64"); - - unsigned EltSz = VT.getVectorElementType().getSizeInBits(); - if (EltSz == 64) - return false; - - unsigned NumElts = VT.getVectorNumElements(); - unsigned BlockElts = M[0] + 1; - // If the first shuffle index is UNDEF, be optimistic. - if (M[0] < 0) - BlockElts = BlockSize / EltSz; - - if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) - return false; - - for (unsigned i = 0; i < NumElts; ++i) { - if (M[i] < 0) - continue; // ignore UNDEF indices - if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) - return false; - } - - return true; -} - -// isPermuteMask - Check whether the vector shuffle matches to UZP, ZIP and -// TRN instruction. 
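As a worked example of the isREVMask check above: for REV32 on v8i16 the block size is 32 bits and the element size 16 bits, so each two-element block is reversed and the only fully defined mask accepted is <1, 0, 3, 2, 5, 4, 7, 6>. A standalone sketch of the same index formula (the block-size validation and first-index-undef handling from the function above are omitted):

#include <cstdio>
#include <vector>

// Standalone version of the index formula used by isREVMask: within each
// block of BlockElts elements, the element order is reversed.
static bool isRevMask(const std::vector<int> &M, unsigned EltBits,
                      unsigned BlockBits) {
  unsigned BlockElts = BlockBits / EltBits;
  for (unsigned i = 0, e = M.size(); i != e; ++i) {
    if (M[i] < 0)
      continue; // undef lanes match anything
    unsigned Expected = (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts);
    if ((unsigned)M[i] != Expected)
      return false;
  }
  return true;
}

int main() {
  // REV32 on v8i16: each pair of 16-bit lanes inside a 32-bit block is swapped.
  std::vector<int> Mask = {1, 0, 3, 2, 5, 4, 7, 6};
  std::printf("REV32 v8i16 mask matches: %d\n", isRevMask(Mask, 16, 32));
  return 0;
}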
-static unsigned isPermuteMask(ArrayRef M, EVT VT, bool isV2undef) { - unsigned NumElts = VT.getVectorNumElements(); - if (NumElts < 4) - return 0; - - bool ismatch = true; - - // Check UZP1 - for (unsigned i = 0; i < NumElts; ++i) { - unsigned answer = i * 2; - if (isV2undef && answer >= NumElts) - answer -= NumElts; - if (M[i] != -1 && (unsigned)M[i] != answer) { - ismatch = false; - break; - } - } - if (ismatch) - return AArch64ISD::NEON_UZP1; - - // Check UZP2 - ismatch = true; - for (unsigned i = 0; i < NumElts; ++i) { - unsigned answer = i * 2 + 1; - if (isV2undef && answer >= NumElts) - answer -= NumElts; - if (M[i] != -1 && (unsigned)M[i] != answer) { - ismatch = false; - break; - } - } - if (ismatch) - return AArch64ISD::NEON_UZP2; - - // Check ZIP1 - ismatch = true; - for (unsigned i = 0; i < NumElts; ++i) { - unsigned answer = i / 2 + NumElts * (i % 2); - if (isV2undef && answer >= NumElts) - answer -= NumElts; - if (M[i] != -1 && (unsigned)M[i] != answer) { - ismatch = false; - break; - } - } - if (ismatch) - return AArch64ISD::NEON_ZIP1; - - // Check ZIP2 - ismatch = true; - for (unsigned i = 0; i < NumElts; ++i) { - unsigned answer = (NumElts + i) / 2 + NumElts * (i % 2); - if (isV2undef && answer >= NumElts) - answer -= NumElts; - if (M[i] != -1 && (unsigned)M[i] != answer) { - ismatch = false; - break; - } - } - if (ismatch) - return AArch64ISD::NEON_ZIP2; - - // Check TRN1 - ismatch = true; - for (unsigned i = 0; i < NumElts; ++i) { - unsigned answer = i + (NumElts - 1) * (i % 2); - if (isV2undef && answer >= NumElts) - answer -= NumElts; - if (M[i] != -1 && (unsigned)M[i] != answer) { - ismatch = false; - break; - } - } - if (ismatch) - return AArch64ISD::NEON_TRN1; - - // Check TRN2 - ismatch = true; - for (unsigned i = 0; i < NumElts; ++i) { - unsigned answer = 1 + i + (NumElts - 1) * (i % 2); - if (isV2undef && answer >= NumElts) - answer -= NumElts; - if (M[i] != -1 && (unsigned)M[i] != answer) { - ismatch = false; - break; - } - } - if (ismatch) - return AArch64ISD::NEON_TRN2; - - return 0; -} - -SDValue -AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) const { - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - SDLoc dl(Op); - EVT VT = Op.getValueType(); - ShuffleVectorSDNode *SVN = cast(Op.getNode()); - - // Convert shuffles that are directly supported on NEON to target-specific - // DAG nodes, instead of keeping them as shuffles and matching them again - // during code selection. This is more efficient and avoids the possibility - // of inconsistencies between legalization and selection. 
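Concretely, for a 4-element vector the index formulas in isPermuteMask above accept the following two-operand masks (indices 4-7 refer to the second operand). A small sketch that prints them, for illustration only:

#include <cstdio>

// Prints the expected shuffle masks recognised by the formulas above,
// for NumElts = 4 and a defined second operand.
int main() {
  const unsigned NumElts = 4;
  struct { const char *Name; unsigned (*Idx)(unsigned, unsigned); } Perms[] = {
      {"UZP1", [](unsigned i, unsigned n) { return 2 * i; }},
      {"UZP2", [](unsigned i, unsigned n) { return 2 * i + 1; }},
      {"ZIP1", [](unsigned i, unsigned n) { return i / 2 + n * (i % 2); }},
      {"ZIP2", [](unsigned i, unsigned n) { return (n + i) / 2 + n * (i % 2); }},
      {"TRN1", [](unsigned i, unsigned n) { return i + (n - 1) * (i % 2); }},
      {"TRN2", [](unsigned i, unsigned n) { return 1 + i + (n - 1) * (i % 2); }},
  };
  for (const auto &P : Perms) {
    std::printf("%s:", P.Name);
    for (unsigned i = 0; i != NumElts; ++i)
      std::printf(" %u", P.Idx(i, NumElts));
    std::printf("\n");
  }
  return 0;
}

For NumElts = 4 this prints UZP1 <0 2 4 6>, UZP2 <1 3 5 7>, ZIP1 <0 4 1 5>, ZIP2 <2 6 3 7>, TRN1 <0 4 2 6> and TRN2 <1 5 3 7>.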
- ArrayRef ShuffleMask = SVN->getMask(); - - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - if (EltSize > 64) - return SDValue(); - - if (isREVMask(ShuffleMask, VT, 64)) - return DAG.getNode(AArch64ISD::NEON_REV64, dl, VT, V1); - if (isREVMask(ShuffleMask, VT, 32)) - return DAG.getNode(AArch64ISD::NEON_REV32, dl, VT, V1); - if (isREVMask(ShuffleMask, VT, 16)) - return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1); - - unsigned ISDNo; - if (V2.getOpcode() == ISD::UNDEF) - ISDNo = isPermuteMask(ShuffleMask, VT, true); - else - ISDNo = isPermuteMask(ShuffleMask, VT, false); - - if (ISDNo) { - if (V2.getOpcode() == ISD::UNDEF) - return DAG.getNode(ISDNo, dl, VT, V1, V1); - else - return DAG.getNode(ISDNo, dl, VT, V1, V2); - } - - SDValue Res; - if (isConcatVector(Op, DAG, V1, V2, &ShuffleMask[0], Res)) - return Res; - - // If the element of shuffle mask are all the same constant, we can - // transform it into either NEON_VDUP or NEON_VDUPLANE - if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { - int Lane = SVN->getSplatIndex(); - // If this is undef splat, generate it via "just" vdup, if possible. - if (Lane == -1) Lane = 0; - - // Test if V1 is a SCALAR_TO_VECTOR. - if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { - return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0)); - } - // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR. - if (V1.getOpcode() == ISD::BUILD_VECTOR) { - bool IsScalarToVector = true; - for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i) - if (V1.getOperand(i).getOpcode() != ISD::UNDEF && - i != (unsigned)Lane) { - IsScalarToVector = false; - break; - } - if (IsScalarToVector) - return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, - V1.getOperand(Lane)); - } - - // Test if V1 is a EXTRACT_SUBVECTOR. - if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) { - int ExtLane = cast(V1.getOperand(1))->getZExtValue(); - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1.getOperand(0), - DAG.getConstant(Lane + ExtLane, MVT::i64)); - } - // Test if V1 is a CONCAT_VECTORS. - if (V1.getOpcode() == ISD::CONCAT_VECTORS && - V1.getOperand(1).getOpcode() == ISD::UNDEF) { - SDValue Op0 = V1.getOperand(0); - assert((unsigned)Lane < Op0.getValueType().getVectorNumElements() && - "Invalid vector lane access"); - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, Op0, - DAG.getConstant(Lane, MVT::i64)); - } - - return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1, - DAG.getConstant(Lane, MVT::i64)); - } - - int Length = ShuffleMask.size(); - int V1EltNum = V1.getValueType().getVectorNumElements(); - - // If the number of v1 elements is the same as the number of shuffle mask - // element and the shuffle masks are sequential values, we can transform - // it into NEON_VEXTRACT. - if (V1EltNum == Length) { - // Check if the shuffle mask is sequential. 
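When the mask is one consecutive run (the sequential case being checked here), the shuffle is a byte-wise extract from the concatenation of the two operands, and the EXT byte offset is the first mask element scaled by the element size in bytes. A minimal sketch, ignoring leading undef lanes (which the real check skips over):

#include <cstdio>

// EXT/VEXTRACT byte offset for a sequential shuffle mask k, k+1, k+2, ...
static unsigned extByteOffset(unsigned FirstMaskElt, unsigned EltBits) {
  return (EltBits / 8) * FirstMaskElt;
}

int main() {
  // v4i32 shuffle of (V1, V2) with mask <1, 2, 3, 4>: elements 1-3 of V1
  // followed by element 0 of V2, i.e. EXT at byte offset 4.
  std::printf("EXT byte offset: %u\n", extByteOffset(1, 32));
  return 0;
}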
- int SkipUndef = 0; - while (ShuffleMask[SkipUndef] == -1) { - SkipUndef++; - } - int CurMask = ShuffleMask[SkipUndef]; - if (CurMask >= SkipUndef) { - bool IsSequential = true; - for (int I = SkipUndef; I < Length; ++I) { - if (ShuffleMask[I] != -1 && ShuffleMask[I] != CurMask) { - IsSequential = false; - break; - } - CurMask++; - } - if (IsSequential) { - assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect"); - unsigned VecSize = EltSize * V1EltNum; - unsigned Index = (EltSize / 8) * (ShuffleMask[SkipUndef] - SkipUndef); - if (VecSize == 64 || VecSize == 128) - return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2, - DAG.getConstant(Index, MVT::i64)); - } - } - } - - // For shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate insert - // by element from V2 to V1 . - // If shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a - // better choice to be inserted than V1 as less insert needed, so we count - // element to be inserted for both V1 and V2, and select less one as insert - // target. - - // Collect elements need to be inserted and their index. - SmallVector NV1Elt; - SmallVector N1Index; - SmallVector NV2Elt; - SmallVector N2Index; - for (int I = 0; I != Length; ++I) { - if (ShuffleMask[I] != I) { - NV1Elt.push_back(ShuffleMask[I]); - N1Index.push_back(I); - } - } - for (int I = 0; I != Length; ++I) { - if (ShuffleMask[I] != (I + V1EltNum)) { - NV2Elt.push_back(ShuffleMask[I]); - N2Index.push_back(I); - } - } - - // Decide which to be inserted. If all lanes mismatch, neither V1 nor V2 - // will be inserted. - SDValue InsV = V1; - SmallVector InsMasks = NV1Elt; - SmallVector InsIndex = N1Index; - if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) { - if (NV1Elt.size() > NV2Elt.size()) { - InsV = V2; - InsMasks = NV2Elt; - InsIndex = N2Index; - } - } else { - InsV = DAG.getNode(ISD::UNDEF, dl, VT); - } - - for (int I = 0, E = InsMasks.size(); I != E; ++I) { - SDValue ExtV = V1; - int Mask = InsMasks[I]; - if (Mask >= V1EltNum) { - ExtV = V2; - Mask -= V1EltNum; - } - // Any value type smaller than i32 is illegal in AArch64, and this lower - // function is called after legalize pass, so we need to legalize - // the result here. - EVT EltVT; - if (VT.getVectorElementType().isFloatingPoint()) - EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32; - else - EltVT = (EltSize == 64) ? 
MVT::i64 : MVT::i32; - - if (Mask >= 0) { - ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV, - DAG.getConstant(Mask, MVT::i64)); - InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV, - DAG.getConstant(InsIndex[I], MVT::i64)); - } - } - return InsV; -} - -AArch64TargetLowering::ConstraintType -AArch64TargetLowering::getConstraintType(const std::string &Constraint) const { - if (Constraint.size() == 1) { - switch (Constraint[0]) { - default: break; - case 'w': // An FP/SIMD vector register - return C_RegisterClass; - case 'I': // Constant that can be used with an ADD instruction - case 'J': // Constant that can be used with a SUB instruction - case 'K': // Constant that can be used with a 32-bit logical instruction - case 'L': // Constant that can be used with a 64-bit logical instruction - case 'M': // Constant that can be used as a 32-bit MOV immediate - case 'N': // Constant that can be used as a 64-bit MOV immediate - case 'Y': // Floating point constant zero - case 'Z': // Integer constant zero - return C_Other; - case 'Q': // A memory reference with base register and no offset - return C_Memory; - case 'S': // A symbolic address - return C_Other; - } - } - - // FIXME: Ump, Utf, Usa, Ush - // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes, - // whatever they may be - // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be - // Usa: An absolute symbolic address - // Ush: The high part (bits 32:12) of a pc-relative symbolic address - assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa" - && Constraint != "Ush" && "Unimplemented constraints"); - - return TargetLowering::getConstraintType(Constraint); -} - -TargetLowering::ConstraintWeight -AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info, - const char *Constraint) const { - - llvm_unreachable("Constraint weight unimplemented"); -} - -void -AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector &Ops, - SelectionDAG &DAG) const { - SDValue Result; - - // Only length 1 constraints are C_Other. - if (Constraint.size() != 1) return; - - // Only C_Other constraints get lowered like this. That means constants for us - // so return early if there's no hope the constraint can be lowered. - - switch(Constraint[0]) { - default: break; - case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'Z': { - ConstantSDNode *C = dyn_cast(Op); - if (!C) - return; - - uint64_t CVal = C->getZExtValue(); - uint32_t Bits; - - switch (Constraint[0]) { - default: - // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J' - // is a peculiarly useless SUB constraint. - llvm_unreachable("Unimplemented C_Other constraint"); - case 'I': - if (CVal <= 0xfff) - break; - return; - case 'K': - if (A64Imms::isLogicalImm(32, CVal, Bits)) - break; - return; - case 'L': - if (A64Imms::isLogicalImm(64, CVal, Bits)) - break; - return; - case 'Z': - if (CVal == 0) - break; - return; - } - - Result = DAG.getTargetConstant(CVal, Op.getValueType()); - break; - } - case 'S': { - // An absolute symbolic address or label reference. 
- if (const GlobalAddressSDNode *GA = dyn_cast(Op)) { - Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), - GA->getValueType(0)); - } else if (const BlockAddressSDNode *BA - = dyn_cast(Op)) { - Result = DAG.getTargetBlockAddress(BA->getBlockAddress(), - BA->getValueType(0)); - } else if (const ExternalSymbolSDNode *ES - = dyn_cast(Op)) { - Result = DAG.getTargetExternalSymbol(ES->getSymbol(), - ES->getValueType(0)); - } else - return; - break; - } - case 'Y': - if (const ConstantFPSDNode *CFP = dyn_cast(Op)) { - if (CFP->isExactlyValue(0.0)) { - Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0)); - break; - } - } - return; - } - - if (Result.getNode()) { - Ops.push_back(Result); - return; - } - - // It's an unknown constraint for us. Let generic code have a go. - TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); -} - -std::pair -AArch64TargetLowering::getRegForInlineAsmConstraint( - const std::string &Constraint, - MVT VT) const { - if (Constraint.size() == 1) { - switch (Constraint[0]) { - case 'r': - if (VT.getSizeInBits() <= 32) - return std::make_pair(0U, &AArch64::GPR32RegClass); - else if (VT == MVT::i64) - return std::make_pair(0U, &AArch64::GPR64RegClass); - break; - case 'w': - if (VT == MVT::f16) - return std::make_pair(0U, &AArch64::FPR16RegClass); - else if (VT == MVT::f32) - return std::make_pair(0U, &AArch64::FPR32RegClass); - else if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &AArch64::FPR64RegClass); - else if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &AArch64::FPR128RegClass); - break; - } - } - - // Use the default implementation in TargetLowering to convert the register - // constraint into a member of a register class. - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); -} - -/// Represent NEON load and store intrinsics as MemIntrinsicNodes. -/// The associated MachineMemOperands record the alignment specified -/// in the intrinsic calls. -bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, - unsigned Intrinsic) const { - switch (Intrinsic) { - case Intrinsic::arm_neon_vld1: - case Intrinsic::arm_neon_vld2: - case Intrinsic::arm_neon_vld3: - case Intrinsic::arm_neon_vld4: - case Intrinsic::aarch64_neon_vld1x2: - case Intrinsic::aarch64_neon_vld1x3: - case Intrinsic::aarch64_neon_vld1x4: - case Intrinsic::arm_neon_vld2lane: - case Intrinsic::arm_neon_vld3lane: - case Intrinsic::arm_neon_vld4lane: { - Info.opc = ISD::INTRINSIC_W_CHAIN; - // Conservatively set memVT to the entire set of vectors loaded. - uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; - Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); - Info.ptrVal = I.getArgOperand(0); - Info.offset = 0; - Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); - Info.align = cast(AlignArg)->getZExtValue(); - Info.vol = false; // volatile loads with NEON intrinsics not supported - Info.readMem = true; - Info.writeMem = false; - return true; - } - case Intrinsic::arm_neon_vst1: - case Intrinsic::arm_neon_vst2: - case Intrinsic::arm_neon_vst3: - case Intrinsic::arm_neon_vst4: - case Intrinsic::aarch64_neon_vst1x2: - case Intrinsic::aarch64_neon_vst1x3: - case Intrinsic::aarch64_neon_vst1x4: - case Intrinsic::arm_neon_vst2lane: - case Intrinsic::arm_neon_vst3lane: - case Intrinsic::arm_neon_vst4lane: { - Info.opc = ISD::INTRINSIC_VOID; - // Conservatively set memVT to the entire set of vectors stored. 
- unsigned NumElts = 0; - for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { - Type *ArgTy = I.getArgOperand(ArgI)->getType(); - if (!ArgTy->isVectorTy()) - break; - NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; - } - Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); - Info.ptrVal = I.getArgOperand(0); - Info.offset = 0; - Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); - Info.align = cast(AlignArg)->getZExtValue(); - Info.vol = false; // volatile stores with NEON intrinsics not supported - Info.readMem = false; - Info.writeMem = true; - return true; - } - default: - break; - } - - return false; -} - -// Truncations from 64-bit GPR to 32-bit GPR is free. -bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { - if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) - return false; - unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); - unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; -} - -bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) - return false; - unsigned NumBits1 = VT1.getSizeInBits(); - unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; -} - -// All 32-bit GPR operations implicitly zero the high-half of the corresponding -// 64-bit GPR. -bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { - if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) - return false; - unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); - unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 == 32 && NumBits2 == 64) - return true; - return false; -} - -bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) - return false; - unsigned NumBits1 = VT1.getSizeInBits(); - unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 == 32 && NumBits2 == 64) - return true; - return false; -} - -bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { - EVT VT1 = Val.getValueType(); - if (isZExtFree(VT1, VT2)) { - return true; - } - - if (Val.getOpcode() != ISD::LOAD) - return false; - - // 8-, 16-, and 32-bit integer loads all implicitly zero-extend. - return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() && - VT2.isInteger() && VT1.getSizeInBits() <= 32); -} - -// isLegalAddressingMode - Return true if the addressing mode represented -/// by AM is legal for this target, for a load/store of the specified type. -bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { - // AArch64 has five basic addressing modes: - // reg - // reg + 9-bit signed offset - // reg + SIZE_IN_BYTES * 12-bit unsigned offset - // reg1 + reg2 - // reg + SIZE_IN_BYTES * reg - - // No global is ever allowed as a base. - if (AM.BaseGV) - return false; - - // No reg+reg+imm addressing. 
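The two register+immediate forms listed above (a small signed byte offset, and an unsigned 12-bit offset scaled by the access size) can be checked in isolation. This sketch mirrors the bounds used by the offset checks in this function, but folds the power-of-two handling of the access size into a plain divisibility test:

#include <cstdint>
#include <cstdio>

// Standalone check of the register+immediate addressing cases; bounds are
// taken from the checks in this function.
static bool isLegalImmOffset(int64_t Offset, uint64_t NumBytes) {
  // Small signed byte offset.
  if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1)
    return true;
  // Positive multiple of the access size whose scaled value fits in 12 bits.
  if (NumBytes && Offset > 0 && Offset % (int64_t)NumBytes == 0 &&
      Offset / (int64_t)NumBytes <= (1LL << 12) - 1)
    return true;
  return false;
}

int main() {
  std::printf("%d\n", isLegalImmOffset(-200, 8));   // 1: small signed offset
  std::printf("%d\n", isLegalImmOffset(32760, 8));  // 1: 4095 * 8
  std::printf("%d\n", isLegalImmOffset(32761, 8));  // 0: not a multiple of 8
  return 0;
}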
- if (AM.HasBaseReg && AM.BaseOffs && AM.Scale) - return false; - - // check reg + imm case: - // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12 - uint64_t NumBytes = 0; - if (Ty->isSized()) { - uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty); - NumBytes = NumBits / 8; - if (!isPowerOf2_64(NumBits)) - NumBytes = 0; - } - - if (!AM.Scale) { - int64_t Offset = AM.BaseOffs; - - // 9-bit signed offset - if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1) - return true; - - // 12-bit unsigned offset - unsigned shift = Log2_64(NumBytes); - if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 && - // Must be a multiple of NumBytes (NumBytes is a power of 2) - (Offset >> shift) << shift == Offset) - return true; - return false; - } - if (!AM.Scale || AM.Scale == 1 || - (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes)) - return true; - return false; -} - -int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM, - Type *Ty) const { - // Scaling factors are not free at all. - // Operands | Rt Latency - // ------------------------------------------- - // Rt, [Xn, Xm] | 4 - // ------------------------------------------- - // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5 - // Rt, [Xn, Wm, #imm] | - if (isLegalAddressingMode(AM, Ty)) - // Scale represents reg2 * scale, thus account for 1 if - // it is not equal to 0 or 1. - return AM.Scale != 0 && AM.Scale != 1; - return -1; -} - -/// getMaximalGlobalOffset - Returns the maximal possible offset which can -/// be used for loads / stores from the global. -unsigned AArch64TargetLowering::getMaximalGlobalOffset() const { - return 4095; -} - diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h deleted file mode 100644 index 070db94808f0..000000000000 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ /dev/null @@ -1,410 +0,0 @@ -//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that AArch64 uses to lower LLVM code into a -// selection DAG. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H -#define LLVM_TARGET_AARCH64_ISELLOWERING_H - -#include "Utils/AArch64BaseInfo.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/Target/TargetLowering.h" - -namespace llvm { -namespace AArch64ISD { - enum NodeType { - // Start the numbering from where ISD NodeType finishes. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - // This is a conditional branch which also notes the flag needed - // (eq/sgt/...). A64 puts this information on the branches rather than - // compares as LLVM does. - BR_CC, - - // A node to be selected to an actual call operation: either BL or BLR in - // the absence of tail calls. - Call, - - // Indicates a floating-point immediate which fits into the format required - // by the FMOV instructions. First (and only) operand is the 8-bit encoded - // value of that immediate. - FPMOV, - - // Corresponds directly to an EXTR instruction. Operands are an LHS an RHS - // and an LSB. - EXTR, - - // Wraps a load from the GOT, which should always be performed with a 64-bit - // load instruction. 
This prevents the DAG combiner folding a truncate to - // form a smaller memory access. - GOTLoad, - - // Performs a bitfield insert. Arguments are: the value being inserted into; - // the value being inserted; least significant bit changed; width of the - // field. - BFI, - - // Simply a convenient node inserted during ISelLowering to represent - // procedure return. Will almost certainly be selected to "RET". - Ret, - - /// Extracts a field of contiguous bits from the source and sign extends - /// them into a single register. Arguments are: source; immr; imms. Note - /// these are pre-encoded since DAG matching can't cope with combining LSB - /// and Width into these values itself. - SBFX, - - /// This is an A64-ification of the standard LLVM SELECT_CC operation. The - /// main difference is that it only has the values and an A64 condition, - /// which will be produced by a setcc instruction. - SELECT_CC, - - /// This serves most of the functions of the LLVM SETCC instruction, for two - /// purposes. First, it prevents optimisations from fiddling with the - /// compare after we've moved the CondCode information onto the SELECT_CC or - /// BR_CC instructions. Second, it gives a legal instruction for the actual - /// comparison. - /// - /// It keeps a record of the condition flags asked for because certain - /// instructions are only valid for a subset of condition codes. - SETCC, - - // Designates a node which is a tail call: both a call and a return - // instruction as far as selction is concerned. It should be selected to an - // unconditional branch. Has the usual plethora of call operands, but: 1st - // is callee, 2nd is stack adjustment required immediately before branch. - TC_RETURN, - - // Designates a call used to support the TLS descriptor ABI. The call itself - // will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall - // var") must be attached somehow during code generation. It takes two - // operands: the callee and the symbol to be relocated against. - TLSDESCCALL, - - // Leaf node which will be lowered to an appropriate MRS to obtain the - // thread pointer: TPIDR_EL0. - THREAD_POINTER, - - /// Extracts a field of contiguous bits from the source and zero extends - /// them into a single register. Arguments are: source; immr; imms. Note - /// these are pre-encoded since DAG matching can't cope with combining LSB - /// and Width into these values itself. - UBFX, - - // Wraps an address which the ISelLowering phase has decided should be - // created using the large memory model style: i.e. a sequence of four - // movz/movk instructions. - WrapperLarge, - - // Wraps an address which the ISelLowering phase has decided should be - // created using the small memory model style: i.e. adrp/add or - // adrp/mem-op. This exists to prevent bare TargetAddresses which may never - // get selected. 
- WrapperSmall, - - // Vector move immediate - NEON_MOVIMM, - - // Vector Move Inverted Immediate - NEON_MVNIMM, - - // Vector FP move immediate - NEON_FMOVIMM, - - // Vector permute - NEON_UZP1, - NEON_UZP2, - NEON_ZIP1, - NEON_ZIP2, - NEON_TRN1, - NEON_TRN2, - - // Vector Element reverse - NEON_REV64, - NEON_REV32, - NEON_REV16, - - // Vector compare - NEON_CMP, - - // Vector compare zero - NEON_CMPZ, - - // Vector compare bitwise test - NEON_TST, - - // Vector saturating shift - NEON_QSHLs, - NEON_QSHLu, - - // Vector dup - NEON_VDUP, - - // Vector dup by lane - NEON_VDUPLANE, - - // Vector extract - NEON_VEXTRACT, - - // NEON duplicate lane loads - NEON_LD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE, - NEON_LD3DUP, - NEON_LD4DUP, - - // NEON loads with post-increment base updates: - NEON_LD1_UPD, - NEON_LD2_UPD, - NEON_LD3_UPD, - NEON_LD4_UPD, - NEON_LD1x2_UPD, - NEON_LD1x3_UPD, - NEON_LD1x4_UPD, - - // NEON stores with post-increment base updates: - NEON_ST1_UPD, - NEON_ST2_UPD, - NEON_ST3_UPD, - NEON_ST4_UPD, - NEON_ST1x2_UPD, - NEON_ST1x3_UPD, - NEON_ST1x4_UPD, - - // NEON duplicate lane loads with post-increment base updates: - NEON_LD2DUP_UPD, - NEON_LD3DUP_UPD, - NEON_LD4DUP_UPD, - - // NEON lane loads with post-increment base updates: - NEON_LD2LN_UPD, - NEON_LD3LN_UPD, - NEON_LD4LN_UPD, - - // NEON lane store with post-increment base updates: - NEON_ST2LN_UPD, - NEON_ST3LN_UPD, - NEON_ST4LN_UPD - }; -} - - -class AArch64Subtarget; -class AArch64TargetMachine; - -class AArch64TargetLowering : public TargetLowering { -public: - explicit AArch64TargetLowering(AArch64TargetMachine &TM); - - const char *getTargetNodeName(unsigned Opcode) const override; - - CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const; - - SDValue LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const override; - - SDValue LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - SDLoc dl, SelectionDAG &DAG) const override; - - unsigned getByValTypeAlignment(Type *Ty) const override; - - SDValue LowerCall(CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const override; - - SDValue LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool IsVarArg, - const SmallVectorImpl &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - - SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; - - bool isConcatVector(SDValue Op, SelectionDAG &DAG, SDValue V0, SDValue V1, - const int *Mask, SDValue &Res) const; - - bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &V0, - SDValue &V1, int *Mask) const; - - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, - const AArch64Subtarget *ST) const; - - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; - - void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL, - SDValue &Chain) const; - - /// IsEligibleForTailCallOptimization - Check whether the call is eligible - /// for tail call optimization. Targets which want to do tail call - /// optimization should implement this function. 
- bool IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool IsVarArg, - bool IsCalleeStructRet, - bool IsCallerStructRet, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - SelectionDAG& DAG) const; - - /// Finds the incoming stack arguments which overlap the given fixed stack - /// object and incorporates their load into the current chain. This prevents - /// an upcoming store from clobbering the stack argument before it's used. - SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, - MachineFrameInfo *MFI, int ClobberedFI) const; - - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; - - bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const; - - bool IsTailCallConvention(CallingConv::ID CallCC) const; - - SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - - bool isLegalICmpImmediate(int64_t Val) const override; - - /// \brief Return true if the addressing mode represented by AM is legal for - /// this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; - - /// \brief Return the cost of the scaling factor used in the addressing - /// mode represented by AM for this target, for a load/store - /// of the specified type. - /// If the AM is supported, the return value must be >= 0. - /// If the AM is not supported, it returns a negative value. - int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override; - - bool isTruncateFree(Type *Ty1, Type *Ty2) const override; - bool isTruncateFree(EVT VT1, EVT VT2) const override; - - bool isZExtFree(Type *Ty1, Type *Ty2) const override; - bool isZExtFree(EVT VT1, EVT VT2) const override; - bool isZExtFree(SDValue Val, EVT VT2) const override; - - SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &A64cc, SelectionDAG &DAG, SDLoc &dl) const; - - MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const override; - - MachineBasicBlock * - emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, - unsigned Size, unsigned Opcode) const; - - MachineBasicBlock * - emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, unsigned CmpOp, - A64CC::CondCodes Cond) const; - MachineBasicBlock * - emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size) const; - - MachineBasicBlock * - EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const; - - SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG, - RTLIB::Libcall Call) const; - SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; - - SDValue 
LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const; - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; - - SDValue PerformDAGCombine(SDNode *N,DAGCombinerInfo &DCI) const override; - - unsigned getRegisterByName(const char* RegName, EVT VT) const override; - - /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster - /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be - /// expanded to FMAs when this method returns true, otherwise fmuladd is - /// expanded to fmul + fadd. - bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; - - /// allowsUnalignedMemoryAccesses - Returns true if the target allows - /// unaligned memory accesses of the specified type. Returns whether it - /// is "fast" by reference in the second argument. - bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - bool *Fast) const override; - - ConstraintType - getConstraintType(const std::string &Constraint) const override; - - ConstraintWeight - getSingleConstraintMatchWeight(AsmOperandInfo &Info, - const char *Constraint) const override; - void LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector &Ops, - SelectionDAG &DAG) const override; - - std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const override; - - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, - unsigned Intrinsic) const override; - - /// getMaximalGlobalOffset - Returns the maximal possible offset which can - /// be used for loads / stores from the global. - unsigned getMaximalGlobalOffset() const override; - -protected: - std::pair - findRepresentativeClass(MVT VT) const override; - -private: - const InstrItineraryData *Itins; - - const AArch64Subtarget *getSubtarget() const { - return &getTargetMachine().getSubtarget(); - } -}; -enum NeonModImmType { - Neon_Mov_Imm, - Neon_Mvn_Imm -}; - -extern SDValue ScanBUILD_VECTOR(SDValue Op, bool &isOnlyLowElement, - bool &usesOnlyOneValue, bool &hasDominantValue, - bool &isConstant, bool &isUNDEF); -} // namespace llvm - -#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td deleted file mode 100644 index 4cc3813203ce..000000000000 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ /dev/null @@ -1,1487 +0,0 @@ -//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This file describes AArch64 instruction formats, down to the level of the -// instruction's overall class. 
-//===----------------------------------------------------------------------===// - - -//===----------------------------------------------------------------------===// -// A64 Instruction Format Definitions. -//===----------------------------------------------------------------------===// - -// A64 is currently the only instruction set supported by the AArch64 -// architecture. -class A64Inst patterns, - InstrItinClass itin> - : Instruction { - // All A64 instructions are 32-bit. This field will be filled in - // gradually going down the hierarchy. - field bits<32> Inst; - - field bits<32> Unpredictable = 0; - // SoftFail is the generic name for this field, but we alias it so - // as to make it more obvious what it means in ARM-land. - field bits<32> SoftFail = Unpredictable; - - // LLVM-level model of the AArch64/A64 distinction. - let Namespace = "AArch64"; - let DecoderNamespace = "A64"; - let Size = 4; - - // Set the templated fields - let OutOperandList = outs; - let InOperandList = ins; - let AsmString = asmstr; - let Pattern = patterns; - let Itinerary = itin; -} - -class PseudoInst patterns> : Instruction { - let Namespace = "AArch64"; - - let OutOperandList = outs; - let InOperandList= ins; - let Pattern = patterns; - let isCodeGenOnly = 1; - let isPseudo = 1; -} - -// Represents a pseudo-instruction that represents a single A64 instruction for -// whatever reason, the eventual result will be a 32-bit real instruction. -class A64PseudoInst patterns> - : PseudoInst { - let Size = 4; -} - -// As above, this will be a single A64 instruction, but we can actually give the -// expansion in TableGen. -class A64PseudoExpand patterns, dag Result> - : A64PseudoInst, - PseudoInstExpansion; - - -// First, some common cross-hierarchy register formats. - -class A64InstRd patterns, InstrItinClass itin> - : A64Inst { - bits<5> Rd; - - let Inst{4-0} = Rd; -} - -class A64InstRt patterns, InstrItinClass itin> - : A64Inst { - bits<5> Rt; - - let Inst{4-0} = Rt; -} - - -class A64InstRdn patterns, InstrItinClass itin> - : A64InstRd { - // Inherit rdt - bits<5> Rn; - - let Inst{9-5} = Rn; -} - -class A64InstRtn patterns, InstrItinClass itin> - : A64InstRt { - // Inherit rdt - bits<5> Rn; - - let Inst{9-5} = Rn; -} - -// Instructions taking Rt,Rt2,Rn -class A64InstRtt2n patterns, InstrItinClass itin> - : A64InstRtn { - bits<5> Rt2; - - let Inst{14-10} = Rt2; -} - -class A64InstRdnm patterns, InstrItinClass itin> - : A64InstRdn { - bits<5> Rm; - - let Inst{20-16} = Rm; -} - -class A64InstRtnm patterns, InstrItinClass itin> - : A64InstRtn { - bits<5> Rm; - - let Inst{20-16} = Rm; -} - -//===----------------------------------------------------------------------===// -// -// Actual A64 Instruction Formats -// - -// Format for Add-subtract (extended register) instructions. -class A64I_addsubext opt, bits<3> option, - dag outs, dag ins, string asmstr, list patterns, - InstrItinClass itin> - : A64InstRdnm { - bits<3> Imm3; - - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = S; - let Inst{28-24} = 0b01011; - let Inst{23-22} = opt; - let Inst{21} = 0b1; - // Rm inherited in 20-16 - let Inst{15-13} = option; - let Inst{12-10} = Imm3; - // Rn inherited in 9-5 - // Rd inherited in 4-0 -} - -// Format for Add-subtract (immediate) instructions. 
-class A64I_addsubimm shift, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - bits<12> Imm12; - - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = S; - let Inst{28-24} = 0b10001; - let Inst{23-22} = shift; - let Inst{21-10} = Imm12; -} - -// Format for Add-subtract (shifted register) instructions. -class A64I_addsubshift shift, - dag outs, dag ins, string asmstr, list patterns, - InstrItinClass itin> - : A64InstRdnm { - bits<6> Imm6; - - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = S; - let Inst{28-24} = 0b01011; - let Inst{23-22} = shift; - let Inst{21} = 0b0; - // Rm inherited in 20-16 - let Inst{15-10} = Imm6; - // Rn inherited in 9-5 - // Rd inherited in 4-0 -} - -// Format for Add-subtract (with carry) instructions. -class A64I_addsubcarry opcode2, - dag outs, dag ins, string asmstr, list patterns, - InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = S; - let Inst{28-21} = 0b11010000; - // Rm inherited in 20-16 - let Inst{15-10} = opcode2; - // Rn inherited in 9-5 - // Rd inherited in 4-0 -} - - -// Format for Bitfield instructions -class A64I_bitfield opc, bit n, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - bits<6> ImmR; - bits<6> ImmS; - - let Inst{31} = sf; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100110; - let Inst{22} = n; - let Inst{21-16} = ImmR; - let Inst{15-10} = ImmS; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format for compare and branch (immediate) instructions. -class A64I_cmpbr patterns, InstrItinClass itin> - : A64InstRt { - bits<19> Label; - - let Inst{31} = sf; - let Inst{30-25} = 0b011010; - let Inst{24} = op; - let Inst{23-5} = Label; - // Inherit Rt in 4-0 -} - -// Format for conditional branch (immediate) instructions. -class A64I_condbr patterns, InstrItinClass itin> - : A64Inst { - bits<19> Label; - bits<4> Cond; - - let Inst{31-25} = 0b0101010; - let Inst{24} = o1; - let Inst{23-5} = Label; - let Inst{4} = o0; - let Inst{3-0} = Cond; -} - -// Format for conditional compare (immediate) instructions. -class A64I_condcmpimm patterns, InstrItinClass itin> - : A64Inst { - bits<5> Rn; - bits<5> UImm5; - bits<4> NZCVImm; - bits<4> Cond; - - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = s; - let Inst{28-21} = 0b11010010; - let Inst{20-16} = UImm5; - let Inst{15-12} = Cond; - let Inst{11} = 0b1; - let Inst{10} = o2; - let Inst{9-5} = Rn; - let Inst{4} = o3; - let Inst{3-0} = NZCVImm; -} - -// Format for conditional compare (register) instructions. -class A64I_condcmpreg patterns, InstrItinClass itin> - : A64Inst { - bits<5> Rn; - bits<5> Rm; - bits<4> NZCVImm; - bits<4> Cond; - - - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = s; - let Inst{28-21} = 0b11010010; - let Inst{20-16} = Rm; - let Inst{15-12} = Cond; - let Inst{11} = 0b0; - let Inst{10} = o2; - let Inst{9-5} = Rn; - let Inst{4} = o3; - let Inst{3-0} = NZCVImm; -} - -// Format for conditional select instructions. 
-class A64I_condsel op2, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - bits<4> Cond; - - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = s; - let Inst{28-21} = 0b11010100; - // Inherit Rm in 20-16 - let Inst{15-12} = Cond; - let Inst{11-10} = op2; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format for data processing (1 source) instructions -class A64I_dp_1src opcode2, bits<6> opcode, - string asmstr, dag outs, dag ins, - list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31} = sf; - let Inst{30} = 0b1; - let Inst{29} = S; - let Inst{28-21} = 0b11010110; - let Inst{20-16} = opcode2; - let Inst{15-10} = opcode; -} - -// Format for data processing (2 source) instructions -class A64I_dp_2src opcode, bit S, - string asmstr, dag outs, dag ins, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = sf; - let Inst{30} = 0b0; - let Inst{29} = S; - let Inst{28-21} = 0b11010110; - let Inst{15-10} = opcode; -} - -// Format for data-processing (3 source) instructions - -class A64I_dp3 opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = sf; - let Inst{30-29} = opcode{5-4}; - let Inst{28-24} = 0b11011; - let Inst{23-21} = opcode{3-1}; - // Inherits Rm in 20-16 - let Inst{15} = opcode{0}; - // {14-10} mostly Ra, but unspecified for SMULH/UMULH - // Inherits Rn in 9-5 - // Inherits Rd in 4-0 -} - -// Format for exception generation instructions -class A64I_exception opc, bits<3> op2, bits<2> ll, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64Inst { - bits<16> UImm16; - - let Inst{31-24} = 0b11010100; - let Inst{23-21} = opc; - let Inst{20-5} = UImm16; - let Inst{4-2} = op2; - let Inst{1-0} = ll; -} - -// Format for extract (immediate) instructions -class A64I_extract op, bit n, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - bits<6> LSB; - - let Inst{31} = sf; - let Inst{30-29} = op{2-1}; - let Inst{28-23} = 0b100111; - let Inst{22} = n; - let Inst{21} = op{0}; - // Inherits Rm in bits 20-16 - let Inst{15-10} = LSB; - // Inherits Rn in 9-5 - // Inherits Rd in 4-0 -} - -let Predicates = [HasFPARMv8] in { - -// Format for floating-point compare instructions. -class A64I_fpcmp type, bits<2> op, bits<5> opcode2, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64Inst { - bits<5> Rn; - bits<5> Rm; - - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-16} = Rm; - let Inst{15-14} = op; - let Inst{13-10} = 0b1000; - let Inst{9-5} = Rn; - let Inst{4-0} = opcode2; -} - -// Format for floating-point conditional compare instructions. -class A64I_fpccmp type, bit op, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - bits<5> Rn; - bits<5> Rm; - bits<4> NZCVImm; - bits<4> Cond; - - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-16} = Rm; - let Inst{15-12} = Cond; - let Inst{11-10} = 0b01; - let Inst{9-5} = Rn; - let Inst{4} = op; - let Inst{3-0} = NZCVImm; -} - -// Format for floating-point conditional select instructions. 
-class A64I_fpcondsel type, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - bits<4> Cond; - - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-12} = Cond; - let Inst{11-10} = 0b11; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - - -// Format for floating-point data-processing (1 source) instructions. -class A64I_fpdp1 type, bits<6> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-15} = opcode; - let Inst{14-10} = 0b10000; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format for floating-point data-processing (2 sources) instructions. -class A64I_fpdp2 type, bits<4> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-12} = opcode; - let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format for floating-point data-processing (3 sources) instructions. -class A64I_fpdp3 type, bit o1, bit o0, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - bits<5> Ra; - - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11111; - let Inst{23-22} = type; - let Inst{21} = o1; - // Inherit Rm in 20-16 - let Inst{15} = o0; - let Inst{14-10} = Ra; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format for floating-point <-> fixed-point conversion instructions. -class A64I_fpfixed type, bits<2> mode, bits<3> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - bits<6> Scale; - - let Inst{31} = sf; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b0; - let Inst{20-19} = mode; - let Inst{18-16} = opcode; - let Inst{15-10} = Scale; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format for floating-point <-> integer conversion instructions. -class A64I_fpint type, bits<2> rmode, bits<3> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31} = sf; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - - -// Format for floating-point immediate instructions. -class A64I_fpimm type, bits<5> imm5, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRd { - bits<8> Imm8; - - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-13} = Imm8; - let Inst{12-10} = 0b100; - let Inst{9-5} = imm5; - // Inherit Rd in 4-0 -} - -} - -// Format for load-register (literal) instructions. 
-class A64I_LDRlit opc, bit v, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRt { - bits<19> Imm19; - - let Inst{31-30} = opc; - let Inst{29-27} = 0b011; - let Inst{26} = v; - let Inst{25-24} = 0b00; - let Inst{23-5} = Imm19; - // Inherit Rt in 4-0 -} - -// Format for load-store exclusive instructions. -class A64I_LDSTex_tn size, bit o2, bit L, bit o1, bit o0, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtn { - let Inst{31-30} = size; - let Inst{29-24} = 0b001000; - let Inst{23} = o2; - let Inst{22} = L; - let Inst{21} = o1; - let Inst{15} = o0; -} - -class A64I_LDSTex_tt2n size, bit o2, bit L, bit o1, bit o0, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin>: - A64I_LDSTex_tn{ - bits<5> Rt2; - let Inst{14-10} = Rt2; -} - -class A64I_LDSTex_stn size, bit o2, bit L, bit o1, bit o0, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin>: - A64I_LDSTex_tn{ - bits<5> Rs; - let Inst{20-16} = Rs; -} - -class A64I_LDSTex_stt2n size, bit o2, bit L, bit o1, bit o0, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin>: - A64I_LDSTex_stn{ - bits<5> Rt2; - let Inst{14-10} = Rt2; -} - -// Format for load-store register (immediate post-indexed) instructions -class A64I_LSpostind size, bit v, bits<2> opc, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtn { - bits<9> SImm9; - - let Inst{31-30} = size; - let Inst{29-27} = 0b111; - let Inst{26} = v; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0b0; - let Inst{20-12} = SImm9; - let Inst{11-10} = 0b01; - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format for load-store register (immediate pre-indexed) instructions -class A64I_LSpreind size, bit v, bits<2> opc, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtn { - bits<9> SImm9; - - - let Inst{31-30} = size; - let Inst{29-27} = 0b111; - let Inst{26} = v; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0b0; - let Inst{20-12} = SImm9; - let Inst{11-10} = 0b11; - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format for load-store register (unprivileged) instructions -class A64I_LSunpriv size, bit v, bits<2> opc, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtn { - bits<9> SImm9; - - - let Inst{31-30} = size; - let Inst{29-27} = 0b111; - let Inst{26} = v; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0b0; - let Inst{20-12} = SImm9; - let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format for load-store (unscaled immediate) instructions. -class A64I_LSunalimm size, bit v, bits<2> opc, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtn { - bits<9> SImm9; - - let Inst{31-30} = size; - let Inst{29-27} = 0b111; - let Inst{26} = v; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0b0; - let Inst{20-12} = SImm9; - let Inst{11-10} = 0b00; - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - - -// Format for load-store (unsigned immediate) instructions. 
-class A64I_LSunsigimm size, bit v, bits<2> opc, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtn { - bits<12> UImm12; - - let Inst{31-30} = size; - let Inst{29-27} = 0b111; - let Inst{26} = v; - let Inst{25-24} = 0b01; - let Inst{23-22} = opc; - let Inst{21-10} = UImm12; -} - -// Format for load-store register (register offset) instructions. -class A64I_LSregoff size, bit v, bits<2> opc, bit optionlo, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtn { - bits<5> Rm; - - // Complex operand selection needed for these instructions, so they - // need an "addr" field for encoding/decoding to be generated. - bits<3> Ext; - // OptionHi = Ext{2-1} - // S = Ext{0} - - let Inst{31-30} = size; - let Inst{29-27} = 0b111; - let Inst{26} = v; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0b1; - let Inst{20-16} = Rm; - let Inst{15-14} = Ext{2-1}; - let Inst{13} = optionlo; - let Inst{12} = Ext{0}; - let Inst{11-10} = 0b10; - // Inherits Rn in 9-5 - // Inherits Rt in 4-0 - - let AddedComplexity = 50; -} - -// Format for Load-store register pair (offset) instructions -class A64I_LSPoffset opc, bit v, bit l, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtt2n { - bits<7> SImm7; - - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = v; - let Inst{25-23} = 0b010; - let Inst{22} = l; - let Inst{21-15} = SImm7; - // Inherit Rt2 in 14-10 - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format for Load-store register pair (post-indexed) instructions -class A64I_LSPpostind opc, bit v, bit l, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtt2n { - bits<7> SImm7; - - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = v; - let Inst{25-23} = 0b001; - let Inst{22} = l; - let Inst{21-15} = SImm7; - // Inherit Rt2 in 14-10 - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format for Load-store register pair (pre-indexed) instructions -class A64I_LSPpreind opc, bit v, bit l, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtt2n { - bits<7> SImm7; - - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = v; - let Inst{25-23} = 0b011; - let Inst{22} = l; - let Inst{21-15} = SImm7; - // Inherit Rt2 in 14-10 - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format for Load-store non-temporal register pair (offset) instructions -class A64I_LSPnontemp opc, bit v, bit l, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtt2n { - bits<7> SImm7; - - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = v; - let Inst{25-23} = 0b000; - let Inst{22} = l; - let Inst{21-15} = SImm7; - // Inherit Rt2 in 14-10 - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format for Logical (immediate) instructions -class A64I_logicalimm opc, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - bit N; - bits<6> ImmR; - bits<6> ImmS; - - // N, ImmR and ImmS have no separate existence in any assembly syntax (or for - // selection), so we'll combine them into a single field here. 
- bits<13> Imm; - // N = Imm{12}; - // ImmR = Imm{11-6}; - // ImmS = Imm{5-0}; - - let Inst{31} = sf; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100100; - let Inst{22} = Imm{12}; - let Inst{21-16} = Imm{11-6}; - let Inst{15-10} = Imm{5-0}; - // Rn inherited in 9-5 - // Rd inherited in 4-0 -} - -// Format for Logical (shifted register) instructions -class A64I_logicalshift opc, bits<2> shift, bit N, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - bits<6> Imm6; - - let Inst{31} = sf; - let Inst{30-29} = opc; - let Inst{28-24} = 0b01010; - let Inst{23-22} = shift; - let Inst{21} = N; - // Rm inherited - let Inst{15-10} = Imm6; - // Rn inherited - // Rd inherited -} - -// Format for Move wide (immediate) -class A64I_movw opc, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRd { - bits<16> UImm16; - bits<2> Shift; // Called "hw" officially - - let Inst{31} = sf; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100101; - let Inst{22-21} = Shift; - let Inst{20-5} = UImm16; - // Inherits Rd in 4-0 -} - -// Format for PC-relative addressing instructions, ADR and ADRP. -class A64I_PCADR patterns, InstrItinClass itin> - : A64InstRd { - bits<21> Label; - - let Inst{31} = op; - let Inst{30-29} = Label{1-0}; - let Inst{28-24} = 0b10000; - let Inst{23-5} = Label{20-2}; -} - -// Format for system instructions -class A64I_system patterns, InstrItinClass itin> - : A64Inst { - bits<2> Op0; - bits<3> Op1; - bits<4> CRn; - bits<4> CRm; - bits<3> Op2; - bits<5> Rt; - - let Inst{31-22} = 0b1101010100; - let Inst{21} = l; - let Inst{20-19} = Op0; - let Inst{18-16} = Op1; - let Inst{15-12} = CRn; - let Inst{11-8} = CRm; - let Inst{7-5} = Op2; - let Inst{4-0} = Rt; - - // These instructions can do horrible things. - let hasSideEffects = 1; -} - -// Format for unconditional branch (immediate) instructions -class A64I_Bimm patterns, InstrItinClass itin> - : A64Inst { - // Doubly special in not even sharing register fields with other - // instructions, so we create our own Rn here. - bits<26> Label; - - let Inst{31} = op; - let Inst{30-26} = 0b00101; - let Inst{25-0} = Label; -} - -// Format for Test & branch (immediate) instructions -class A64I_TBimm patterns, InstrItinClass itin> - : A64InstRt { - // Doubly special in not even sharing register fields with other - // instructions, so we create our own Rn here. - bits<6> Imm; - bits<14> Label; - - let Inst{31} = Imm{5}; - let Inst{30-25} = 0b011011; - let Inst{24} = op; - let Inst{23-19} = Imm{4-0}; - let Inst{18-5} = Label; - // Inherit Rt in 4-0 -} - -// Format for Unconditional branch (register) instructions, including -// RET. Shares no fields with instructions further up the hierarchy -// so top-level. -class A64I_Breg opc, bits<5> op2, bits<6> op3, bits<5> op4, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64Inst { - // Doubly special in not even sharing register fields with other - // instructions, so we create our own Rn here. - bits<5> Rn; - - let Inst{31-25} = 0b1101011; - let Inst{24-21} = opc; - let Inst{20-16} = op2; - let Inst{15-10} = op3; - let Inst{9-5} = Rn; - let Inst{4-0} = op4; -} - - -//===----------------------------------------------------------------------===// -// -// Neon Instruction Format Definitions. 
-// - -let Predicates = [HasNEON] in { - -class NeonInstAlias - : InstAlias { -} - -// Format AdvSIMD bitwise extract -class NeonI_BitExtract op2, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-24} = 0b101110; - let Inst{23-22} = op2; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 - let Inst{15} = 0b0; - // imm4 in 14-11 - let Inst{10} = 0b0; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD perm -class NeonI_Perm size, bits<3> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-24} = 0b001110; - let Inst{23-22} = size; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 - let Inst{15} = 0b0; - let Inst{14-12} = opcode; - let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD table lookup -class NeonI_TBL op2, bits<2> len, bit op, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-24} = 0b001110; - let Inst{23-22} = op2; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 - let Inst{15} = 0b0; - let Inst{14-13} = len; - let Inst{12} = op; - let Inst{11-10} = 0b00; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD 3 vector registers with same vector type -class NeonI_3VSame size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-11} = opcode; - let Inst{10} = 0b1; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD 3 vector registers with different vector type -class NeonI_3VDiff size, bits<4> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-12} = opcode; - let Inst{11} = 0b0; - let Inst{10} = 0b0; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD two registers and an element -class NeonI_2VElem size, bits<4> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-24} = 0b01111; - let Inst{23-22} = size; - // l in Inst{21} - // m in Inst{20} - // Inherit Rm in 19-16 - let Inst{15-12} = opcode; - // h in Inst{11} - let Inst{10} = 0b0; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD 1 vector register with modified immediate -class NeonI_1VModImm patterns, InstrItinClass itin> - : A64InstRd { - bits<8> Imm; - bits<4> cmode; - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = op; - let Inst{28-19} = 0b0111100000; - let Inst{15-12} = cmode; - let Inst{11} = 0b0; // o2 - let Inst{10} = 1; - // Inherit Rd in 4-0 - let Inst{18-16} = Imm{7-5}; // imm a:b:c - let Inst{9-5} = Imm{4-0}; // imm d:e:f:g:h -} - -// Format AdvSIMD 3 scalar registers with same type - -class NeonI_Scalar3Same size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = 0b0; - let Inst{30} = 0b1; - let Inst{29} = u; - let Inst{28-24} = 0b11110; - let 
Inst{23-22} = size; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-11} = opcode; - let Inst{10} = 0b1; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - - -// Format AdvSIMD 2 vector registers miscellaneous -class NeonI_2VMisc size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD 2 vector 1 immediate shift -class NeonI_2VShiftImm opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - bits<7> Imm; - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-23} = 0b011110; - let Inst{22-16} = Imm; - let Inst{15-11} = opcode; - let Inst{10} = 0b1; - - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD duplicate and insert -class NeonI_copy imm4, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - bits<5> Imm5; - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = op; - let Inst{28-21} = 0b01110000; - let Inst{20-16} = Imm5; - let Inst{15} = 0b0; - let Inst{14-11} = imm4; - let Inst{10} = 0b1; - - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} -// Format AdvSIMD insert from element to vector -class NeonI_insert patterns, InstrItinClass itin> - : A64InstRdn { - bits<5> Imm5; - bits<4> Imm4; - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = op; - let Inst{28-21} = 0b01110000; - let Inst{20-16} = Imm5; - let Inst{15} = 0b0; - let Inst{14-11} = Imm4; - let Inst{10} = 0b1; - - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD scalar pairwise -class NeonI_ScalarPair size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31} = 0b0; - let Inst{30} = 0b1; - let Inst{29} = u; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b11000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD 2 vector across lanes -class NeonI_2VAcross size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn -{ - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b11000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD scalar two registers miscellaneous -class NeonI_Scalar2SameMisc size, bits<5> opcode, dag outs, dag ins, - string asmstr, list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31} = 0b0; - let Inst{30} = 0b1; - let Inst{29} = u; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD vector load/store multiple N-element structure -class NeonI_LdStMult opcode, bits<2> size, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtn -{ - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-23} = 0b0011000; - let Inst{22} = l; - let Inst{21-16} = 0b000000; - let Inst{15-12} = opcode; - let Inst{11-10} = size; - - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - 
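All of the NEON format classes in this section follow the same pattern: each one pins the fixed opcode bits of its encoding group and splices the remaining operand fields into the 32-bit Inst word. As a minimal standalone sketch (illustrative only; the function name and argument checks are invented here and are not part of the backend), the NeonI_LdStMult layout just defined (q in bit 30, the fixed 0b0011000 in bits 29-23, l in bit 22, opcode in bits 15-12, size in bits 11-10, with Rn and Rt inherited in 9-5 and 4-0) packs as follows in C++:

  #include <cassert>
  #include <cstdint>

  // Illustrative sketch: assemble the NeonI_LdStMult field layout described
  // above into a 32-bit instruction word. Names are invented for this example.
  static uint32_t encodeNeonLdStMult(bool Q, bool L, unsigned Opcode,
                                     unsigned Size, unsigned Rn, unsigned Rt) {
    assert(Opcode < 16 && Size < 4 && Rn < 32 && Rt < 32 && "field out of range");
    uint32_t Inst = 0;               // Inst{31} = 0b0, Inst{21-16} = 0b000000
    Inst |= uint32_t(Q) << 30;       // Inst{30} = q
    Inst |= 0x18u << 23;             // Inst{29-23} = 0b0011000
    Inst |= uint32_t(L) << 22;       // Inst{22} = l (load vs. store)
    Inst |= (Opcode & 0xFu) << 12;   // Inst{15-12} = opcode
    Inst |= (Size & 0x3u) << 10;     // Inst{11-10} = size
    Inst |= (Rn & 0x1Fu) << 5;       // Rn inherited in 9-5
    Inst |= Rt & 0x1Fu;              // Rt inherited in 4-0
    return Inst;
  }

The post-indexed variant described next differs only in the fixed bits (0b0011001 in 29-23) and in taking Rm in 20-16 rather than a fixed zero field.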
-// Format AdvSIMD vector load/store multiple N-element structure (post-index) -class NeonI_LdStMult_Post opcode, bits<2> size, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtnm -{ - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-23} = 0b0011001; - let Inst{22} = l; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 - let Inst{15-12} = opcode; - let Inst{11-10} = size; - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD vector load Single N-element structure to all lanes -class NeonI_LdOne_Dup opcode, bits<2> size, dag outs, - dag ins, string asmstr, list patterns, - InstrItinClass itin> - : A64InstRtn -{ - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-23} = 0b0011010; - let Inst{22} = 0b1; - let Inst{21} = r; - let Inst{20-16} = 0b00000; - let Inst{15-13} = opcode; - let Inst{12} = 0b0; - let Inst{11-10} = size; - - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD vector load/store Single N-element structure to/from one lane -class NeonI_LdStOne_Lane op2_1, bit op0, dag outs, - dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtn -{ - bits<4> lane; - let Inst{31} = 0b0; - let Inst{29-23} = 0b0011010; - let Inst{22} = l; - let Inst{21} = r; - let Inst{20-16} = 0b00000; - let Inst{15-14} = op2_1; - let Inst{13} = op0; - - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD post-index vector load Single N-element structure to all lanes -class NeonI_LdOne_Dup_Post opcode, bits<2> size, dag outs, - dag ins, string asmstr, list patterns, - InstrItinClass itin> - : A64InstRtnm -{ - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-23} = 0b0011011; - let Inst{22} = 0b1; - let Inst{21} = r; - // Inherit Rm in 20-16 - let Inst{15-13} = opcode; - let Inst{12} = 0b0; - let Inst{11-10} = size; - - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD post-index vector load/store Single N-element structure -// to/from one lane -class NeonI_LdStOne_Lane_Post op2_1, bit op0, dag outs, - dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtnm -{ - bits<4> lane; - let Inst{31} = 0b0; - let Inst{29-23} = 0b0011011; - let Inst{22} = l; - let Inst{21} = r; - // Inherit Rm in 20-16 - let Inst{15-14} = op2_1; - let Inst{13} = op0; - - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD 3 scalar registers with different type - -class NeonI_Scalar3Diff size, bits<4> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31-30} = 0b01; - let Inst{29} = u; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-12} = opcode; - let Inst{11-10} = 0b00; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD scalar shift by immediate - -class NeonI_ScalarShiftImm opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - bits<4> Imm4; - bits<3> Imm3; - let Inst{31-30} = 0b01; - let Inst{29} = u; - let Inst{28-23} = 0b111110; - let Inst{22-19} = Imm4; - let Inst{18-16} = Imm3; - let Inst{15-11} = opcode; - let Inst{10} = 0b1; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD crypto AES -class NeonI_Crypto_AES size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31-24} = 0b01001110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10100; - let Inst{16-12} = opcode; - let Inst{11-10} 
= 0b10; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD crypto SHA -class NeonI_Crypto_SHA size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31-24} = 0b01011110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10100; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD crypto 3V SHA -class NeonI_Crypto_3VSHA size, bits<3> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31-24} = 0b01011110; - let Inst{23-22} = size; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 - let Inst{15} = 0b0; - let Inst{14-12} = opcode; - let Inst{11-10} = 0b00; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD scalar x indexed element -class NeonI_ScalarXIndexedElem opcode, dag outs, dag ins, - string asmstr, list patterns, - InstrItinClass itin> - : A64InstRdnm -{ - let Inst{31} = 0b0; - let Inst{30} = 0b1; - let Inst{29} = u; - let Inst{28-24} = 0b11111; - let Inst{23} = szhi; - let Inst{22} = szlo; - // l in Inst{21} - // m in Instr{20} - // Inherit Rm in 19-16 - let Inst{15-12} = opcode; - // h in Inst{11} - let Inst{10} = 0b0; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} -// Format AdvSIMD scalar copy - insert from element to scalar -class NeonI_ScalarCopy patterns, InstrItinClass itin> - : NeonI_copy<0b1, 0b0, 0b0000, outs, ins, asmstr, patterns, itin> { - let Inst{28} = 0b1; -} -} - diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp deleted file mode 100644 index e2612abffa52..000000000000 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ /dev/null @@ -1,979 +0,0 @@ -//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the AArch64 implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64InstrInfo.h" -#include "AArch64MachineFunctionInfo.h" -#include "AArch64TargetMachine.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" -#include - -using namespace llvm; - -#define GET_INSTRINFO_CTOR_DTOR -#include "AArch64GenInstrInfo.inc" - -AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) - : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), - Subtarget(STI) {} - -void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - unsigned Opc = 0; - unsigned ZeroReg = 0; - if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) { - // E.g. 
ADD xDst, xsp, #0 (, lsl #0) - BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg) - .addReg(SrcReg) - .addImm(0); - return; - } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { - // E.g. ADD wDST, wsp, #0 (, lsl #0) - BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg) - .addReg(SrcReg) - .addImm(0); - return; - } else if (DestReg == AArch64::NZCV) { - assert(AArch64::GPR64RegClass.contains(SrcReg)); - // E.g. MSR NZCV, xDST - BuildMI(MBB, I, DL, get(AArch64::MSRix)) - .addImm(A64SysReg::NZCV) - .addReg(SrcReg); - } else if (SrcReg == AArch64::NZCV) { - assert(AArch64::GPR64RegClass.contains(DestReg)); - // E.g. MRS xDST, NZCV - BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg) - .addImm(A64SysReg::NZCV); - } else if (AArch64::GPR64RegClass.contains(DestReg)) { - if(AArch64::GPR64RegClass.contains(SrcReg)){ - Opc = AArch64::ORRxxx_lsl; - ZeroReg = AArch64::XZR; - } else{ - assert(AArch64::FPR64RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg) - .addReg(SrcReg); - return; - } - } else if (AArch64::GPR32RegClass.contains(DestReg)) { - if(AArch64::GPR32RegClass.contains(SrcReg)){ - Opc = AArch64::ORRwww_lsl; - ZeroReg = AArch64::WZR; - } else{ - assert(AArch64::FPR32RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg) - .addReg(SrcReg); - return; - } - } else if (AArch64::FPR32RegClass.contains(DestReg)) { - if(AArch64::FPR32RegClass.contains(SrcReg)){ - BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg) - .addReg(SrcReg); - return; - } - else { - assert(AArch64::GPR32RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg) - .addReg(SrcReg); - return; - } - } else if (AArch64::FPR64RegClass.contains(DestReg)) { - if(AArch64::FPR64RegClass.contains(SrcReg)){ - BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg) - .addReg(SrcReg); - return; - } - else { - assert(AArch64::GPR64RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg) - .addReg(SrcReg); - return; - } - } else if (AArch64::FPR128RegClass.contains(DestReg)) { - assert(AArch64::FPR128RegClass.contains(SrcReg)); - - // If NEON is enable, we use ORR to implement this copy. - // If NEON isn't available, emit STR and LDR to handle this. 
- if(getSubTarget().hasNEON()) { - BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg) - .addReg(SrcReg) - .addReg(SrcReg); - return; - } else { - BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP) - .addReg(SrcReg) - .addReg(AArch64::XSP) - .addImm(0x1ff & -16); - - BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg) - .addReg(AArch64::XSP, RegState::Define) - .addReg(AArch64::XSP) - .addImm(16); - return; - } - } else if (AArch64::FPR8RegClass.contains(DestReg, SrcReg)) { - // The copy of two FPR8 registers is implemented by the copy of two FPR32 - const TargetRegisterInfo *TRI = &getRegisterInfo(); - unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_8, - &AArch64::FPR32RegClass); - unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_8, - &AArch64::FPR32RegClass); - BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst) - .addReg(Src); - return; - } else if (AArch64::FPR16RegClass.contains(DestReg, SrcReg)) { - // The copy of two FPR16 registers is implemented by the copy of two FPR32 - const TargetRegisterInfo *TRI = &getRegisterInfo(); - unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_16, - &AArch64::FPR32RegClass); - unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_16, - &AArch64::FPR32RegClass); - BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst) - .addReg(Src); - return; - } else { - CopyPhysRegTuple(MBB, I, DL, DestReg, SrcReg); - return; - } - - // E.g. ORR xDst, xzr, xSrc, lsl #0 - BuildMI(MBB, I, DL, get(Opc), DestReg) - .addReg(ZeroReg) - .addReg(SrcReg) - .addImm(0); -} - -void AArch64InstrInfo::CopyPhysRegTuple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - DebugLoc DL, unsigned DestReg, - unsigned SrcReg) const { - unsigned SubRegs; - bool IsQRegs; - if (AArch64::DPairRegClass.contains(DestReg, SrcReg)) { - SubRegs = 2; - IsQRegs = false; - } else if (AArch64::DTripleRegClass.contains(DestReg, SrcReg)) { - SubRegs = 3; - IsQRegs = false; - } else if (AArch64::DQuadRegClass.contains(DestReg, SrcReg)) { - SubRegs = 4; - IsQRegs = false; - } else if (AArch64::QPairRegClass.contains(DestReg, SrcReg)) { - SubRegs = 2; - IsQRegs = true; - } else if (AArch64::QTripleRegClass.contains(DestReg, SrcReg)) { - SubRegs = 3; - IsQRegs = true; - } else if (AArch64::QQuadRegClass.contains(DestReg, SrcReg)) { - SubRegs = 4; - IsQRegs = true; - } else - llvm_unreachable("Unknown register class"); - - unsigned BeginIdx = IsQRegs ? AArch64::qsub_0 : AArch64::dsub_0; - int Spacing = 1; - const TargetRegisterInfo *TRI = &getRegisterInfo(); - // Copy register tuples backward when the first Dest reg overlaps - // with SrcReg. - if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { - BeginIdx = BeginIdx + (SubRegs - 1); - Spacing = -1; - } - - unsigned Opc = IsQRegs ? AArch64::ORRvvv_16B : AArch64::ORRvvv_8B; - for (unsigned i = 0; i != SubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing); - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing); - assert(Dst && Src && "Bad sub-register"); - BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) - .addReg(Src) - .addReg(Src); - } - return; -} - -/// Does the Opcode represent a conditional branch that we can remove and re-add -/// at the end of a basic block? 
-static bool isCondBranch(unsigned Opc) { - return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx || - Opc == AArch64::CBNZw || Opc == AArch64::CBNZx || - Opc == AArch64::TBZwii || Opc == AArch64::TBZxii || - Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii; -} - -/// Takes apart a given conditional branch MachineInstr (see isCondBranch), -/// setting TBB to the destination basic block and populating the Cond vector -/// with data necessary to recreate the conditional branch at a later -/// date. First element will be the opcode, and subsequent ones define the -/// conditions being branched on in an instruction-specific manner. -static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB, - SmallVectorImpl &Cond) { - switch(I->getOpcode()) { - case AArch64::Bcc: - case AArch64::CBZw: - case AArch64::CBZx: - case AArch64::CBNZw: - case AArch64::CBNZx: - // These instructions just have one predicate operand in position 0 (either - // a condition code or a register being compared). - Cond.push_back(MachineOperand::CreateImm(I->getOpcode())); - Cond.push_back(I->getOperand(0)); - TBB = I->getOperand(1).getMBB(); - return; - case AArch64::TBZwii: - case AArch64::TBZxii: - case AArch64::TBNZwii: - case AArch64::TBNZxii: - // These have two predicate operands: a register and a bit position. - Cond.push_back(MachineOperand::CreateImm(I->getOpcode())); - Cond.push_back(I->getOperand(0)); - Cond.push_back(I->getOperand(1)); - TBB = I->getOperand(2).getMBB(); - return; - default: - llvm_unreachable("Unknown conditional branch to classify"); - } -} - - -bool -AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) - return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - if (!isUnpredicatedTerminator(I)) - return false; - - // Get the last instruction in the block. - MachineInstr *LastInst = I; - - // If there is only one terminator instruction, process it. - unsigned LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (LastOpc == AArch64::Bimm) { - TBB = LastInst->getOperand(0).getMBB(); - return false; - } - if (isCondBranch(LastOpc)) { - classifyCondBranch(LastInst, TBB, Cond); - return false; - } - return true; // Can't handle indirect branch. - } - - // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - - // If AllowModify is true and the block ends with two or more unconditional - // branches, delete all but the first unconditional branch. - if (AllowModify && LastOpc == AArch64::Bimm) { - while (SecondLastOpc == AArch64::Bimm) { - LastInst->eraseFromParent(); - LastInst = SecondLastInst; - LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - // Return now the only terminator is an unconditional branch. - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else { - SecondLastInst = I; - SecondLastOpc = SecondLastInst->getOpcode(); - } - } - } - - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) - return true; - - // If the block ends with a B and a Bcc, handle it. 
- if (LastOpc == AArch64::Bimm) { - if (SecondLastOpc == AArch64::Bcc) { - TBB = SecondLastInst->getOperand(1).getMBB(); - Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc)); - Cond.push_back(SecondLastInst->getOperand(0)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } else if (isCondBranch(SecondLastOpc)) { - classifyCondBranch(SecondLastInst, TBB, Cond); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } - } - - // If the block ends with two unconditional branches, handle it. The second - // one is not executed, so remove it. - if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } - - // Otherwise, can't handle this. - return true; -} - -bool AArch64InstrInfo::ReverseBranchCondition( - SmallVectorImpl &Cond) const { - switch (Cond[0].getImm()) { - case AArch64::Bcc: { - A64CC::CondCodes CC = static_cast(Cond[1].getImm()); - CC = A64InvertCondCode(CC); - Cond[1].setImm(CC); - return false; - } - case AArch64::CBZw: - Cond[0].setImm(AArch64::CBNZw); - return false; - case AArch64::CBZx: - Cond[0].setImm(AArch64::CBNZx); - return false; - case AArch64::CBNZw: - Cond[0].setImm(AArch64::CBZw); - return false; - case AArch64::CBNZx: - Cond[0].setImm(AArch64::CBZx); - return false; - case AArch64::TBZwii: - Cond[0].setImm(AArch64::TBNZwii); - return false; - case AArch64::TBZxii: - Cond[0].setImm(AArch64::TBNZxii); - return false; - case AArch64::TBNZwii: - Cond[0].setImm(AArch64::TBZwii); - return false; - case AArch64::TBNZxii: - Cond[0].setImm(AArch64::TBZxii); - return false; - default: - llvm_unreachable("Unknown branch type"); - } -} - - -unsigned -AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const { - if (!FBB && Cond.empty()) { - BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB); - return 1; - } else if (!FBB) { - MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); - for (int i = 1, e = Cond.size(); i != e; ++i) - MIB.addOperand(Cond[i]); - MIB.addMBB(TBB); - return 1; - } - - MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); - for (int i = 1, e = Cond.size(); i != e; ++i) - MIB.addOperand(Cond[i]); - MIB.addMBB(TBB); - - BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB); - return 2; -} - -unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } - if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode())) - return 0; - - // Remove the branch. - I->eraseFromParent(); - - I = MBB.end(); - - if (I == MBB.begin()) return 1; - --I; - if (!isCondBranch(I->getOpcode())) - return 1; - - // Remove the branch. 
- I->eraseFromParent(); - return 2; -} - -bool -AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const { - MachineInstr &MI = *MBBI; - MachineBasicBlock &MBB = *MI.getParent(); - - unsigned Opcode = MI.getOpcode(); - switch (Opcode) { - case AArch64::TLSDESC_BLRx: { - MachineInstr *NewMI = - BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL)) - .addOperand(MI.getOperand(1)); - MI.setDesc(get(AArch64::BLRx)); - - llvm::finalizeBundle(MBB, NewMI, *++MBBI); - return true; - } - default: - return false; - } - - return false; -} - -void -AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, - int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL = MBB.findDebugLoc(MBBI); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned Align = MFI.getObjectAlignment(FrameIdx); - - MachineMemOperand *MMO - = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOStore, - MFI.getObjectSize(FrameIdx), - Align); - - unsigned StoreOp = 0; - if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) { - switch(RC->getSize()) { - case 4: StoreOp = AArch64::LS32_STR; break; - case 8: StoreOp = AArch64::LS64_STR; break; - default: - llvm_unreachable("Unknown size for regclass"); - } - } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) { - StoreOp = AArch64::LSFP8_STR; - } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) { - StoreOp = AArch64::LSFP16_STR; - } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) || - RC->hasType(MVT::f128)) { - switch (RC->getSize()) { - case 4: StoreOp = AArch64::LSFP32_STR; break; - case 8: StoreOp = AArch64::LSFP64_STR; break; - case 16: StoreOp = AArch64::LSFP128_STR; break; - default: - llvm_unreachable("Unknown size for regclass"); - } - } else { // For a super register class has more than one sub registers - if (AArch64::DPairRegClass.hasSubClassEq(RC)) - StoreOp = AArch64::ST1x2_8B; - else if (AArch64::DTripleRegClass.hasSubClassEq(RC)) - StoreOp = AArch64::ST1x3_8B; - else if (AArch64::DQuadRegClass.hasSubClassEq(RC)) - StoreOp = AArch64::ST1x4_8B; - else if (AArch64::QPairRegClass.hasSubClassEq(RC)) - StoreOp = AArch64::ST1x2_16B; - else if (AArch64::QTripleRegClass.hasSubClassEq(RC)) - StoreOp = AArch64::ST1x3_16B; - else if (AArch64::QQuadRegClass.hasSubClassEq(RC)) - StoreOp = AArch64::ST1x4_16B; - else - llvm_unreachable("Unknown reg class"); - - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp)); - // Vector store has different operands from other store instructions. 
- NewMI.addFrameIndex(FrameIdx) - .addReg(SrcReg, getKillRegState(isKill)) - .addMemOperand(MMO); - return; - } - - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp)); - NewMI.addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FrameIdx) - .addImm(0) - .addMemOperand(MMO); - -} - -void -AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL = MBB.findDebugLoc(MBBI); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned Align = MFI.getObjectAlignment(FrameIdx); - - MachineMemOperand *MMO - = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FrameIdx), - Align); - - unsigned LoadOp = 0; - if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) { - switch(RC->getSize()) { - case 4: LoadOp = AArch64::LS32_LDR; break; - case 8: LoadOp = AArch64::LS64_LDR; break; - default: - llvm_unreachable("Unknown size for regclass"); - } - } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) { - LoadOp = AArch64::LSFP8_LDR; - } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) { - LoadOp = AArch64::LSFP16_LDR; - } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) || - RC->hasType(MVT::f128)) { - switch (RC->getSize()) { - case 4: LoadOp = AArch64::LSFP32_LDR; break; - case 8: LoadOp = AArch64::LSFP64_LDR; break; - case 16: LoadOp = AArch64::LSFP128_LDR; break; - default: - llvm_unreachable("Unknown size for regclass"); - } - } else { // For a super register class has more than one sub registers - if (AArch64::DPairRegClass.hasSubClassEq(RC)) - LoadOp = AArch64::LD1x2_8B; - else if (AArch64::DTripleRegClass.hasSubClassEq(RC)) - LoadOp = AArch64::LD1x3_8B; - else if (AArch64::DQuadRegClass.hasSubClassEq(RC)) - LoadOp = AArch64::LD1x4_8B; - else if (AArch64::QPairRegClass.hasSubClassEq(RC)) - LoadOp = AArch64::LD1x2_16B; - else if (AArch64::QTripleRegClass.hasSubClassEq(RC)) - LoadOp = AArch64::LD1x3_16B; - else if (AArch64::QQuadRegClass.hasSubClassEq(RC)) - LoadOp = AArch64::LD1x4_16B; - else - llvm_unreachable("Unknown reg class"); - - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg); - // Vector load has different operands from other load instructions. - NewMI.addFrameIndex(FrameIdx) - .addMemOperand(MMO); - return; - } - - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg); - NewMI.addFrameIndex(FrameIdx) - .addImm(0) - .addMemOperand(MMO); -} - -unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const { - unsigned Limit = (1 << 16) - 1; - for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - if (!I->getOperand(i).isFI()) continue; - - // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff - // is the largest offset guaranteed to fit in the immediate offset. 
- if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) { - Limit = std::min(Limit, 0xfffu); - break; - } - - int AccessScale, MinOffset, MaxOffset; - getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset); - Limit = std::min(Limit, static_cast(MaxOffset)); - - break; // At most one FI per instruction - } - } - } - - return Limit; -} -void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI, - int &AccessScale, int &MinOffset, - int &MaxOffset) const { - switch (MI.getOpcode()) { - default: - llvm_unreachable("Unknown load/store kind"); - case TargetOpcode::DBG_VALUE: - AccessScale = 1; - MinOffset = INT_MIN; - MaxOffset = INT_MAX; - return; - case AArch64::LS8_LDR: case AArch64::LS8_STR: - case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR: - case AArch64::LDRSBw: - case AArch64::LDRSBx: - AccessScale = 1; - MinOffset = 0; - MaxOffset = 0xfff; - return; - case AArch64::LS16_LDR: case AArch64::LS16_STR: - case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR: - case AArch64::LDRSHw: - case AArch64::LDRSHx: - AccessScale = 2; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LS32_LDR: case AArch64::LS32_STR: - case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR: - case AArch64::LDRSWx: - case AArch64::LDPSWx: - AccessScale = 4; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LS64_LDR: case AArch64::LS64_STR: - case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR: - case AArch64::PRFM: - AccessScale = 8; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR: - AccessScale = 16; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR: - case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR: - AccessScale = 4; - MinOffset = -0x40 * AccessScale; - MaxOffset = 0x3f * AccessScale; - return; - case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR: - case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR: - AccessScale = 8; - MinOffset = -0x40 * AccessScale; - MaxOffset = 0x3f * AccessScale; - return; - case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR: - AccessScale = 16; - MinOffset = -0x40 * AccessScale; - MaxOffset = 0x3f * AccessScale; - return; - case AArch64::LD1x2_8B: case AArch64::ST1x2_8B: - AccessScale = 16; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LD1x3_8B: case AArch64::ST1x3_8B: - AccessScale = 24; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LD1x4_8B: case AArch64::ST1x4_8B: - case AArch64::LD1x2_16B: case AArch64::ST1x2_16B: - AccessScale = 32; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LD1x3_16B: case AArch64::ST1x3_16B: - AccessScale = 48; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - case AArch64::LD1x4_16B: case AArch64::ST1x4_16B: - AccessScale = 64; - MinOffset = 0; - MaxOffset = 0xfff * AccessScale; - return; - } -} - -unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { - const MCInstrDesc &MCID = MI.getDesc(); - const MachineBasicBlock &MBB = *MI.getParent(); - const MachineFunction &MF = *MBB.getParent(); - const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo(); - - if (MCID.getSize()) - return MCID.getSize(); - - if (MI.getOpcode() == AArch64::INLINEASM) - return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI); - - switch (MI.getOpcode()) { - case TargetOpcode::BUNDLE: - return getInstBundleLength(MI); - case 
TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case TargetOpcode::CFI_INSTRUCTION: - case TargetOpcode::EH_LABEL: - case TargetOpcode::GC_LABEL: - case TargetOpcode::DBG_VALUE: - case AArch64::TLSDESCCALL: - return 0; - default: - llvm_unreachable("Unknown instruction class"); - } -} - -unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const { - unsigned Size = 0; - MachineBasicBlock::const_instr_iterator I = MI; - MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); - while (++I != E && I->isInsideBundle()) { - assert(!I->isBundle() && "No nested bundle!"); - Size += getInstSizeInBytes(*I); - } - return Size; -} - -bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, - const AArch64InstrInfo &TII) { - MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - - MFI.getObjectOffset(FrameRegIdx); - llvm_unreachable("Unimplemented rewriteFrameIndex"); -} - -void llvm::emitRegUpdate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - DebugLoc dl, const TargetInstrInfo &TII, - unsigned DstReg, unsigned SrcReg, unsigned ScratchReg, - int64_t NumBytes, MachineInstr::MIFlag MIFlags) { - if (NumBytes == 0 && DstReg == SrcReg) - return; - else if (abs64(NumBytes) & ~0xffffff) { - // Generically, we have to materialize the offset into a temporary register - // and subtract it. There are a couple of ways this could be done, for now - // we'll use a movz/movk or movn/movk sequence. - uint64_t Bits = static_cast(abs64(NumBytes)); - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg) - .addImm(0xffff & Bits).addImm(0) - .setMIFlags(MIFlags); - - Bits >>= 16; - if (Bits & 0xffff) { - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) - .addReg(ScratchReg) - .addImm(0xffff & Bits).addImm(1) - .setMIFlags(MIFlags); - } - - Bits >>= 16; - if (Bits & 0xffff) { - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) - .addReg(ScratchReg) - .addImm(0xffff & Bits).addImm(2) - .setMIFlags(MIFlags); - } - - Bits >>= 16; - if (Bits & 0xffff) { - BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) - .addReg(ScratchReg) - .addImm(0xffff & Bits).addImm(3) - .setMIFlags(MIFlags); - } - - // ADD DST, SRC, xTMP (, lsl #0) - unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx; - BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg) - .addReg(SrcReg, RegState::Kill) - .addReg(ScratchReg, RegState::Kill) - .addImm(0) - .setMIFlag(MIFlags); - return; - } - - // Now we know that the adjustment can be done in at most two add/sub - // (immediate) instructions, which is always more efficient than a - // literal-pool load, or even a hypothetical movz/movk/add sequence - - // Decide whether we're doing addition or subtraction - unsigned LowOp, HighOp; - if (NumBytes >= 0) { - LowOp = AArch64::ADDxxi_lsl0_s; - HighOp = AArch64::ADDxxi_lsl12_s; - } else { - LowOp = AArch64::SUBxxi_lsl0_s; - HighOp = AArch64::SUBxxi_lsl12_s; - NumBytes = abs64(NumBytes); - } - - // If we're here, at the very least a move needs to be produced, which just - // happens to be materializable by an ADD. - if ((NumBytes & 0xfff) || NumBytes == 0) { - BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(NumBytes & 0xfff) - .setMIFlag(MIFlags); - - // Next update should use the register we've just defined. 
- SrcReg = DstReg; - } - - if (NumBytes & 0xfff000) { - BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(NumBytes >> 12) - .setMIFlag(MIFlags); - } -} - -void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - DebugLoc dl, const TargetInstrInfo &TII, - unsigned ScratchReg, int64_t NumBytes, - MachineInstr::MIFlag MIFlags) { - emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16, - NumBytes, MIFlags); -} - - -namespace { - struct LDTLSCleanup : public MachineFunctionPass { - static char ID; - LDTLSCleanup() : MachineFunctionPass(ID) {} - - bool runOnMachineFunction(MachineFunction &MF) override { - AArch64MachineFunctionInfo* MFI - = MF.getInfo(); - if (MFI->getNumLocalDynamicTLSAccesses() < 2) { - // No point folding accesses if there isn't at least two. - return false; - } - - MachineDominatorTree *DT = &getAnalysis(); - return VisitNode(DT->getRootNode(), 0); - } - - // Visit the dominator subtree rooted at Node in pre-order. - // If TLSBaseAddrReg is non-null, then use that to replace any - // TLS_base_addr instructions. Otherwise, create the register - // when the first such instruction is seen, and then use it - // as we encounter more instructions. - bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) { - MachineBasicBlock *BB = Node->getBlock(); - bool Changed = false; - - // Traverse the current block. - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; - ++I) { - switch (I->getOpcode()) { - case AArch64::TLSDESC_BLRx: - // Make sure it's a local dynamic access. - if (!I->getOperand(1).isSymbol() || - strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_")) - break; - - if (TLSBaseAddrReg) - I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg); - else - I = SetRegister(I, &TLSBaseAddrReg); - Changed = true; - break; - default: - break; - } - } - - // Visit the children of this block in the dominator tree. - for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end(); - I != E; ++I) { - Changed |= VisitNode(*I, TLSBaseAddrReg); - } - - return Changed; - } - - // Replace the TLS_base_addr instruction I with a copy from - // TLSBaseAddrReg, returning the new instruction. - MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I, - unsigned TLSBaseAddrReg) { - MachineFunction *MF = I->getParent()->getParent(); - const AArch64TargetMachine *TM = - static_cast(&MF->getTarget()); - const AArch64InstrInfo *TII = TM->getInstrInfo(); - - // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the - // code sequence assumes the address will be. - MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(), - TII->get(TargetOpcode::COPY), - AArch64::X0) - .addReg(TLSBaseAddrReg); - - // Erase the TLS_base_addr instruction. - I->eraseFromParent(); - - return Copy; - } - - // Create a virtal register in *TLSBaseAddrReg, and populate it by - // inserting a copy instruction after I. Returns the new instruction. - MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) { - MachineFunction *MF = I->getParent()->getParent(); - const AArch64TargetMachine *TM = - static_cast(&MF->getTarget()); - const AArch64InstrInfo *TII = TM->getInstrInfo(); - - // Create a virtual register for the TLS base address. - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass); - - // Insert a copy from X0 to TLSBaseAddrReg for later. 
- MachineInstr *Next = I->getNextNode(); - MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), - TII->get(TargetOpcode::COPY), - *TLSBaseAddrReg) - .addReg(AArch64::X0); - - return Copy; - } - - const char *getPassName() const override { - return "Local Dynamic TLS Access Clean-up"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired(); - MachineFunctionPass::getAnalysisUsage(AU); - } - }; -} - -char LDTLSCleanup::ID = 0; -FunctionPass* -llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h deleted file mode 100644 index 10d5185ab630..000000000000 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ /dev/null @@ -1,112 +0,0 @@ -//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the AArch64 implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_AARCH64INSTRINFO_H -#define LLVM_TARGET_AARCH64INSTRINFO_H - -#include "AArch64RegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - -#define GET_INSTRINFO_HEADER -#include "AArch64GenInstrInfo.inc" - -namespace llvm { - -class AArch64Subtarget; - -class AArch64InstrInfo : public AArch64GenInstrInfo { - const AArch64RegisterInfo RI; - const AArch64Subtarget &Subtarget; -public: - explicit AArch64InstrInfo(const AArch64Subtarget &TM); - - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). 
- /// - const TargetRegisterInfo &getRegisterInfo() const { return RI; } - - const AArch64Subtarget &getSubTarget() const { return Subtarget; } - - void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const override; - void CopyPhysRegTuple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg) const; - - void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; - void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; - - bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify = false) const override; - unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const override; - unsigned RemoveBranch(MachineBasicBlock &MBB) const override; - bool - ReverseBranchCondition(SmallVectorImpl &Cond) const override; - - bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; - - /// Look through the instructions in this function and work out the largest - /// the stack frame can be while maintaining the ability to address local - /// slots with no complexities. - unsigned estimateRSStackLimit(MachineFunction &MF) const; - - /// getAddressConstraints - For loads and stores (and PRFMs) taking an - /// immediate offset, this function determines the constraints required for - /// the immediate. It must satisfy: - /// + MinOffset <= imm <= MaxOffset - /// + imm % OffsetScale == 0 - void getAddressConstraints(const MachineInstr &MI, int &AccessScale, - int &MinOffset, int &MaxOffset) const; - - - unsigned getInstSizeInBytes(const MachineInstr &MI) const; - - unsigned getInstBundleLength(const MachineInstr &MI) const; - -}; - -bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, - const AArch64InstrInfo &TII); - - -void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - DebugLoc dl, const TargetInstrInfo &TII, - unsigned DstReg, unsigned SrcReg, unsigned ScratchReg, - int64_t NumBytes, - MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags); - -void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - DebugLoc dl, const TargetInstrInfo &TII, - unsigned ScratchReg, int64_t NumBytes, - MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags); - -} - -#endif diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td deleted file mode 100644 index 4d3c80152c30..000000000000 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ /dev/null @@ -1,5388 +0,0 @@ -//===----- AArch64InstrInfo.td - AArch64 Instruction Info ----*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the AArch64 scalar instructions in TableGen format. 
-// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// ARM Instruction Predicate Definitions. -// -def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, - AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">; -def HasNEON : Predicate<"Subtarget->hasNEON()">, - AssemblerPredicate<"FeatureNEON", "neon">; -def HasCrypto : Predicate<"Subtarget->hasCrypto()">, - AssemblerPredicate<"FeatureCrypto","crypto">; - -// Use fused MAC if more precision in FP computation is allowed. -def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" - " FPOpFusion::Fast)">; -include "AArch64InstrFormats.td" - -//===----------------------------------------------------------------------===// -// AArch64 specific pattern fragments. -// -// An 'fmul' node with a single use. -def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{ - return N->hasOneUse(); -}]>; - - -//===----------------------------------------------------------------------===// -// Target-specific ISD nodes and profiles -//===----------------------------------------------------------------------===// - -def SDT_A64ret : SDTypeProfile<0, 0, []>; -def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain, - SDNPOptInGlue, - SDNPVariadic]>; - -// (ins NZCV, Condition, Dest) -def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>; -def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>; - -// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition) -def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<2, 3>]>; -def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>; - -// (outs NZCV), (ins LHS, RHS, Condition) -def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, - SDTCisSameAs<1, 2>]>; -def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>; - - -// (outs GPR64), (ins) -def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; - -// A64 compares don't care about the cond really (they set all flags) so a -// simple binary operator is useful. -def A64cmp : PatFrag<(ops node:$lhs, node:$rhs), - (A64setcc node:$lhs, node:$rhs, cond)>; - - -// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN -// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C -// and V flags can be set differently by this operation. It comes down to -// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are -// then everything is fine. If not then the optimization is wrong. Thus general -// comparisons are only valid if op2 != 0. - -// So, finally, the only LLVM-native comparisons that don't mention C and V are -// SETEQ and SETNE. They're the only ones we can safely use CMN for in the -// absence of information about op2. -def equality_cond : PatLeaf<(cond), [{ - return N->get() == ISD::SETEQ || N->get() == ISD::SETNE; -}]>; - -def A64cmn : PatFrag<(ops node:$lhs, node:$rhs), - (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>; - -// There are two layers of indirection here, driven by the following -// considerations. -// + TableGen does not know CodeModel or Reloc so that decision should be -// made for a variable/address at ISelLowering. 
-// + The output of ISelLowering should be selectable (hence the Wrapper, -// rather than a bare target opcode) -def SDTAArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisSameAs<0, 4>, - SDTCisPtrTy<0>]>; - -def A64WrapperLarge :SDNode<"AArch64ISD::WrapperLarge", SDTAArch64WrapperLarge>; - -def SDTAArch64WrapperSmall : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, - SDTCisVT<3, i32>, - SDTCisPtrTy<0>]>; - -def A64WrapperSmall :SDNode<"AArch64ISD::WrapperSmall", SDTAArch64WrapperSmall>; - - -def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; -def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad, - [SDNPHasChain]>; - - -// (A64BFI LHS, RHS, LSB, Width) -def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, - SDTCisVT<3, i64>, - SDTCisVT<4, i64>]>; - -def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>; - -// (A64EXTR HiReg, LoReg, LSB) -def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, - SDTCisVT<3, i64>]>; -def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>; - -// (A64[SU]BFX Field, ImmR, ImmS). -// -// Note that ImmR and ImmS are already encoded for the actual instructions. The -// more natural LSB and Width mix together to form ImmR and ImmS, something -// which TableGen can't handle. -def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>; -def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>; - -def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>; - -class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; - -//===----------------------------------------------------------------------===// -// Call sequence pseudo-instructions -//===----------------------------------------------------------------------===// - - -def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; -def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; - -def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - -// The TLSDESCCALL node is a variant call which goes to an indirectly calculated -// destination but needs a relocation against a fixed symbol. As such it has two -// certain operands: the callee and the relocated variable. -// -// The TLS ABI only allows it to be selected to a BLR instructin (with -// appropriate relocation). -def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; - -def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall, - [SDNPInGlue, SDNPOutGlue, SDNPHasChain, - SDNPVariadic]>; - - -def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>; -def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; - -def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>; -def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; - - - -// These pseudo-instructions have special semantics by virtue of being passed to -// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by -// LowerCall to (in our case) tell the back-end about stack adjustments for -// arguments passed on the stack. Here we select those markers to -// pseudo-instructions which explicitly set the stack, and finally in the -// RegisterInfo we convert them to a true stack adjustment. 
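(Editorial aside on the CMN/CMP flag discussion a few lines up: the claim is easy to check on the host. The little program below is illustrative only, not part of this patch, and every name in it is made up. It computes the NZCV flags of op1 - (-op2) and of op1 + op2 and shows that N and Z always agree, while C and V can differ when op2 is 0 or INT32_MIN, which is why only SETEQ/SETNE can be mapped to CMN without knowing op2.)

  // cmn_flags_check.cpp - illustrative only; not LLVM code.
  #include <cstdint>
  #include <cstdio>

  struct Flags { bool N, Z, C, V; };

  // Flags of the 32-bit subtraction a - b (C is "no borrow", as on AArch64).
  static Flags subFlags(uint32_t a, uint32_t b) {
    uint32_t r = a - b;
    Flags f;
    f.N = (r >> 31) != 0;
    f.Z = r == 0;
    f.C = a >= b;                               // no borrow
    f.V = (((a ^ b) & (a ^ r)) >> 31) != 0;     // signed overflow of a - b
    return f;
  }

  // Flags of the 32-bit addition a + b.
  static Flags addFlags(uint32_t a, uint32_t b) {
    uint32_t r = a + b;
    Flags f;
    f.N = (r >> 31) != 0;
    f.Z = r == 0;
    f.C = r < a;                                // unsigned carry out
    f.V = ((~(a ^ b) & (a ^ r)) >> 31) != 0;    // signed overflow of a + b
    return f;
  }

  int main() {
    uint32_t a = 0, tests[] = {0u, 0x80000000u /* INT32_MIN */, 5u};
    for (uint32_t b : tests) {
      Flags cmp = subFlags(a, 0u - b);  // what "cmp a, (0 - b)" would set
      Flags cmn = addFlags(a, b);       // what "cmn a, b" sets
      printf("b=%#10x  Z:%d/%d  N:%d/%d  C:%d/%d  V:%d/%d\n", b,
             cmp.Z, cmn.Z, cmp.N, cmn.N, cmp.C, cmn.C, cmp.V, cmn.V);
    }
    return 0;
  }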
-let Defs = [XSP], Uses = [XSP] in { - def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt), - [(AArch64callseq_start timm:$amt)]>; - - def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2), - [(AArch64callseq_end timm:$amt1, timm:$amt2)]>; -} - -//===----------------------------------------------------------------------===// -// Atomic operation pseudo-instructions -//===----------------------------------------------------------------------===// - -// These get selected from C++ code as a pretty much direct translation from the -// generic DAG nodes. The one exception is the AtomicOrdering is added as an -// operand so that the eventual lowering can make use of it and choose -// acquire/release operations when required. - -let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in { -multiclass AtomicSizes { - def _I8 : PseudoInst<(outs GPR32:$dst), - (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; - def _I16 : PseudoInst<(outs GPR32:$dst), - (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; - def _I32 : PseudoInst<(outs GPR32:$dst), - (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; - def _I64 : PseudoInst<(outs GPR64:$dst), - (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>; -} -} - -defm ATOMIC_LOAD_ADD : AtomicSizes; -defm ATOMIC_LOAD_SUB : AtomicSizes; -defm ATOMIC_LOAD_AND : AtomicSizes; -defm ATOMIC_LOAD_OR : AtomicSizes; -defm ATOMIC_LOAD_XOR : AtomicSizes; -defm ATOMIC_LOAD_NAND : AtomicSizes; -defm ATOMIC_SWAP : AtomicSizes; -let Defs = [NZCV] in { - // These operations need a CMP to calculate the correct value - defm ATOMIC_LOAD_MIN : AtomicSizes; - defm ATOMIC_LOAD_MAX : AtomicSizes; - defm ATOMIC_LOAD_UMIN : AtomicSizes; - defm ATOMIC_LOAD_UMAX : AtomicSizes; -} - -class AtomicCmpSwap - : PseudoInst<(outs GPRData:$dst), - (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new, - i32imm:$ordering), []> { - let usesCustomInserter = 1; - let hasCtrlDep = 1; - let mayLoad = 1; - let mayStore = 1; - let Defs = [NZCV]; -} - -def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap; -def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap; -def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap; -def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap; - -//===----------------------------------------------------------------------===// -// Add-subtract (extended register) instructions -//===----------------------------------------------------------------------===// -// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP - -// The RHS of these operations is conceptually a sign/zero-extended -// register, optionally shifted left by 1-4. The extension can be a -// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but -// must be specified with one exception: - -// If one of the registers is sp/wsp then LSL is an alias for UXTW in -// 32-bit instructions and UXTX in 64-bit versions, the shift amount -// is not optional in that case (but can explicitly be 0), and the -// entire suffix can be skipped (e.g. "add sp, x3, x2"). 
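(To make the operand shape described above concrete, here is a small host-side model; it is illustrative only, not backend code, and the helper names are made up. For instance "add x0, x1, w2, sxtw #3" adds x1 to the sign-extended w2 shifted left by three, and the sp/wsp LSL alias is just the uxtw/uxtx case with an explicit shift.)

  // extended_register_add.cpp - host model of the AArch64
  // "add (extended register)" right-hand operand, for illustration only.
  #include <cstdint>
  #include <cstdio>

  // Models: add Xd, Xn, Wm, <extend> #shift   (shift is 0..4)
  static uint64_t addExtended(uint64_t xn, uint32_t wm, bool isSigned,
                              unsigned shift) {
    uint64_t ext = isSigned ? (uint64_t)(int64_t)(int32_t)wm   // sxtw
                            : (uint64_t)wm;                    // uxtw
    return xn + (ext << shift);
  }

  int main() {
    uint64_t x1 = 0x1000;
    uint32_t w2 = 0xFFFFFFFC;  // -4 when viewed as signed

    // add x0, x1, w2, sxtw #3  ->  0x1000 + (-4 << 3) = 0x1000 - 32
    printf("sxtw #3: %#llx\n",
           (unsigned long long)addExtended(x1, w2, /*isSigned=*/true, 3));

    // add x0, x1, w2, uxtw #0  ->  0x1000 + 0xFFFFFFFC
    printf("uxtw #0: %#llx\n",
           (unsigned long long)addExtended(x1, w2, /*isSigned=*/false, 0));
    return 0;
  }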
- -multiclass extend_operands { - def _asmoperand : AsmOperandClass { - let Name = PREFIX; - let RenderMethod = "addRegExtendOperands"; - let PredicateMethod = "isRegExtend"; - let DiagnosticType = "AddSubRegExtend" # Diag; - } - - def _operand : Operand, - ImmLeaf= 0 && Imm <= 4; }]> { - let PrintMethod = "printRegExtendOperand"; - let DecoderMethod = "DecodeRegExtendOperand"; - let ParserMatchClass = !cast(PREFIX # "_asmoperand"); - } -} - -defm UXTB : extend_operands<"UXTB", "Small">; -defm UXTH : extend_operands<"UXTH", "Small">; -defm UXTW : extend_operands<"UXTW", "Small">; -defm UXTX : extend_operands<"UXTX", "Large">; -defm SXTB : extend_operands<"SXTB", "Small">; -defm SXTH : extend_operands<"SXTH", "Small">; -defm SXTW : extend_operands<"SXTW", "Small">; -defm SXTX : extend_operands<"SXTX", "Large">; - -def LSL_extasmoperand : AsmOperandClass { - let Name = "RegExtendLSL"; - let RenderMethod = "addRegExtendOperands"; - let DiagnosticType = "AddSubRegExtendLarge"; -} - -def LSL_extoperand : Operand { - let ParserMatchClass = LSL_extasmoperand; -} - - -// The patterns for various sign-extensions are a little ugly and -// non-uniform because everything has already been promoted to the -// legal i64 and i32 types. We'll wrap the various variants up in a -// class for use later. -class extend_types { - dag uxtb; dag uxth; dag uxtw; dag uxtx; - dag sxtb; dag sxth; dag sxtw; dag sxtx; - ValueType ty; - RegisterClass GPR; -} - -def extends_to_i64 : extend_types { - let uxtb = (and (anyext i32:$Rm), 255); - let uxth = (and (anyext i32:$Rm), 65535); - let uxtw = (zext i32:$Rm); - let uxtx = (i64 $Rm); - - let sxtb = (sext_inreg (anyext i32:$Rm), i8); - let sxth = (sext_inreg (anyext i32:$Rm), i16); - let sxtw = (sext i32:$Rm); - let sxtx = (i64 $Rm); - - let ty = i64; - let GPR = GPR64xsp; -} - - -def extends_to_i32 : extend_types { - let uxtb = (and i32:$Rm, 255); - let uxth = (and i32:$Rm, 65535); - let uxtw = (i32 i32:$Rm); - let uxtx = (i32 i32:$Rm); - - let sxtb = (sext_inreg i32:$Rm, i8); - let sxth = (sext_inreg i32:$Rm, i16); - let sxtw = (i32 i32:$Rm); - let sxtx = (i32 i32:$Rm); - - let ty = i32; - let GPR = GPR32wsp; -} - -// Now, six of the extensions supported are easy and uniform: if the source size -// is 32-bits or less, then Rm is always a 32-bit register. We'll instantiate -// those instructions in one block. - -// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me: -// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would -// be impossible. -// + Patterns are very different as well. -// + Passing different registers would be ugly (more fields in extend_types -// would probably be the best option). 
-multiclass addsub_exts { - def w_uxtb : A64I_addsubext, - Sched<[WriteALU, ReadALU, ReadALU]>; - def w_uxth : A64I_addsubext, - Sched<[WriteALU, ReadALU, ReadALU]>; - def w_uxtw : A64I_addsubext, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def w_sxtb : A64I_addsubext, - Sched<[WriteALU, ReadALU, ReadALU]>; - def w_sxth : A64I_addsubext, - Sched<[WriteALU, ReadALU, ReadALU]>; - def w_sxtw : A64I_addsubext, - Sched<[WriteALU, ReadALU, ReadALU]>; -} - -// These two could be merge in with the above, but their patterns aren't really -// necessary and the naming-scheme would necessarily break: -multiclass addsub_xxtx { - def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011, - outs, - (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [(opfrag i64:$Rn, (shl i64:$Rm, UXTX_operand:$Imm3))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111, - outs, - (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [/* No Pattern: same as uxtx */], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; -} - -multiclass addsub_wxtx { - def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011, - outs, (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [/* No pattern: probably same as uxtw */], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111, - outs, (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [/* No Pattern: probably same as uxtw */], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; -} - -class SetRD - : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>; -class SetNZCV - : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>; - -defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD, - (outs GPR64xsp:$Rd), extends_to_i64>, - addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD, - (outs GPR64xsp:$Rd)>; -defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD, - (outs GPR32wsp:$Rd), extends_to_i32>, - addsub_wxtx< 0b0, 0b0, "add\t$Rd, ", - (outs GPR32wsp:$Rd)>; -defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD, - (outs GPR64xsp:$Rd), extends_to_i64>, - addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD, - (outs GPR64xsp:$Rd)>; -defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD, - (outs GPR32wsp:$Rd), extends_to_i32>, - addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ", - (outs GPR32wsp:$Rd)>; - -let Defs = [NZCV] in { -defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD, - (outs GPR64:$Rd), extends_to_i64>, - addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD, - (outs GPR64:$Rd)>; -defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD, - (outs GPR32:$Rd), extends_to_i32>, - addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ", - (outs GPR32:$Rd)>; -defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD, - (outs GPR64:$Rd), extends_to_i64>, - addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD, - (outs GPR64:$Rd)>; -defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD, - (outs GPR32:$Rd), extends_to_i32>, - addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ", - (outs GPR32:$Rd)>; - - -let SchedRW = [WriteCMP, ReadCMP, ReadCMP], Rd = 0b11111, isCompare = 1 in { -defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV, - (outs), extends_to_i64>, - addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV, (outs)>; -defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV, - (outs), extends_to_i32>, - addsub_wxtx< 0b0, 
0b1, "cmn\t", (outs)>; -defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV, - (outs), extends_to_i64>, - addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV, (outs)>; -defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV, - (outs), extends_to_i32>, - addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>; -} -} - -// Now patterns for the operation without a shift being needed. No patterns are -// created for uxtx/sxtx since they're non-uniform and it's expected that -// add/sub (shifted register) will handle those cases anyway. -multiclass addsubext_noshift_patterns { - def : Pat<(nodeop exts.ty:$Rn, exts.uxtb), - (!cast(prefix # "w_uxtb") $Rn, $Rm, 0)>; - def : Pat<(nodeop exts.ty:$Rn, exts.uxth), - (!cast(prefix # "w_uxth") $Rn, $Rm, 0)>; - def : Pat<(nodeop exts.ty:$Rn, exts.uxtw), - (!cast(prefix # "w_uxtw") $Rn, $Rm, 0)>; - - def : Pat<(nodeop exts.ty:$Rn, exts.sxtb), - (!cast(prefix # "w_sxtb") $Rn, $Rm, 0)>; - def : Pat<(nodeop exts.ty:$Rn, exts.sxth), - (!cast(prefix # "w_sxth") $Rn, $Rm, 0)>; - def : Pat<(nodeop exts.ty:$Rn, exts.sxtw), - (!cast(prefix # "w_sxtw") $Rn, $Rm, 0)>; -} - -defm : addsubext_noshift_patterns<"ADDxx", add, extends_to_i64>; -defm : addsubext_noshift_patterns<"ADDww", add, extends_to_i32>; -defm : addsubext_noshift_patterns<"SUBxx", sub, extends_to_i64>; -defm : addsubext_noshift_patterns<"SUBww", sub, extends_to_i32>; - -defm : addsubext_noshift_patterns<"CMNx", A64cmn, extends_to_i64>; -defm : addsubext_noshift_patterns<"CMNw", A64cmn, extends_to_i32>; -defm : addsubext_noshift_patterns<"CMPx", A64cmp, extends_to_i64>; -defm : addsubext_noshift_patterns<"CMPw", A64cmp, extends_to_i32>; - -// An extend of "lsl #imm" is valid if and only if one of Rn and Rd is -// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the -// operation. Also permitted in this case is complete omission of the argument, -// which implies "lsl #0". -multiclass lsl_aliases { - def : InstAlias; - - def : InstAlias; - -} - -defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>; -defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>; -defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>; -defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>; -defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>; -defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>; -defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>; -defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>; - -// Rd cannot be sp for flag-setting variants so only half of the aliases are -// needed. -defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>; -defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>; -defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>; -defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>; - -// CMP unfortunately has to be different because the instruction doesn't have a -// dest register. 
-multiclass cmp_lsl_aliases { - def : InstAlias; - - def : InstAlias; -} - -defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>; -defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>; -defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>; -defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>; - -//===----------------------------------------------------------------------===// -// Add-subtract (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV - -// These instructions accept a 12-bit unsigned immediate, optionally shifted -// left by 12 bits. Official assembly format specifies a 12 bit immediate with -// one of "", "LSL #0", "LSL #12" supplementary operands. - -// There are surprisingly few ways to make this work with TableGen, so this -// implementation has separate instructions for the "LSL #0" and "LSL #12" -// variants. - -// If the MCInst retained a single combined immediate (which could be 0x123000, -// for example) then both components (imm & shift) would have to be delegated to -// a single assembly operand. This would entail a separate operand parser -// (because the LSL would have to live in the same AArch64Operand as the -// immediate to be accessible); assembly parsing is rather complex and -// error-prone C++ code. -// -// By splitting the immediate, we can delegate handling this optional operand to -// an InstAlias. Supporting functions to generate the correct MCInst are still -// required, but these are essentially trivial and parsing can remain generic. -// -// Rejected plans with rationale: -// ------------------------------ -// -// In an ideal world you'de have two first class immediate operands (in -// InOperandList, specifying imm12 and shift). Unfortunately this is not -// selectable by any means I could discover. -// -// An Instruction with two MCOperands hidden behind a single entry in -// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional, -// but required more C++ code to handle encoding/decoding. Parsing (the intended -// main beneficiary) ended up equally complex because of the optional nature of -// "LSL #0". -// -// Attempting to circumvent the need for a custom OperandParser above by giving -// InstAliases without the "lsl #0" failed. add/sub could be accommodated but -// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands -// should be parsed: there was no way to accommodate an "lsl #12". - -let ParserMethod = "ParseImmWithLSLOperand", - RenderMethod = "addImmWithLSLOperands" in { - // Derived PredicateMethod fields are different for each - def addsubimm_lsl0_asmoperand : AsmOperandClass { - let Name = "AddSubImmLSL0"; - // If an error is reported against this operand, instruction could also be a - // register variant. 
- let DiagnosticType = "AddSubSecondSource"; - } - - def addsubimm_lsl12_asmoperand : AsmOperandClass { - let Name = "AddSubImmLSL12"; - let DiagnosticType = "AddSubSecondSource"; - } -} - -def shr_12_XFORM : SDNodeXFormgetTargetConstant(N->getSExtValue() >> 12, MVT::i32); -}]>; - -def shr_12_neg_XFORM : SDNodeXFormgetTargetConstant((-N->getSExtValue()) >> 12, MVT::i32); -}]>; - -def neg_XFORM : SDNodeXFormgetTargetConstant(-N->getSExtValue(), MVT::i32); -}]>; - - -multiclass addsub_imm_operands { - let PrintMethod = "printAddSubImmLSL0Operand", - EncoderMethod = "getAddSubImmOpValue", - ParserMatchClass = addsubimm_lsl0_asmoperand in { - def _posimm_lsl0 : Operand, - ImmLeaf= 0 && (Imm & ~0xfff) == 0; }]>; - def _negimm_lsl0 : Operand, - ImmLeaf; - } - - let PrintMethod = "printAddSubImmLSL12Operand", - EncoderMethod = "getAddSubImmOpValue", - ParserMatchClass = addsubimm_lsl12_asmoperand in { - def _posimm_lsl12 : Operand, - ImmLeaf= 0 && (Imm & ~0xfff000) == 0; }], - shr_12_XFORM>; - - def _negimm_lsl12 : Operand, - ImmLeaf; - } -} - -// The add operands don't need any transformation -defm addsubimm_operand_i32 : addsub_imm_operands; -defm addsubimm_operand_i64 : addsub_imm_operands; - -multiclass addsubimm_varieties shift, - string asmop, string cmpasmop, - Operand imm_operand, Operand cmp_imm_operand, - RegisterClass GPR, RegisterClass GPRsp, - AArch64Reg ZR, ValueType Ty> { - // All registers for non-S variants allow SP - def _s : A64I_addsubimm, - Sched<[WriteALU, ReadALU]>; - - - // S variants can read SP but would write to ZR - def _S : A64I_addsubimm, - Sched<[WriteALU, ReadALU]> { - let Defs = [NZCV]; - } - - // Note that the pattern here for ADDS is subtle. Canonically CMP - // a, b becomes SUBS a, b. If b < 0 then this is equivalent to - // ADDS a, (-b). This is not true in general. - def _cmp : A64I_addsubimm, - Sched<[WriteCMP, ReadCMP]> { - let Rd = 0b11111; - let Defs = [NZCV]; - let isCompare = 1; - } -} - - -multiclass addsubimm_shifts { - defm _lsl0 : addsubimm_varieties(operand # "_lsl0"), - !cast(cmpoperand # "_lsl0"), - GPR, GPRsp, ZR, Ty>; - - defm _lsl12 : addsubimm_varieties(operand # "_lsl12"), - !cast(cmpoperand # "_lsl12"), - GPR, GPRsp, ZR, Ty>; -} - -defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn", - "addsubimm_operand_i32_posimm", - "addsubimm_operand_i32_negimm", - GPR32, GPR32wsp, WZR, i32>; -defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn", - "addsubimm_operand_i64_posimm", - "addsubimm_operand_i64_negimm", - GPR64, GPR64xsp, XZR, i64>; -defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp", - "addsubimm_operand_i32_negimm", - "addsubimm_operand_i32_posimm", - GPR32, GPR32wsp, WZR, i32>; -defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp", - "addsubimm_operand_i64_negimm", - "addsubimm_operand_i64_posimm", - GPR64, GPR64xsp, XZR, i64>; - -multiclass MOVsp { - def _fromsp : InstAlias<"mov $Rd, $Rn", - (addop GPRsp:$Rd, SP:$Rn, 0), - 0b1>; - - def _tosp : InstAlias<"mov $Rd, $Rn", - (addop SP:$Rd, GPRsp:$Rn, 0), - 0b1>; -} - -// Recall Rxsp is a RegisterClass containing *just* xsp. -defm MOVxx : MOVsp; -defm MOVww : MOVsp; - -//===----------------------------------------------------------------------===// -// Add-subtract (shifted register) instructions -//===----------------------------------------------------------------------===// -// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS - -//===------------------------------- -// 1. The "shifted register" operands. Shared with logical insts. 
-//===------------------------------- - -multiclass shift_operands { - def _asmoperand_i32 : AsmOperandClass { - let Name = "Shift" # form # "i32"; - let RenderMethod = "addShiftOperands"; - let PredicateMethod = "isShift"; - let DiagnosticType = "AddSubRegShift32"; - } - - // Note that the operand type is intentionally i64 because the DAGCombiner - // puts these into a canonical form. - def _i32 : Operand, ImmLeaf= 0 && Imm <= 31; }]> { - let ParserMatchClass - = !cast(prefix # "_asmoperand_i32"); - let PrintMethod = "printShiftOperand"; - let DecoderMethod = "Decode32BitShiftOperand"; - } - - def _asmoperand_i64 : AsmOperandClass { - let Name = "Shift" # form # "i64"; - let RenderMethod = "addShiftOperands"; - let PredicateMethod = "isShift"; - let DiagnosticType = "AddSubRegShift64"; - } - - def _i64 : Operand, ImmLeaf= 0 && Imm <= 63; }]> { - let ParserMatchClass - = !cast(prefix # "_asmoperand_i64"); - let PrintMethod = "printShiftOperand"; - } -} - -defm lsl_operand : shift_operands<"lsl_operand", "LSL">; -defm lsr_operand : shift_operands<"lsr_operand", "LSR">; -defm asr_operand : shift_operands<"asr_operand", "ASR">; - -// Not used for add/sub, but defined here for completeness. The "logical -// (shifted register)" instructions *do* have an ROR variant. -defm ror_operand : shift_operands<"ror_operand", "ROR">; - -//===------------------------------- -// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions. -//===------------------------------- - -// N.b. the commutable parameter is just !N. It will be first against the wall -// when the revolution comes. -multiclass addsub_shifts defs> { - let isCommutable = commutable, Defs = defs in { - def _lsl : A64I_addsubshift("lsl_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set GPR:$Rd, (opfrag ty:$Rn, (shl ty:$Rm, - !cast("lsl_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; - - def _lsr : A64I_addsubshift("lsr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm, - !cast("lsr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; - - def _asr : A64I_addsubshift("asr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm, - !cast("asr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; - } - - def _noshift - : InstAlias(prefix # "_lsl") GPR:$Rd, GPR:$Rn, - GPR:$Rm, 0)>; - - def : Pat<(opfrag ty:$Rn, ty:$Rm), - (!cast(prefix # "_lsl") $Rn, $Rm, 0)>; -} - -multiclass addsub_sizes defs> { - defm xxx : addsub_shifts; - defm www : addsub_shifts; -} - - -defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>; -defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>; - -defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>; -defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>; - -//===------------------------------- -// 1. 
The NEG/NEGS aliases -//===------------------------------- - -multiclass neg_alias { - def : InstAlias<"neg $Rd, $Rm, $Imm6", - (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>; - - def : Pat<(sub 0, (shiftop ty:$Rm, shift_operand:$Imm6)), - (INST ZR, $Rm, shift_operand:$Imm6)>; -} - -defm : neg_alias; -defm : neg_alias; -defm : neg_alias; -def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; -def : Pat<(sub 0, i32:$Rm), (SUBwww_lsl WZR, $Rm, 0)>; - -defm : neg_alias; -defm : neg_alias; -defm : neg_alias; -def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; -def : Pat<(sub 0, i64:$Rm), (SUBxxx_lsl XZR, $Rm, 0)>; - -// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to -// be involved. -class negs_alias - : InstAlias<"negs $Rd, $Rm, $Imm6", - (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>; - -def : negs_alias; -def : negs_alias; -def : negs_alias; -def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; - -def : negs_alias; -def : negs_alias; -def : negs_alias; -def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; - -//===------------------------------- -// 1. The CMP/CMN aliases -//===------------------------------- - -multiclass cmp_shifts { - let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in { - def _lsl : A64I_addsubshift("lsl_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), - [(set NZCV, (opfrag ty:$Rn, (shl ty:$Rm, - !cast("lsl_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteCMP, ReadCMP, ReadCMP]>; - - def _lsr : A64I_addsubshift("lsr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), - [(set NZCV, (opfrag ty:$Rn, (srl ty:$Rm, - !cast("lsr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteCMP, ReadCMP, ReadCMP]>; - - def _asr : A64I_addsubshift("asr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), - [(set NZCV, (opfrag ty:$Rn, (sra ty:$Rm, - !cast("asr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteCMP, ReadCMP, ReadCMP]>; - } - - def _noshift - : InstAlias(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; - - def : Pat<(opfrag ty:$Rn, ty:$Rm), - (!cast(prefix # "_lsl") $Rn, $Rm, 0)>; -} - -defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, i32, GPR32>; -defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, i64, GPR64>; - -defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, i32, GPR32>; -defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, i64, GPR64>; - -//===----------------------------------------------------------------------===// -// Add-subtract (with carry) instructions -//===----------------------------------------------------------------------===// -// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS - -multiclass A64I_addsubcarrySizes { - let Uses = [NZCV] in { - def www : A64I_addsubcarry<0b0, op, s, 0b000000, - (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm), - !strconcat(asmop, "\t$Rd, $Rn, $Rm"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def xxx : A64I_addsubcarry<0b1, op, s, 0b000000, - (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm), - !strconcat(asmop, "\t$Rd, $Rn, $Rm"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - } -} - -let isCommutable = 1 in { - defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">; -} - -defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">; - -let Defs = [NZCV] in { - let isCommutable = 1 in { - defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">; - } - - defm SBCS : 
A64I_addsubcarrySizes<0b1, 0b1, "sbcs">; -} - -def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>; -def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>; -def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>; -def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>; - -// Note that adde and sube can form a chain longer than two (e.g. for 256-bit -// addition). So the flag-setting instructions are appropriate. -def : Pat<(adde i32:$Rn, i32:$Rm), (ADCSwww $Rn, $Rm)>; -def : Pat<(adde i64:$Rn, i64:$Rm), (ADCSxxx $Rn, $Rm)>; -def : Pat<(sube i32:$Rn, i32:$Rm), (SBCSwww $Rn, $Rm)>; -def : Pat<(sube i64:$Rn, i64:$Rm), (SBCSxxx $Rn, $Rm)>; - -//===----------------------------------------------------------------------===// -// Bitfield -//===----------------------------------------------------------------------===// -// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL, -// UBFIZ, UBFX - -// Because of the rather complicated nearly-overlapping aliases, the decoding of -// this range of instructions is handled manually. The architectural -// instructions are BFM, SBFM and UBFM but a disassembler should never produce -// these. -// -// In the end, the best option was to use BFM instructions for decoding under -// almost all circumstances, but to create aliasing *Instructions* for each of -// the canonical forms and specify a completely custom decoder which would -// substitute the correct MCInst as needed. -// -// This also simplifies instruction selection, parsing etc because the MCInsts -// have a shape that's closer to their use in code. - -//===------------------------------- -// 1. The architectural BFM instructions -//===------------------------------- - -def uimm5_asmoperand : AsmOperandClass { - let Name = "UImm5"; - let PredicateMethod = "isUImm<5>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm5"; -} - -def uimm6_asmoperand : AsmOperandClass { - let Name = "UImm6"; - let PredicateMethod = "isUImm<6>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm6"; -} - -def bitfield32_imm : Operand, - ImmLeaf= 0 && Imm < 32; }]> { - let ParserMatchClass = uimm5_asmoperand; - - let DecoderMethod = "DecodeBitfield32ImmOperand"; -} - - -def bitfield64_imm : Operand, - ImmLeaf= 0 && Imm < 64; }]> { - let ParserMatchClass = uimm6_asmoperand; - - // Default decoder works in 64-bit case: the 6-bit field can take any value. -} - -multiclass A64I_bitfieldSizes opc, string asmop> { - def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), - (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { - let DecoderMethod = "DecodeBitfieldInstruction"; - } - - def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), - (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { - let DecoderMethod = "DecodeBitfieldInstruction"; - } -} - -defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">; -defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">; - -// BFM instructions modify the destination register rather than defining it -// completely. 
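(As background for the aliases in this section, here is a simplified host-side model of 32-bit UBFM; it is illustrative only, uses the common two-case reading of the instruction and skips reserved encodings. It shows how LSR, LSL, UBFX and UBFIZ are all the same underlying operation; the BFM forms defined next differ only in that they merge the field into the existing destination value, hence the "$src = $Rd" constraint below.)

  // ubfm_model.cpp - simplified model of 32-bit UBFM, showing how LSR, LSL,
  // UBFX and UBFIZ are all special cases of one instruction. Illustration only.
  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  static uint32_t ubfm32(uint32_t rn, unsigned immr, unsigned imms) {
    assert(immr < 32 && imms < 32);
    if (imms >= immr) {
      // ubfx rd, rn, #immr, #(imms - immr + 1): extract a field down to bit 0.
      unsigned width = imms - immr + 1;
      uint32_t mask = (width == 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);
      return (rn >> immr) & mask;
    }
    // ubfiz rd, rn, #(32 - immr), #(imms + 1): move a low field up the register.
    unsigned lsb = 32 - immr, width = imms + 1;
    uint32_t mask = (1u << width) - 1u;   // width <= 31 in this branch
    return (rn & mask) << lsb;
  }

  int main() {
    uint32_t x = 0x12345678;
    // lsr w0, w1, #8    ==  ubfm w0, w1, #8, #31
    printf("lsr  #8 : %#x (expect %#x)\n", ubfm32(x, 8, 31), x >> 8);
    // lsl w0, w1, #8    ==  ubfm w0, w1, #24, #23  (immr = (32-8)%32, imms = 31-8)
    printf("lsl  #8 : %#x (expect %#x)\n", ubfm32(x, 24, 23), x << 8);
    // ubfx w0, w1, #4, #12  ==  ubfm w0, w1, #4, #15
    printf("ubfx    : %#x (expect %#x)\n", ubfm32(x, 4, 15), (x >> 4) & 0xFFF);
    return 0;
  }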
-def BFMwwii : - A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), - (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), - "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - let DecoderMethod = "DecodeBitfieldInstruction"; - let Constraints = "$src = $Rd"; -} - -def BFMxxii : - A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), - (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), - "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - let DecoderMethod = "DecodeBitfieldInstruction"; - let Constraints = "$src = $Rd"; -} - - -//===------------------------------- -// 2. Extend aliases to 64-bit dest -//===------------------------------- - -// Unfortunately the extensions that end up as 64-bits cannot be handled by an -// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs -// to be mapped to "SBFM x0, x0, #0, 7" (changing the class of Rn). InstAlias is -// not capable of such a map as far as I'm aware - -// Note that these instructions are strictly more specific than the -// BFM ones (in ImmR) so they can handle their own decoding. -class A64I_bf_ext opc, RegisterClass GPRDest, ValueType dty, - string asmop, bits<6> imms, dag pattern> - : A64I_bitfield, - Sched<[WriteALU, ReadALU]> { - let ImmR = 0b000000; - let ImmS = imms; -} - -// Signed extensions -def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtb", 7, - (sext_inreg (anyext i32:$Rn), i8)>; -def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxtb", 7, - (sext_inreg i32:$Rn, i8)>; -def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxth", 15, - (sext_inreg (anyext i32:$Rn), i16)>; -def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxth", 15, - (sext_inreg i32:$Rn, i16)>; -def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtw", 31, (sext i32:$Rn)>; - -// Unsigned extensions -def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxtb", 7, - (and i32:$Rn, 255)>; -def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxth", 15, - (and i32:$Rn, 65535)>; - -// The 64-bit unsigned variants are not strictly architectural but recommended -// for consistency. -let isAsmParserOnly = 1 in { - def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxtb", 7, - (and (anyext i32:$Rn), 255)>; - def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxth", 15, - (and (anyext i32:$Rn), 65535)>; -} - -// Extra patterns for when the source register is actually 64-bits -// too. There's no architectural difference here, it's just LLVM -// shinanigans. There's no need for equivalent zero-extension patterns -// because they'll already be caught by logical (immediate) matching. -def : Pat<(sext_inreg i64:$Rn, i8), - (SXTBxw (EXTRACT_SUBREG $Rn, sub_32))>; -def : Pat<(sext_inreg i64:$Rn, i16), - (SXTHxw (EXTRACT_SUBREG $Rn, sub_32))>; -def : Pat<(sext_inreg i64:$Rn, i32), - (SXTWxw (EXTRACT_SUBREG $Rn, sub_32))>; - - -//===------------------------------- -// 3. Aliases for ASR and LSR (the simple shifts) -//===------------------------------- - -// These also handle their own decoding because ImmS being set makes -// them take precedence over BFM. 
-multiclass A64I_shift opc, string asmop, SDNode opnode> { - def wwi : A64I_bitfield<0b0, opc, 0b0, - (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR"), - [(set i32:$Rd, (opnode i32:$Rn, bitfield32_imm:$ImmR))], - NoItinerary>, - Sched<[WriteALU, ReadALU]> { - let ImmS = 31; - } - - def xxi : A64I_bitfield<0b1, opc, 0b1, - (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR"), - [(set i64:$Rd, (opnode i64:$Rn, bitfield64_imm:$ImmR))], - NoItinerary>, - Sched<[WriteALU, ReadALU]> { - let ImmS = 63; - } - -} - -defm ASR : A64I_shift<0b00, "asr", sra>; -defm LSR : A64I_shift<0b10, "lsr", srl>; - -//===------------------------------- -// 4. Aliases for LSL -//===------------------------------- - -// Unfortunately LSL and subsequent aliases are much more complicated. We need -// to be able to say certain output instruction fields depend in a complex -// manner on combinations of input assembly fields). -// -// MIOperandInfo *might* have been able to do it, but at the cost of -// significantly more C++ code. - -// N.b. contrary to usual practice these operands store the shift rather than -// the machine bits in an MCInst. The complexity overhead of consistency -// outweighed the benefits in this case (custom asmparser, printer and selection -// vs custom encoder). -def bitfield32_lsl_imm : Operand, - ImmLeaf= 0 && Imm <= 31; }]> { - let ParserMatchClass = uimm5_asmoperand; - let EncoderMethod = "getBitfield32LSLOpValue"; -} - -def bitfield64_lsl_imm : Operand, - ImmLeaf= 0 && Imm <= 63; }]> { - let ParserMatchClass = uimm6_asmoperand; - let EncoderMethod = "getBitfield64LSLOpValue"; -} - -class A64I_bitfield_lsl - : A64I_bitfield, - Sched<[WriteALU, ReadALU]> { - bits<12> FullImm; - let ImmR = FullImm{5-0}; - let ImmS = FullImm{11-6}; - - // No disassembler allowed because it would overlap with BFM which does the - // actual work. - let isAsmParserOnly = 1; -} - -def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, i32, bitfield32_lsl_imm>; -def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, i64, bitfield64_lsl_imm>; - -//===------------------------------- -// 5. Aliases for bitfield extract instructions -//===------------------------------- - -def bfx32_width_asmoperand : AsmOperandClass { - let Name = "BFX32Width"; - let PredicateMethod = "isBitfieldWidth<32>"; - let RenderMethod = "addBFXWidthOperands"; - let DiagnosticType = "Width32"; -} - -def bfx32_width : Operand, ImmLeaf { - let PrintMethod = "printBFXWidthOperand"; - let ParserMatchClass = bfx32_width_asmoperand; -} - -def bfx64_width_asmoperand : AsmOperandClass { - let Name = "BFX64Width"; - let PredicateMethod = "isBitfieldWidth<64>"; - let RenderMethod = "addBFXWidthOperands"; - let DiagnosticType = "Width64"; -} - -def bfx64_width : Operand { - let PrintMethod = "printBFXWidthOperand"; - let ParserMatchClass = bfx64_width_asmoperand; -} - - -multiclass A64I_bitfield_extract opc, string asmop, SDNode op> { - def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), - (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [(set i32:$Rd, (op i32:$Rn, imm:$ImmR, imm:$ImmS))], - NoItinerary>, - Sched<[WriteALU, ReadALU]> { - // As above, no disassembler allowed. 
- let isAsmParserOnly = 1; - } - - def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), - (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [(set i64:$Rd, (op i64:$Rn, imm:$ImmR, imm:$ImmS))], - NoItinerary>, - Sched<[WriteALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - } -} - -defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>; -defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>; - -// Again, variants based on BFM modify Rd so need it as an input too. -def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), - (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), - "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - let Constraints = "$src = $Rd"; -} - -def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), - (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), - "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - let Constraints = "$src = $Rd"; -} - -// SBFX instructions can do a 1-instruction sign-extension of boolean values. -def : Pat<(sext_inreg i64:$Rn, i1), (SBFXxxii $Rn, 0, 0)>; -def : Pat<(sext_inreg i32:$Rn, i1), (SBFXwwii $Rn, 0, 0)>; -def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)), - (SBFXxxii (SUBREG_TO_REG (i64 0), $Rn, sub_32), 0, 0)>; - -// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could -// use either 64-bit or 32-bit variant, but 32-bit might be more efficient. -def : Pat<(i64 (zext i32:$Rn)), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31), - sub_32)>; - -//===------------------------------- -// 6. Aliases for bitfield insert instructions -//===------------------------------- - -def bfi32_lsb_asmoperand : AsmOperandClass { - let Name = "BFI32LSB"; - let PredicateMethod = "isUImm<5>"; - let RenderMethod = "addBFILSBOperands<32>"; - let DiagnosticType = "UImm5"; -} - -def bfi32_lsb : Operand, - ImmLeaf= 0 && Imm <= 31; }]> { - let PrintMethod = "printBFILSBOperand<32>"; - let ParserMatchClass = bfi32_lsb_asmoperand; -} - -def bfi64_lsb_asmoperand : AsmOperandClass { - let Name = "BFI64LSB"; - let PredicateMethod = "isUImm<6>"; - let RenderMethod = "addBFILSBOperands<64>"; - let DiagnosticType = "UImm6"; -} - -def bfi64_lsb : Operand, - ImmLeaf= 0 && Imm <= 63; }]> { - let PrintMethod = "printBFILSBOperand<64>"; - let ParserMatchClass = bfi64_lsb_asmoperand; -} - -// Width verification is performed during conversion so width operand can be -// shared between 32/64-bit cases. Still needed for the print method though -// because ImmR encodes "width - 1". 
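(For reference, a sketch of what the BFI and BFXIL aliases built on BFM compute; the helper names are made up for this illustration and this is not backend code. Both read the old destination value, which is why those definitions tie $src to $Rd; the width operands defined next feed these aliases.)

  // bitfield_insert_model.cpp - illustration of the BFI / BFXIL aliases of BFM.
  // Assumes lsb + width <= 32; field-range checking is omitted.
  #include <cstdint>
  #include <cstdio>

  // bfi wd, wn, #lsb, #width : insert the low `width` bits of wn at `lsb` of wd.
  static uint32_t bfi32(uint32_t rd, uint32_t rn, unsigned lsb, unsigned width) {
    uint32_t mask = (width == 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);
    return (rd & ~(mask << lsb)) | ((rn & mask) << lsb);
  }

  // bfxil wd, wn, #lsb, #width : copy bits [lsb, lsb+width) of wn into bits
  // [0, width) of wd, leaving the rest of wd unchanged.
  static uint32_t bfxil32(uint32_t rd, uint32_t rn, unsigned lsb, unsigned width) {
    uint32_t mask = (width == 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);
    return (rd & ~mask) | ((rn >> lsb) & mask);
  }

  int main() {
    uint32_t rd = 0xAAAAAAAA, rn = 0x1234;
    printf("bfi   rd, rn, #8, #16 -> %#x\n", bfi32(rd, rn, 8, 16));   // 0xAA1234AA
    printf("bfxil rd, rn, #4, #8  -> %#x\n", bfxil32(rd, rn, 4, 8));  // 0xAAAAAA23
    return 0;
  }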
-def bfi32_width_asmoperand : AsmOperandClass { - let Name = "BFI32Width"; - let PredicateMethod = "isBitfieldWidth<32>"; - let RenderMethod = "addBFIWidthOperands"; - let DiagnosticType = "Width32"; -} - -def bfi32_width : Operand, - ImmLeaf= 1 && Imm <= 32; }]> { - let PrintMethod = "printBFIWidthOperand"; - let ParserMatchClass = bfi32_width_asmoperand; -} - -def bfi64_width_asmoperand : AsmOperandClass { - let Name = "BFI64Width"; - let PredicateMethod = "isBitfieldWidth<64>"; - let RenderMethod = "addBFIWidthOperands"; - let DiagnosticType = "Width64"; -} - -def bfi64_width : Operand, - ImmLeaf= 1 && Imm <= 64; }]> { - let PrintMethod = "printBFIWidthOperand"; - let ParserMatchClass = bfi64_width_asmoperand; -} - -multiclass A64I_bitfield_insert opc, string asmop> { - def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), - (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - } - - def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), - (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - } -} - -defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">; -defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">; - - -def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), - (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), - "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - let Constraints = "$src = $Rd"; -} - -def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), - (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), - "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - let Constraints = "$src = $Rd"; -} - -//===----------------------------------------------------------------------===// -// Compare and branch (immediate) -//===----------------------------------------------------------------------===// -// Contains: CBZ, CBNZ - -class label_asmoperand : AsmOperandClass { - let Name = "Label" # width # "_" # scale; - let PredicateMethod = "isLabel<" # width # "," # scale # ">"; - let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">"; - let DiagnosticType = "Label"; -} - -def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>; - -// All conditional immediate branches are the same really: 19 signed bits scaled -// by the instruction-size (4). -def bcc_target : Operand { - // This label is a 19-bit offset from PC, scaled by the instruction-width: 4. 
- let ParserMatchClass = label_wid19_scal4_asmoperand; - let PrintMethod = "printLabelOperand<19, 4>"; - let EncoderMethod = "getLabelOpValue"; - let OperandType = "OPERAND_PCREL"; -} - -multiclass cmpbr_sizes { - let isBranch = 1, isTerminator = 1 in { - def x : A64I_cmpbr<0b1, op, - (outs), - (ins GPR64:$Rt, bcc_target:$Label), - !strconcat(asmop,"\t$Rt, $Label"), - [(A64br_cc (A64cmp i64:$Rt, 0), SETOP, bb:$Label)], - NoItinerary>, - Sched<[WriteBr, ReadBr]>; - - def w : A64I_cmpbr<0b0, op, - (outs), - (ins GPR32:$Rt, bcc_target:$Label), - !strconcat(asmop,"\t$Rt, $Label"), - [(A64br_cc (A64cmp i32:$Rt, 0), SETOP, bb:$Label)], - NoItinerary>, - Sched<[WriteBr, ReadBr]>; - } -} - -defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf >; -defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf >; - -//===----------------------------------------------------------------------===// -// Conditional branch (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: B.cc - -def cond_code_asmoperand : AsmOperandClass { - let Name = "CondCode"; - let DiagnosticType = "CondCode"; -} - -def cond_code : Operand, ImmLeaf= 0 && Imm <= 15; -}]> { - let PrintMethod = "printCondCodeOperand"; - let ParserMatchClass = cond_code_asmoperand; -} - -def Bcc : A64I_condbr<0b0, 0b0, (outs), - (ins cond_code:$Cond, bcc_target:$Label), - "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)], - NoItinerary>, - Sched<[WriteBr]> { - let Uses = [NZCV]; - let isBranch = 1; - let isTerminator = 1; -} - -//===----------------------------------------------------------------------===// -// Conditional compare (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: CCMN, CCMP - -def uimm4_asmoperand : AsmOperandClass { - let Name = "UImm4"; - let PredicateMethod = "isUImm<4>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm4"; -} - -def uimm4 : Operand { - let ParserMatchClass = uimm4_asmoperand; -} - -def uimm5 : Operand { - let ParserMatchClass = uimm5_asmoperand; -} - -// The only difference between this operand and the one for instructions like -// B.cc is that it's parsed manually. The other get parsed implicitly as part of -// the mnemonic handling. 
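(For readers unfamiliar with the conditional compare family, a short host-side sketch of its behaviour; this is illustrative only, not ARM reference pseudocode, and the helper names are invented. If the condition holds on the current flags the instruction performs the compare of the two operands, where the second operand is a register or a small immediate depending on the form; otherwise it loads the 4-bit NZCV immediate directly.)

  // ccmp_model.cpp - sketch of conditional-compare behaviour:
  //   ccmp a, b, #nzcv, cond
  #include <cstdint>
  #include <cstdio>

  struct NZCV { bool N, Z, C, V; };

  static NZCV flagsOfCompare(uint32_t a, uint32_t b) {   // flags of a - b
    uint32_t r = a - b;
    return { (r >> 31) != 0, r == 0, a >= b,
             (((a ^ b) & (a ^ r)) >> 31) != 0 };
  }

  static NZCV fromImm(unsigned imm4) {                   // decode a #nzcv immediate
    return { (imm4 & 8) != 0, (imm4 & 4) != 0, (imm4 & 2) != 0, (imm4 & 1) != 0 };
  }

  // ccmp a, b, #nzcv, cond: compare if cond held, otherwise load the immediate.
  static NZCV ccmp(uint32_t a, uint32_t b, unsigned imm4, bool condHolds) {
    return condHolds ? flagsOfCompare(a, b) : fromImm(imm4);
  }

  int main() {
    NZCV t = ccmp(1, 2, /*#nzcv=*/0, /*condHolds=*/true);
    NZCV f = ccmp(1, 2, /*#nzcv=*/4, /*condHolds=*/false);  // force Z=1
    printf("cond true : N=%d Z=%d C=%d V=%d\n", t.N, t.Z, t.C, t.V);
    printf("cond false: N=%d Z=%d C=%d V=%d\n", f.N, f.Z, f.C, f.V);
    return 0;
  }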
-def cond_code_op_asmoperand : AsmOperandClass { - let Name = "CondCodeOp"; - let RenderMethod = "addCondCodeOperands"; - let PredicateMethod = "isCondCode"; - let ParserMethod = "ParseCondCodeOperand"; - let DiagnosticType = "CondCode"; -} - -def cond_code_op : Operand { - let PrintMethod = "printCondCodeOperand"; - let ParserMatchClass = cond_code_op_asmoperand; -} - -class A64I_condcmpimmImpl - : A64I_condcmpimm, - Sched<[WriteCMP, ReadCMP]> { - let Defs = [NZCV]; -} - -def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">; -def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">; -def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">; -def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">; - -//===----------------------------------------------------------------------===// -// Conditional compare (register) instructions -//===----------------------------------------------------------------------===// -// Contains: CCMN, CCMP - -class A64I_condcmpregImpl - : A64I_condcmpreg, - Sched<[WriteCMP, ReadCMP, ReadCMP]> { - let Defs = [NZCV]; -} - -def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">; -def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">; -def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">; -def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">; - -//===----------------------------------------------------------------------===// -// Conditional select instructions -//===----------------------------------------------------------------------===// -// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG - -// Condition code which is encoded as the inversion (semantically rather than -// bitwise) in the instruction. -def inv_cond_code_op_asmoperand : AsmOperandClass { - let Name = "InvCondCodeOp"; - let RenderMethod = "addInvCondCodeOperands"; - let PredicateMethod = "isCondCode"; - let ParserMethod = "ParseCondCodeOperand"; - let DiagnosticType = "CondCode"; -} - -def inv_cond_code_op : Operand { - let ParserMatchClass = inv_cond_code_op_asmoperand; - let PrintMethod = "printInverseCondCodeOperand"; -} - -// Having a separate operand for the selectable use-case is debatable, but gives -// consistency with cond_code. 
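(To illustrate what "semantically rather than bitwise" means, and how the cset alias further down falls out of csinc, here is a stand-alone sketch. The enum follows the architectural condition-code numbering; the invert helper is only a stand-in for the A64InvertCondCode call used in the XForm below, and none of this is backend code.)

  // invert_cond.cpp - semantic condition inversion and cset as
  // "csinc rd, zr, zr, inv(cond)". Illustration only.
  #include <cstdio>

  // AArch64 condition-code encodings 0..15.
  enum Cond { EQ = 0, NE, HS, LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV };

  // Semantic inverse: EQ<->NE, GE<->LT, ... For EQ..LE this happens to coincide
  // with flipping bit 0 of the encoding, but AL/NV have no meaningful inverse,
  // which is why the inversion is defined per condition rather than as a bit op.
  static Cond invert(Cond c) {
    switch (c) {
    case EQ: return NE;  case NE: return EQ;
    case HS: return LO;  case LO: return HS;
    case MI: return PL;  case PL: return MI;
    case VS: return VC;  case VC: return VS;
    case HI: return LS;  case LS: return HI;
    case GE: return LT;  case LT: return GE;
    case GT: return LE;  case LE: return GT;
    default: return c;   // AL/NV: no inverse
    }
  }

  // csinc rd, rn, rm, cond  ->  cond ? rn : rm + 1
  static long csinc(bool condHolds, long rn, long rm) {
    return condHolds ? rn : rm + 1;
  }

  int main() {
    // cset rd, cond  ==  csinc rd, zr, zr, invert(cond):
    //   if cond holds, the *inverted* condition fails, so rd = zr + 1 = 1.
    for (int condHolds = 0; condHolds <= 1; ++condHolds) {
      bool invertedHolds = !condHolds;
      printf("cond=%d -> cset gives %ld\n", condHolds,
             csinc(invertedHolds, /*zr=*/0, /*zr=*/0));
    }
    printf("invert(GE) == LT ? %d\n", invert(GE) == LT);
    return 0;
  }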
-def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); - return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32); -}]>; - -def inv_cond_code - : ImmLeaf= 0 && Imm <= 15; }], inv_cond_XFORM>; - - -multiclass A64I_condselSizes op2, string asmop, - SDPatternOperator select> { - let Uses = [NZCV] in { - def wwwc : A64I_condsel<0b0, op, 0b0, op2, - (outs GPR32:$Rd), - (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), - [(set i32:$Rd, (select i32:$Rn, i32:$Rm))], - NoItinerary>, - Sched<[WriteCMP, ReadCMP, ReadCMP]>; - - - def xxxc : A64I_condsel<0b1, op, 0b0, op2, - (outs GPR64:$Rd), - (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), - [(set i64:$Rd, (select i64:$Rn, i64:$Rm))], - NoItinerary>, - Sched<[WriteCMP, ReadCMP, ReadCMP]>; - } -} - -def simple_select - : PatFrag<(ops node:$lhs, node:$rhs), - (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>; - -class complex_select - : PatFrag<(ops node:$lhs, node:$rhs), - (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>; - - -defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>; -defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc", - complex_select>>; -defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select>; -defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select>; - -// Now the instruction aliases, which fit nicely into LLVM's model: - -def : InstAlias<"cset $Rd, $Cond", - (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>; -def : InstAlias<"cset $Rd, $Cond", - (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>; -def : InstAlias<"csetm $Rd, $Cond", - (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>; -def : InstAlias<"csetm $Rd, $Cond", - (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>; -def : InstAlias<"cinc $Rd, $Rn, $Cond", - (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; -def : InstAlias<"cinc $Rd, $Rn, $Cond", - (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; -def : InstAlias<"cinv $Rd, $Rn, $Cond", - (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; -def : InstAlias<"cinv $Rd, $Rn, $Cond", - (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; -def : InstAlias<"cneg $Rd, $Rn, $Cond", - (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; -def : InstAlias<"cneg $Rd, $Rn, $Cond", - (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; - -// Finally some helper patterns. - -// For CSET (a.k.a. zero-extension of icmp) -def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond), - (CSINCwwwc WZR, WZR, cond_code:$Cond)>; -def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond), - (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>; - -def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond), - (CSINCxxxc XZR, XZR, cond_code:$Cond)>; -def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond), - (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>; - -// For CSETM (a.k.a. 
sign-extension of icmp) -def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond), - (CSINVwwwc WZR, WZR, cond_code:$Cond)>; -def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond), - (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>; - -def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond), - (CSINVxxxc XZR, XZR, cond_code:$Cond)>; -def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond), - (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>; - -// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of -// commutativity. The instructions are to complex for isCommutable to be used, -// so we have to create the patterns manually: - -// No commutable pattern for CSEL since the commuted version is isomorphic. - -// CSINC -def :Pat<(A64select_cc NZCV, (add i32:$Rm, 1), i32:$Rn, inv_cond_code:$Cond), - (CSINCwwwc $Rn, $Rm, inv_cond_code:$Cond)>; -def :Pat<(A64select_cc NZCV, (add i64:$Rm, 1), i64:$Rn, inv_cond_code:$Cond), - (CSINCxxxc $Rn, $Rm, inv_cond_code:$Cond)>; - -// CSINV -def :Pat<(A64select_cc NZCV, (not i32:$Rm), i32:$Rn, inv_cond_code:$Cond), - (CSINVwwwc $Rn, $Rm, inv_cond_code:$Cond)>; -def :Pat<(A64select_cc NZCV, (not i64:$Rm), i64:$Rn, inv_cond_code:$Cond), - (CSINVxxxc $Rn, $Rm, inv_cond_code:$Cond)>; - -// CSNEG -def :Pat<(A64select_cc NZCV, (ineg i32:$Rm), i32:$Rn, inv_cond_code:$Cond), - (CSNEGwwwc $Rn, $Rm, inv_cond_code:$Cond)>; -def :Pat<(A64select_cc NZCV, (ineg i64:$Rm), i64:$Rn, inv_cond_code:$Cond), - (CSNEGxxxc $Rn, $Rm, inv_cond_code:$Cond)>; - -//===----------------------------------------------------------------------===// -// Data Processing (1 source) instructions -//===----------------------------------------------------------------------===// -// Contains: RBIT, REV16, REV, REV32, CLZ, CLS. - -// We define an unary operator which always fails. We will use this to -// define unary operators that cannot be matched. 
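(Two identities used by the patterns defined just below are easy to sanity-check on the host: counting trailing zeros equals clz of the bit-reversed value, which is what the (CLZ (RBIT ...)) patterns rely on, and REV32xx/REV16ww match bswap of a rotated input. The check below is illustrative only; GCC/Clang builtins provide the reference values, and the rbit/clz/rev helpers are plain C++ models, not backend code.)

  // dp1src_identities.cpp - host-side check of the RBIT/CLZ and REV identities.
  #include <cstdint>
  #include <cstdio>

  static uint32_t rbit32(uint32_t x) {              // model of RBIT
    uint32_t r = 0;
    for (int i = 0; i < 32; ++i)
      r |= ((x >> i) & 1u) << (31 - i);
    return r;
  }

  static unsigned clz32(uint32_t x) {               // model of CLZ (defined for 0 too)
    unsigned n = 0;
    while (n < 32 && !((x >> (31 - n)) & 1u))
      ++n;
    return n;
  }

  static uint64_t rotr64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }
  static uint32_t rotr32(uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }

  static uint64_t rev32(uint64_t x) {               // byte-reverse each 32-bit word
    return ((uint64_t)__builtin_bswap32((uint32_t)(x >> 32)) << 32) |
           __builtin_bswap32((uint32_t)x);
  }
  static uint32_t rev16(uint32_t x) {               // byte-reverse each halfword
    return ((x & 0x00FF00FFu) << 8) | ((x & 0xFF00FF00u) >> 8);
  }

  int main() {
    // (cttz x) -> (CLZ (RBIT x))
    uint32_t tests[] = {1u, 8u, 0x00F00000u, 0x80000000u};
    for (uint32_t x : tests)
      printf("cttz(%#010x) = %u, clz(rbit(x)) = %u\n", x,
             (unsigned)__builtin_ctz(x), clz32(rbit32(x)));

    // rev32 == bswap64(rotr64(x, 32)); rev16 == bswap32(rotr32(w, 16))
    uint64_t x = 0x0102030405060708ULL;
    uint32_t w = 0x11223344u;
    printf("rev32: %#llx == %#llx\n", (unsigned long long)rev32(x),
           (unsigned long long)__builtin_bswap64(rotr64(x, 32)));
    printf("rev16: %#x == %#x\n", rev16(w), __builtin_bswap32(rotr32(w, 16)));
    return 0;
  }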
- -class A64I_dp_1src_impl opcode, string asmop, - list patterns, RegisterClass GPRrc, - InstrItinClass itin>: - A64I_dp_1src, - Sched<[WriteALU, ReadALU]>; - -multiclass A64I_dp_1src opcode, string asmop> { - let hasSideEffects = 0 in { - def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>; - def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>; - } -} - -defm RBIT : A64I_dp_1src<0b000000, "rbit">; -defm CLS : A64I_dp_1src<0b000101, "cls">; -defm CLZ : A64I_dp_1src<0b000100, "clz">; - -def : Pat<(ctlz i32:$Rn), (CLZww $Rn)>; -def : Pat<(ctlz i64:$Rn), (CLZxx $Rn)>; -def : Pat<(ctlz_zero_undef i32:$Rn), (CLZww $Rn)>; -def : Pat<(ctlz_zero_undef i64:$Rn), (CLZxx $Rn)>; - -def : Pat<(cttz i32:$Rn), (CLZww (RBITww $Rn))>; -def : Pat<(cttz i64:$Rn), (CLZxx (RBITxx $Rn))>; -def : Pat<(cttz_zero_undef i32:$Rn), (CLZww (RBITww $Rn))>; -def : Pat<(cttz_zero_undef i64:$Rn), (CLZxx (RBITxx $Rn))>; - - -def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev", - [(set i32:$Rd, (bswap i32:$Rn))], - GPR32, NoItinerary>; -def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev", - [(set i64:$Rd, (bswap i64:$Rn))], - GPR64, NoItinerary>; -def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32", - [(set i64:$Rd, (bswap (rotr i64:$Rn, (i64 32))))], - GPR64, NoItinerary>; -def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16", - [(set i32:$Rd, (bswap (rotr i32:$Rn, (i64 16))))], - GPR32, - NoItinerary>; -def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>; - -//===----------------------------------------------------------------------===// -// Data Processing (2 sources) instructions -//===----------------------------------------------------------------------===// -// Contains: CRC32C?[BHWX], UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL, -// LSR, ASR, ROR - - -class dp_2src_impl opcode, string asmop, list patterns, - RegisterClass GPRsp, - InstrItinClass itin>: - A64I_dp_2src, - Sched<[WriteALU, ReadALU, ReadALU]>; - -multiclass dp_2src_crc { - def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0}, - !strconcat(asmop, "b"), [], GPR32, NoItinerary>; - def H_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 1}, - !strconcat(asmop, "h"), [], GPR32, NoItinerary>; - def W_www : dp_2src_impl<0b0, {0, 1, 0, c, 1, 0}, - !strconcat(asmop, "w"), [], GPR32, NoItinerary>; - def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0, - !strconcat(asmop, "x\t$Rd, $Rn, $Rm"), - (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; -} - -multiclass dp_2src_zext opcode, string asmop, SDPatternOperator op> { - def www : dp_2src_impl<0b0, - opcode, - asmop, - [(set i32:$Rd, - (op i32:$Rn, (i64 (zext i32:$Rm))))], - GPR32, - NoItinerary>; - def xxx : dp_2src_impl<0b1, - opcode, - asmop, - [(set i64:$Rd, (op i64:$Rn, i64:$Rm))], - GPR64, - NoItinerary>; -} - - -multiclass dp_2src opcode, string asmop, SDPatternOperator op> { - def www : dp_2src_impl<0b0, - opcode, - asmop, - [(set i32:$Rd, (op i32:$Rn, i32:$Rm))], - GPR32, - NoItinerary>; - def xxx : dp_2src_impl<0b1, - opcode, - asmop, - [(set i64:$Rd, (op i64:$Rn, i64:$Rm))], - GPR64, - NoItinerary>; -} - -// Here we define the data processing 2 source instructions. 
-defm CRC32 : dp_2src_crc<0b0, "crc32">; -defm CRC32C : dp_2src_crc<0b1, "crc32c">; - -let SchedRW = [WriteDiv, ReadDiv, ReadDiv] in { - defm UDIV : dp_2src<0b000010, "udiv", udiv>; - defm SDIV : dp_2src<0b000011, "sdiv", sdiv>; -} - -let SchedRW = [WriteALUs, ReadALU, ReadALU] in { - defm LSLV : dp_2src_zext<0b001000, "lsl", shl>; - defm LSRV : dp_2src_zext<0b001001, "lsr", srl>; - defm ASRV : dp_2src_zext<0b001010, "asr", sra>; - defm RORV : dp_2src_zext<0b001011, "ror", rotr>; -} - -// Extra patterns for an incoming 64-bit value for a 32-bit -// operation. Since the LLVM operations are undefined (as in C) if the -// RHS is out of range, it's perfectly permissible to discard the high -// bits of the GPR64. -def : Pat<(shl i32:$Rn, i64:$Rm), - (LSLVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>; -def : Pat<(srl i32:$Rn, i64:$Rm), - (LSRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>; -def : Pat<(sra i32:$Rn, i64:$Rm), - (ASRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>; -def : Pat<(rotr i32:$Rn, i64:$Rm), - (RORVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>; - -// Here we define the aliases for the data processing 2 source instructions. -def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">; -def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">; -def ASR_menmonic : MnemonicAlias<"asrv", "asr">; -def ROR_menmonic : MnemonicAlias<"rorv", "ror">; - -//===----------------------------------------------------------------------===// -// Data Processing (3 sources) instructions -//===----------------------------------------------------------------------===// -// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH -// + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL - -class A64I_dp3_4operand opcode, RegisterClass AccReg, - ValueType AccTy, RegisterClass SrcReg, - string asmop, dag pattern> - : A64I_dp3, - Sched<[WriteMAC, ReadMAC, ReadMAC, ReadMAC]> { - bits<5> Ra; - let Inst{14-10} = Ra; - - RegisterClass AccGPR = AccReg; - RegisterClass SrcGPR = SrcReg; -} - -def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, i32, GPR32, "madd", - (add i32:$Ra, (mul i32:$Rn, i32:$Rm))>; -def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, i64, GPR64, "madd", - (add i64:$Ra, (mul i64:$Rn, i64:$Rm))>; - -def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, i32, GPR32, "msub", - (sub i32:$Ra, (mul i32:$Rn, i32:$Rm))>; -def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, i64, GPR64, "msub", - (sub i64:$Ra, (mul i64:$Rn, i64:$Rm))>; - -def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, i64, GPR32, "smaddl", - (add i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>; -def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, i64, GPR32, "smsubl", - (sub i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>; - -def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, i64, GPR32, "umaddl", - (add i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>; -def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, i64, GPR32, "umsubl", - (sub i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>; - -let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in { - def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd), - (ins GPR64:$Rn, GPR64:$Rm), - "umulh\t$Rd, $Rn, $Rm", - [(set i64:$Rd, (mulhu i64:$Rn, i64:$Rm))], - NoItinerary>, - Sched<[WriteMAC, ReadMAC, ReadMAC]>; - - def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd), - (ins GPR64:$Rn, GPR64:$Rm), - "smulh\t$Rd, $Rn, $Rm", - [(set i64:$Rd, (mulhs i64:$Rn, i64:$Rm))], - NoItinerary>, - Sched<[WriteMAC, ReadMAC, ReadMAC]>; -} - -multiclass A64I_dp3_3operand { - def : 
InstAlias; - - def : Pat; -} - -defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul i32:$Rn, i32:$Rm)>; -defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul i64:$Rn, i64:$Rm)>; - -defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR, - (sub 0, (mul i32:$Rn, i32:$Rm))>; -defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR, - (sub 0, (mul i64:$Rn, i64:$Rm))>; - -defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR, - (mul (i64 (sext i32:$Rn)), (sext i32:$Rm))>; -defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR, - (sub 0, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>; - -defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR, - (mul (i64 (zext i32:$Rn)), (zext i32:$Rm))>; -defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR, - (sub 0, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>; - - -//===----------------------------------------------------------------------===// -// Exception generation -//===----------------------------------------------------------------------===// -// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3 - -def uimm16_asmoperand : AsmOperandClass { - let Name = "UImm16"; - let PredicateMethod = "isUImm<16>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm16"; -} - -def uimm16 : Operand { - let ParserMatchClass = uimm16_asmoperand; -} - -class A64I_exceptImpl opc, bits<2> ll, string asmop> - : A64I_exception, - Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; -} - -def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">; -def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">; -def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">; -def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">; -def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">; - -def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">; -def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">; -def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">; - -// The immediate is optional for the DCPS instructions, defaulting to 0. 
-def : InstAlias<"dcps1", (DCPS1i 0)>; -def : InstAlias<"dcps2", (DCPS2i 0)>; -def : InstAlias<"dcps3", (DCPS3i 0)>; - -//===----------------------------------------------------------------------===// -// Extract (immediate) -//===----------------------------------------------------------------------===// -// Contains: EXTR + alias ROR - -def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0, - (outs GPR32:$Rd), - (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB), - "extr\t$Rd, $Rn, $Rm, $LSB", - [(set i32:$Rd, - (A64Extr i32:$Rn, i32:$Rm, imm:$LSB))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; -def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1, - (outs GPR64:$Rd), - (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB), - "extr\t$Rd, $Rn, $Rm, $LSB", - [(set i64:$Rd, - (A64Extr i64:$Rn, i64:$Rm, imm:$LSB))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - -def : InstAlias<"ror $Rd, $Rs, $LSB", - (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>; -def : InstAlias<"ror $Rd, $Rs, $LSB", - (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>; - -def : Pat<(rotr i32:$Rn, bitfield32_imm:$LSB), - (EXTRwwwi $Rn, $Rn, bitfield32_imm:$LSB)>; -def : Pat<(rotr i64:$Rn, bitfield64_imm:$LSB), - (EXTRxxxi $Rn, $Rn, bitfield64_imm:$LSB)>; - -//===----------------------------------------------------------------------===// -// Floating-point compare instructions -//===----------------------------------------------------------------------===// -// Contains: FCMP, FCMPE - -def fpzero_asmoperand : AsmOperandClass { - let Name = "FPZero"; - let ParserMethod = "ParseFPImmOperand"; - let DiagnosticType = "FPZero"; -} - -def fpz32 : Operand, - ComplexPattern { - let ParserMatchClass = fpzero_asmoperand; - let PrintMethod = "printFPZeroOperand"; - let DecoderMethod = "DecodeFPZeroOperand"; -} - -def fpz64 : Operand, - ComplexPattern { - let ParserMatchClass = fpzero_asmoperand; - let PrintMethod = "printFPZeroOperand"; - let DecoderMethod = "DecodeFPZeroOperand"; -} - -def fpz64movi : Operand, - ComplexPattern { - let ParserMatchClass = fpzero_asmoperand; - let PrintMethod = "printFPZeroOperand"; - let DecoderMethod = "DecodeFPZeroOperand"; -} - -multiclass A64I_fpcmpSignal type, bit imm, dag ins, dag pattern> { - def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0}, - (outs), ins, "fcmp\t$Rn, $Rm", [pattern], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Defs = [NZCV]; - } - - def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0}, - (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Defs = [NZCV]; - } -} - -defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm), - (set NZCV, (A64cmp f32:$Rn, f32:$Rm))>; -defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), - (set NZCV, (A64cmp f64:$Rn, f64:$Rm))>; - -// What would be Rm should be written as 0; note that even though it's called -// "$Rm" here to fit in with the InstrFormats, it's actually an immediate. 
-defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm), - (set NZCV, (A64cmp f32:$Rn, fpz32:$Rm))>; - -defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm), - (set NZCV, (A64cmp f64:$Rn, fpz64:$Rm))>; - - -//===----------------------------------------------------------------------===// -// Floating-point conditional compare instructions -//===----------------------------------------------------------------------===// -// Contains: FCCMP, FCCMPE - -class A64I_fpccmpImpl type, bit op, RegisterClass FPR, string asmop> - : A64I_fpccmp<0b0, 0b0, type, op, - (outs), - (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond), - !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"), - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Defs = [NZCV]; -} - -def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">; -def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">; -def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">; -def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">; - -//===----------------------------------------------------------------------===// -// Floating-point conditional select instructions -//===----------------------------------------------------------------------===// -// Contains: FCSEL - -let Uses = [NZCV] in { - def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd), - (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond), - "fcsel\t$Rd, $Rn, $Rm, $Cond", - [(set f32:$Rd, - (simple_select f32:$Rn, f32:$Rm))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - - def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd), - (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond), - "fcsel\t$Rd, $Rn, $Rm, $Cond", - [(set f64:$Rd, - (simple_select f64:$Rn, f64:$Rm))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; -} - -//===----------------------------------------------------------------------===// -// Floating-point data-processing (1 source) -//===----------------------------------------------------------------------===// -// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI]. - -def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val), - [{ (void)N; return false; }]>; - -// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d" -// syntax. Default to no pattern because most are odd enough not to have one. 
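As an aside on the compare-with-zero and conditional-select definitions above: an FCMP against #0.0 feeding a select is the usual consumer of these forms. A minimal C++ sketch; the function and the expected selection are illustrative assumptions, not taken from this patch:

    // Illustrative only: at -O2 for AArch64 this would be expected to lower to
    // "fcmp d0, #0.0" followed by an "fcsel" rather than a branch.
    double pick(double x, double a, double b) {
      return x < 0.0 ? a : b;
    }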
-multiclass A64I_fpdp1sizes opcode, string asmstr, - SDPatternOperator opnode = FPNoUnop> { - def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn), - !strconcat(asmstr, "\t$Rd, $Rn"), - [(set f32:$Rd, (opnode f32:$Rn))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn), - !strconcat(asmstr, "\t$Rd, $Rn"), - [(set f64:$Rd, (opnode f64:$Rn))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">; -defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>; -defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>; -let SchedRW = [WriteFPSqrt, ReadFPSqrt] in { - defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>; -} - -defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">; -defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>; -defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>; -defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>; -defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">; -defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>; -defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>; - -// The FCVT instrucitons have different source and destination register-types, -// but the fields are uniform everywhere a D-register (say) crops up. Package -// this information in a Record. -class FCVTRegType fld, ValueType vt> { - RegisterClass Class = rc; - ValueType VT = vt; - bit t1 = fld{1}; - bit t0 = fld{0}; -} - -def FCVT16 : FCVTRegType; -def FCVT32 : FCVTRegType; -def FCVT64 : FCVTRegType; - -class A64I_fpdp1_fcvt - : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0}, - {0,0,0,1, DestReg.t1, DestReg.t0}, - (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn), - "fcvt\t$Rd, $Rn", - [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - -def FCVTds : A64I_fpdp1_fcvt; -def FCVThs : A64I_fpdp1_fcvt; -def FCVTsd : A64I_fpdp1_fcvt; -def FCVThd : A64I_fpdp1_fcvt; -def FCVTsh : A64I_fpdp1_fcvt; -def FCVTdh : A64I_fpdp1_fcvt; - - -//===----------------------------------------------------------------------===// -// Floating-point data-processing (2 sources) instructions -//===----------------------------------------------------------------------===// -// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL - -def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs), - [{ (void)N; return false; }]>; - -multiclass A64I_fpdp2sizes opcode, string asmstr, - SDPatternOperator opnode> { - def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode, - (outs FPR32:$Rd), - (ins FPR32:$Rn, FPR32:$Rm), - !strconcat(asmstr, "\t$Rd, $Rn, $Rm"), - [(set f32:$Rd, (opnode f32:$Rn, f32:$Rm))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode, - (outs FPR64:$Rd), - (ins FPR64:$Rn, FPR64:$Rm), - !strconcat(asmstr, "\t$Rd, $Rn, $Rm"), - [(set f64:$Rd, (opnode f64:$Rn, f64:$Rm))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; -} - -let isCommutable = 1 in { - let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { - defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>; - } - defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>; - - // No patterns for these. 
- defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>; - defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>; - defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>; - defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>; - - let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { - defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul", - PatFrag<(ops node:$lhs, node:$rhs), - (fneg (fmul node:$lhs, node:$rhs))> >; - } -} - -let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in { - defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>; -} -defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>; - -//===----------------------------------------------------------------------===// -// Floating-point data-processing (3 sources) instructions -//===----------------------------------------------------------------------===// -// Contains: FMADD, FMSUB, FNMADD, FNMSUB - -def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), - (fma (fneg node:$Rn), node:$Rm, node:$Ra)>; -def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), - (fma node:$Rn, node:$Rm, (fneg node:$Ra))>; -def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), - (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>; - -class A64I_fpdp3Impl type, bit o1, bit o0, SDPatternOperator fmakind> - : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd), - (ins FPR:$Rn, FPR:$Rm, FPR:$Ra), - !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"), - [(set VT:$Rd, (fmakind VT:$Rn, VT:$Rm, VT:$Ra))], - NoItinerary>, - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]>; - -def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>; -def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>; -def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>; -def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>; - -def FMADDdddd : A64I_fpdp3Impl<"fmadd", FPR64, f64, 0b01, 0b0, 0b0, fma>; -def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>; -def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>; -def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>; - -// Extra patterns for when we're allowed to optimise separate multiplication and -// addition. 
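For illustration (an assumption about typical codegen, not part of this patch): when floating-point contraction is permitted, a separate multiply and add such as the one below is the kind of input the fused-MAC patterns that follow are intended to match as a single FMADD.

    // Illustrative only: with contraction enabled (e.g. -ffp-contract=fast)
    // this may be selected as one "fmadd d0, d0, d1, d2" instead of fmul+fadd.
    double mac(double a, double b, double c) {
      return a * b + c;
    }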
-let Predicates = [HasFPARMv8, UseFusedMAC] in { -def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))), - (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))), - (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))), - (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(f32 (fsub (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)), - (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; - -def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))), - (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))), - (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))), - (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(f64 (fsub (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)), - (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -} - - -//===----------------------------------------------------------------------===// -// Floating-point <-> fixed-point conversion instructions -//===----------------------------------------------------------------------===// -// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF - -// #1-#32 allowed, encoded as "64 - -def fixedpos_asmoperand_i32 : AsmOperandClass { - let Name = "CVTFixedPos32"; - let RenderMethod = "addCVTFixedPosOperands"; - let PredicateMethod = "isCVTFixedPos<32>"; - let DiagnosticType = "CVTFixedPos32"; -} - -// Also encoded as "64 - " but #1-#64 allowed. -def fixedpos_asmoperand_i64 : AsmOperandClass { - let Name = "CVTFixedPos64"; - let RenderMethod = "addCVTFixedPosOperands"; - let PredicateMethod = "isCVTFixedPos<64>"; - let DiagnosticType = "CVTFixedPos64"; -} - -// We need the cartesian product of f32/f64 i32/i64 operands for -// conversions: -// + Selection needs to use operands of correct floating type -// + Assembly parsing and decoding depend on integer width -class cvtfix_i32_op - : Operand, - ComplexPattern", [fpimm]> { - let ParserMatchClass = fixedpos_asmoperand_i32; - let DecoderMethod = "DecodeCVT32FixedPosOperand"; - let PrintMethod = "printCVTFixedPosOperand"; -} - -class cvtfix_i64_op - : Operand, - ComplexPattern", [fpimm]> { - let ParserMatchClass = fixedpos_asmoperand_i64; - let PrintMethod = "printCVTFixedPosOperand"; -} - -// Because of the proliferation of weird operands, it's not really -// worth going for a multiclass here. Oh well. 
- -class A64I_fptofix type, bits<3> opcode, - RegisterClass GPR, RegisterClass FPR, - ValueType DstTy, ValueType SrcTy, - Operand scale_op, string asmop, SDNode cvtop> - : A64I_fpfixed, - Sched<[WriteFPALU, ReadFPALU]>; - -def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, i32, f32, - cvtfix_i32_op, "fcvtzs", fp_to_sint>; -def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32, i64, f32, - cvtfix_i64_op, "fcvtzs", fp_to_sint>; -def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32, i32, f32, - cvtfix_i32_op, "fcvtzu", fp_to_uint>; -def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32, i64, f32, - cvtfix_i64_op, "fcvtzu", fp_to_uint>; - -def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64, i32, f64, - cvtfix_i32_op, "fcvtzs", fp_to_sint>; -def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64, i64, f64, - cvtfix_i64_op, "fcvtzs", fp_to_sint>; -def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64, i32, f64, - cvtfix_i32_op, "fcvtzu", fp_to_uint>; -def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64, i64, f64, - cvtfix_i64_op, "fcvtzu", fp_to_uint>; - - -class A64I_fixtofp type, bits<3> opcode, - RegisterClass FPR, RegisterClass GPR, - ValueType DstTy, ValueType SrcTy, - Operand scale_op, string asmop, SDNode cvtop> - : A64I_fpfixed, - Sched<[WriteFPALU, ReadFPALU]>; - -def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, f32, i32, - cvtfix_i32_op, "scvtf", sint_to_fp>; -def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64, f32, i64, - cvtfix_i64_op, "scvtf", sint_to_fp>; -def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32, f32, i32, - cvtfix_i32_op, "ucvtf", uint_to_fp>; -def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64, f32, i64, - cvtfix_i64_op, "ucvtf", uint_to_fp>; -def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32, f64, i32, - cvtfix_i32_op, "scvtf", sint_to_fp>; -def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64, f64, i64, - cvtfix_i64_op, "scvtf", sint_to_fp>; -def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32, f64, i32, - cvtfix_i32_op, "ucvtf", uint_to_fp>; -def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, f64, i64, - cvtfix_i64_op, "ucvtf", uint_to_fp>; - -//===----------------------------------------------------------------------===// -// Floating-point <-> integer conversion instructions -//===----------------------------------------------------------------------===// -// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF - -class A64I_fpintI type, bits<2> rmode, bits<3> opcode, - RegisterClass DestPR, RegisterClass SrcPR, string asmop> - : A64I_fpint, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass A64I_fptointRM rmode, bit o2, string asmop> { - def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0}, - GPR32, FPR32, asmop # "s">; - def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0}, - GPR64, FPR32, asmop # "s">; - def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1}, - GPR32, FPR32, asmop # "u">; - def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1}, - GPR64, FPR32, asmop # "u">; - - def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0}, - GPR32, FPR64, asmop # "s">; - def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0}, - GPR64, FPR64, asmop # "s">; - def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1}, - GPR32, FPR64, asmop # "u">; - def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1}, - GPR64, FPR64, asmop # "u">; -} - -defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">; -defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">; -defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">; -defm 
FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">; -defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">; - -let Predicates = [HasFPARMv8] in { -def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>; -def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>; -def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>; -def : Pat<(i64 (fp_to_uint f32:$Rn)), (FCVTZUxs $Rn)>; -def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>; -def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>; -def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>; -def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>; -} - -multiclass A64I_inttofp { - def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>; - def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>; - def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>; - def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>; -} - -defm S : A64I_inttofp<0b0, "scvtf">; -defm U : A64I_inttofp<0b1, "ucvtf">; - -let Predicates = [HasFPARMv8] in { -def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>; -def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>; -def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>; -def : Pat<(f64 (sint_to_fp i64:$Rn)), (SCVTFdx $Rn)>; -def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>; -def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>; -def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>; -def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>; -} - -def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">; -def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">; -def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">; -def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">; - -let Predicates = [HasFPARMv8] in { -def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>; -def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>; -def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>; -def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>; -} - -def lane1_asmoperand : AsmOperandClass { - let Name = "Lane1"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "Lane1"; -} - -def lane1 : Operand { - let ParserMatchClass = lane1_asmoperand; - let PrintMethod = "printBareImmOperand"; -} - -let DecoderMethod = "DecodeFMOVLaneInstruction" in { - def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110, - (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane), - "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111, - (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane), - "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -let Predicates = [HasFPARMv8] in { -def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]", - (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>; - -def : InstAlias<"fmov $Rd.2d[$Lane], $Rn", - (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>; -} - -//===----------------------------------------------------------------------===// -// Floating-point immediate instructions -//===----------------------------------------------------------------------===// -// Contains: FMOV - -def fpimm_asmoperand : AsmOperandClass { - let Name = "FMOVImm"; - let ParserMethod = "ParseFPImmOperand"; - let DiagnosticType = "FPImm"; -} - -// The MCOperand for these instructions are the encoded 8-bit values. 
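Only a limited set of values fits that 8-bit encoding. As an illustrative aside (assumed codegen, not from this patch):

    // Illustrative only: 1.0f is representable as an 8-bit FP immediate, so it
    // would be expected to materialize as "fmov s0, #1.0". A constant such as
    // 0.1f is not representable and needs a literal load or integer move instead.
    float one() { return 1.0f; }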
-def SDXF_fpimm : SDNodeXFormgetValueAPF(), Imm8); - return CurDAG->getTargetConstant(Imm8, MVT::i32); -}]>; - -class fmov_operand - : Operand, - PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }], - SDXF_fpimm> { - let PrintMethod = "printFPImmOperand"; - let ParserMatchClass = fpimm_asmoperand; -} - -def fmov32_operand : fmov_operand; -def fmov64_operand : fmov_operand; - -class A64I_fpimm_impl type, RegisterClass Reg, ValueType VT, - Operand fmov_operand> - : A64I_fpimm<0b0, 0b0, type, 0b00000, - (outs Reg:$Rd), - (ins fmov_operand:$Imm8), - "fmov\t$Rd, $Imm8", - [(set VT:$Rd, fmov_operand:$Imm8)], - NoItinerary>, - Sched<[WriteFPALU]>; - -def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>; -def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>; - -//===----------------------------------------------------------------------===// -// Load-register (literal) instructions -//===----------------------------------------------------------------------===// -// Contains: LDR, LDRSW, PRFM - -def ldrlit_label_asmoperand : AsmOperandClass { - let Name = "LoadLitLabel"; - let RenderMethod = "addLabelOperands<19, 4>"; - let DiagnosticType = "Label"; -} - -def ldrlit_label : Operand { - let EncoderMethod = "getLoadLitLabelOpValue"; - - // This label is a 19-bit offset from PC, scaled by the instruction-width: 4. - let PrintMethod = "printLabelOperand<19, 4>"; - let ParserMatchClass = ldrlit_label_asmoperand; - let OperandType = "OPERAND_PCREL"; -} - -// Various instructions take an immediate value (which can always be used), -// where some numbers have a symbolic name to make things easier. These operands -// and the associated functions abstract away the differences. -multiclass namedimm { - def _asmoperand : AsmOperandClass { - let Name = "NamedImm" # prefix; - let PredicateMethod = "isUImm"; - let RenderMethod = "addImmOperands"; - let ParserMethod = "ParseNamedImmOperand<" # mapper # ">"; - let DiagnosticType = "NamedImm_" # prefix; - } - - def _op : Operand { - let ParserMatchClass = !cast(prefix # "_asmoperand"); - let PrintMethod = "printNamedImmOperand<" # mapper # ">"; - let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">"; - } -} - -defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">; - -class A64I_LDRlitSimple opc, bit v, RegisterClass OutReg, - list patterns = []> - : A64I_LDRlit, - Sched<[WriteLd]>; - -let mayLoad = 1 in { - def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>; - def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>; -} - -let Predicates = [HasFPARMv8] in { -def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>; -def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>; -} - -let mayLoad = 1 in { - let Predicates = [HasFPARMv8] in { - def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>; - } - - def LDRSWx_lit : A64I_LDRlit<0b10, 0b0, - (outs GPR64:$Rt), - (ins ldrlit_label:$Imm19), - "ldrsw\t$Rt, $Imm19", - [], NoItinerary>, - Sched<[WriteLd]>; - - def PRFM_lit : A64I_LDRlit<0b11, 0b0, - (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19), - "prfm\t$Rt, $Imm19", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]>; -} - -//===----------------------------------------------------------------------===// -// Load-store exclusive instructions -//===----------------------------------------------------------------------===// -// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR. 
STXP, LDXP, STLXRB, -// STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB, -// STLRH, STLR, LDARB, LDARH, LDAR - -// Since these instructions have the undefined register bits set to 1 in -// their canonical form, we need a post encoder method to set those bits -// to 1 when encoding these instructions. We do this using the -// fixLoadStoreExclusive function. This function has template parameters: -// -// fixLoadStoreExclusive -// -// hasRs indicates that the instruction uses the Rs field, so we won't set -// it to 1 (and the same for Rt2). We don't need template parameters for -// the other register fiels since Rt and Rn are always used. - -// This operand parses a GPR64xsp register, followed by an optional immediate -// #0. -def GPR64xsp0_asmoperand : AsmOperandClass { - let Name = "GPR64xsp0"; - let PredicateMethod = "isWrappedReg"; - let RenderMethod = "addRegOperands"; - let ParserMethod = "ParseLSXAddressOperand"; - // Diagnostics are provided by ParserMethod -} - -def GPR64xsp0 : RegisterOperand { - let ParserMatchClass = GPR64xsp0_asmoperand; -} - -//===---------------------------------- -// Store-exclusive (releasing & normal) -//===---------------------------------- - -class A64I_SRexs_impl size, bits<3> opcode, string asm, dag outs, - dag ins, list pat, - InstrItinClass itin> : - A64I_LDSTex_stn { - let mayStore = 1; - let PostEncoderMethod = "fixLoadStoreExclusive<1,0>"; - let Constraints = "@earlyclobber $Rs"; -} - -multiclass A64I_SRex opcode, string prefix> { - def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"), - (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; - - def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"), - (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [],NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; - - def _word: A64I_SRexs_impl<0b10, opcode, asmstr, - (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; - - def _dword: A64I_SRexs_impl<0b11, opcode, asmstr, - (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; -} - -defm STXR : A64I_SRex<"stxr", 0b000, "STXR">; -defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">; - -//===---------------------------------- -// Loads -//===---------------------------------- - -class A64I_LRexs_impl size, bits<3> opcode, string asm, dag outs, - dag ins, list pat, - InstrItinClass itin> : - A64I_LDSTex_tn { - let mayLoad = 1; - let PostEncoderMethod = "fixLoadStoreExclusive<0,0>"; -} - -multiclass A64I_LRex opcode> { - def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"), - (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteLd]>; - - def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"), - (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteLd]>; - - def _word: A64I_LRexs_impl<0b10, opcode, asmstr, - (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteLd]>; - - def _dword: A64I_LRexs_impl<0b11, opcode, asmstr, - (outs GPR64:$Rt), (ins GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteLd]>; -} - -defm LDXR : A64I_LRex<"ldxr", 0b000>; -defm LDAXR : A64I_LRex<"ldaxr", 0b001>; -defm LDAR : A64I_LRex<"ldar", 0b101>; - -class acquiring_load - : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - AtomicOrdering Ordering = cast(N)->getOrdering(); - return Ordering == Acquire || Ordering == SequentiallyConsistent; -}]>; - -def atomic_load_acquire_8 : 
acquiring_load; -def atomic_load_acquire_16 : acquiring_load; -def atomic_load_acquire_32 : acquiring_load; -def atomic_load_acquire_64 : acquiring_load; - -def : Pat<(atomic_load_acquire_8 i64:$Rn), (LDAR_byte $Rn)>; -def : Pat<(atomic_load_acquire_16 i64:$Rn), (LDAR_hword $Rn)>; -def : Pat<(atomic_load_acquire_32 i64:$Rn), (LDAR_word $Rn)>; -def : Pat<(atomic_load_acquire_64 i64:$Rn), (LDAR_dword $Rn)>; - -//===---------------------------------- -// Store-release (no exclusivity) -//===---------------------------------- - -class A64I_SLexs_impl size, bits<3> opcode, string asm, dag outs, - dag ins, list pat, - InstrItinClass itin> : - A64I_LDSTex_tn { - let mayStore = 1; - let PostEncoderMethod = "fixLoadStoreExclusive<0,0>"; -} - -class releasing_store - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - AtomicOrdering Ordering = cast(N)->getOrdering(); - return Ordering == Release || Ordering == SequentiallyConsistent; -}]>; - -def atomic_store_release_8 : releasing_store; -def atomic_store_release_16 : releasing_store; -def atomic_store_release_32 : releasing_store; -def atomic_store_release_64 : releasing_store; - -multiclass A64I_SLex opcode, string prefix> { - def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"), - (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [(atomic_store_release_8 i64:$Rn, i32:$Rt)], - NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; - - def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"), - (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [(atomic_store_release_16 i64:$Rn, i32:$Rt)], - NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; - - def _word: A64I_SLexs_impl<0b10, opcode, asmstr, - (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [(atomic_store_release_32 i64:$Rn, i32:$Rt)], - NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; - - def _dword: A64I_SLexs_impl<0b11, opcode, asmstr, - (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn), - [(atomic_store_release_64 i64:$Rn, i64:$Rt)], - NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt]>; -} - -defm STLR : A64I_SLex<"stlr", 0b101, "STLR">; - -//===---------------------------------- -// Store-exclusive pair (releasing & normal) -//===---------------------------------- - -class A64I_SPexs_impl size, bits<3> opcode, string asm, dag outs, - dag ins, list pat, - InstrItinClass itin> : - A64I_LDSTex_stt2n { - let mayStore = 1; -} - - -multiclass A64I_SPex opcode> { - def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs), - (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2, - GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; - - def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs), - (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2, - GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; -} - -defm STXP : A64I_SPex<"stxp", 0b010>; -defm STLXP : A64I_SPex<"stlxp", 0b011>; - -//===---------------------------------- -// Load-exclusive pair (acquiring & normal) -//===---------------------------------- - -class A64I_LPexs_impl size, bits<3> opcode, string asm, dag outs, - dag ins, list pat, - InstrItinClass itin> : - A64I_LDSTex_tt2n { - let mayLoad = 1; - let DecoderMethod = "DecodeLoadPairExclusiveInstruction"; - let PostEncoderMethod = "fixLoadStoreExclusive<0,1>"; -} - -multiclass A64I_LPex opcode> { - def _word: A64I_LPexs_impl<0b10, opcode, asmstr, - (outs GPR32:$Rt, GPR32:$Rt2), - (ins GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]>; - - def _dword: A64I_LPexs_impl<0b11, opcode, asmstr, - (outs GPR64:$Rt, GPR64:$Rt2), - (ins 
GPR64xsp0:$Rn), - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]>; -} - -defm LDXP : A64I_LPex<"ldxp", 0b010>; -defm LDAXP : A64I_LPex<"ldaxp", 0b011>; - -//===----------------------------------------------------------------------===// -// Load-store register (unscaled immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: LDURB, LDURH, LDRUSB, LDRUSH, LDRUSW, STUR, STURB, STURH and PRFUM -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register (register offset) instructions -//===----------------------------------------------------------------------===// -// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register (unsigned immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register (immediate post-indexed) instructions -//===----------------------------------------------------------------------===// -// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register (immediate pre-indexed) instructions -//===----------------------------------------------------------------------===// -// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW - -// Note that patterns are much later on in a completely separate section (they -// need ADRPxi to be defined). - -//===------------------------------- -// 1. Various operands needed -//===------------------------------- - -//===------------------------------- -// 1.1 Unsigned 12-bit immediate operands -//===------------------------------- -// The addressing mode for these instructions consists of an unsigned 12-bit -// immediate which is scaled by the size of the memory access. -// -// We represent this in the MC layer by two operands: -// 1. A base register. -// 2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]" -// would have '1' in this field. -// This means that separate functions are needed for converting representations -// which *are* aware of the intended access size. - -// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to -// know the access size via some means. An isolated operand does not have this -// information unless told from here, which means we need separate tablegen -// Operands for each access size. This multiclass takes care of instantiating -// the correct template functions in the rest of the backend. - -//===------------------------------- -// 1.1 Unsigned 12-bit immediate operands -//===------------------------------- - -multiclass offsets_uimm12 { - def uimm12_asmoperand : AsmOperandClass { - let Name = "OffsetUImm12_" # MemSize; - let PredicateMethod = "isOffsetUImm12<" # MemSize # ">"; - let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">"; - let DiagnosticType = "LoadStoreUImm12_" # MemSize; - } - - // Pattern is really no more than an ImmLeaf, but predicated on MemSize which - // complicates things beyond TableGen's ken. 
- def uimm12 : Operand, - ComplexPattern"> { - let ParserMatchClass - = !cast(prefix # uimm12_asmoperand); - - let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">"; - let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">"; - } -} - -defm byte_ : offsets_uimm12<1, "byte_">; -defm hword_ : offsets_uimm12<2, "hword_">; -defm word_ : offsets_uimm12<4, "word_">; -defm dword_ : offsets_uimm12<8, "dword_">; -defm qword_ : offsets_uimm12<16, "qword_">; - -//===------------------------------- -// 1.1 Signed 9-bit immediate operands -//===------------------------------- - -// The MCInst is expected to store the bit-wise encoding of the value, -// which amounts to lopping off the extended sign bits. -def SDXF_simm9 : SDNodeXFormgetTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32); -}]>; - -def simm9_asmoperand : AsmOperandClass { - let Name = "SImm9"; - let PredicateMethod = "isSImm<9>"; - let RenderMethod = "addSImmOperands<9>"; - let DiagnosticType = "LoadStoreSImm9"; -} - -def simm9 : Operand, - ImmLeaf= -0x100 && Imm <= 0xff; }], - SDXF_simm9> { - let PrintMethod = "printOffsetSImm9Operand"; - let ParserMatchClass = simm9_asmoperand; -} - - -//===------------------------------- -// 1.3 Register offset extensions -//===------------------------------- - -// The assembly-syntax for these addressing-modes is: -// [, {, {}}] -// -// The essential semantics are: -// + is a shift: # or #0 -// + can be W or X. -// + If is W, can be UXTW or SXTW -// + If is X, can be LSL or SXTX -// -// The trickiest of those constraints is that Rm can be either GPR32 or GPR64, -// which will need separate instructions for LLVM type-consistency. We'll also -// need separate operands, of course. -multiclass regexts { - def regext_asmoperand : AsmOperandClass { - let Name = "AddrRegExtend_" # MemSize # "_" # Rm; - let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">"; - let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">"; - let DiagnosticType = "LoadStoreExtend" # RmSize # "_" # MemSize; - } - - def regext : Operand { - let PrintMethod - = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">"; - - let DecoderMethod = "DecodeAddrRegExtendOperand"; - let ParserMatchClass - = !cast(prefix # regext_asmoperand); - } -} - -multiclass regexts_wx { - // Rm is an X-register if LSL or SXTX are specified as the shift. - defm Xm_ : regexts; - - // Rm is a W-register if UXTW or SXTW are specified as the shift. - defm Wm_ : regexts; -} - -defm byte_ : regexts_wx<1, "byte_">; -defm hword_ : regexts_wx<2, "hword_">; -defm word_ : regexts_wx<4, "word_">; -defm dword_ : regexts_wx<8, "dword_">; -defm qword_ : regexts_wx<16, "qword_">; - - -//===------------------------------ -// 2. The instructions themselves. -//===------------------------------ - -// We have the following instructions to implement: -// | | B | H | W | X | -// |-----------------+-------+-------+-------+--------| -// | unsigned str | STRB | STRH | STR | STR | -// | unsigned ldr | LDRB | LDRH | LDR | LDR | -// | signed ldr to W | LDRSB | LDRSH | - | - | -// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) | - -// This will instantiate the LDR/STR instructions you'd expect to use for an -// unsigned datatype (first two rows above) or floating-point register, which is -// reasonably uniform across all access sizes. 
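As a worked illustration of the scaled 12-bit offset operands above (a hypothetical helper, not part of the backend):

    #include <cstdint>
    // Maps an assembly byte offset to the unscaled 12-bit field described above;
    // for an 8-byte access, "ldr x0, [x0, #8]" stores 1 in the field.
    bool encodeUImm12(uint64_t ByteOffset, unsigned AccessSize, uint64_t &Field) {
      if (ByteOffset % AccessSize != 0)
        return false;                  // offsets that don't scale use LDUR/STUR instead
      Field = ByteOffset / AccessSize;
      return Field < 4096;             // 12-bit field holds 0..4095
    }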
- - -//===------------------------------ -// 2.1 Regular instructions -//===------------------------------ - -// This class covers the basic unsigned or irrelevantly-signed loads and stores, -// to general-purpose and floating-point registers. - -class AddrParams { - Operand uimm12 = !cast(prefix # "_uimm12"); - - Operand regextWm = !cast(prefix # "_Wm_regext"); - Operand regextXm = !cast(prefix # "_Xm_regext"); -} - -def byte_addrparams : AddrParams<"byte">; -def hword_addrparams : AddrParams<"hword">; -def word_addrparams : AddrParams<"word">; -def dword_addrparams : AddrParams<"dword">; -def qword_addrparams : AddrParams<"qword">; - -multiclass A64I_LDRSTR_unsigned size, bit v, - bit high_opc, string asmsuffix, - RegisterClass GPR, AddrParams params> { - // Unsigned immediate - def _STR : A64I_LSunsigimm, - Sched<[WriteSt, ReadSt, ReadSt]> { - let mayStore = 1; - } - def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]", - (!cast(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - def _LDR : A64I_LSunsigimm, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]", - (!cast(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - // Register offset (four of these: load/store and Wm/Xm). - let mayLoad = 1 in { - def _Wm_RegOffset_LDR : A64I_LSregoff, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def _Xm_RegOffset_LDR : A64I_LSregoff, - Sched<[WriteLd, ReadLd, ReadLd]>; - } - def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]", - (!cast(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - let mayStore = 1 in { - def _Wm_RegOffset_STR : A64I_LSregoff, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; - - def _Xm_RegOffset_STR : A64I_LSregoff, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; - } - def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]", - (!cast(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - // Unaligned immediate - def _STUR : A64I_LSunalimm, - Sched<[WriteSt, ReadSt, ReadSt]> { - let mayStore = 1; - } - def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]", - (!cast(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - def _LDUR : A64I_LSunalimm, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]", - (!cast(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - // Post-indexed - def _PostInd_STR : A64I_LSpostind, - Sched<[WriteSt, ReadSt, ReadSt]> { - let Constraints = "$Rn = $Rn_wb"; - let mayStore = 1; - - // Decoder only needed for unpredictability checking (FIXME). - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - def _PostInd_LDR : A64I_LSpostind, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - // Pre-indexed - def _PreInd_STR : A64I_LSpreind, - Sched<[WriteSt, ReadSt, ReadSt]> { - let Constraints = "$Rn = $Rn_wb"; - let mayStore = 1; - - // Decoder only needed for unpredictability checking (FIXME). 
- let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - def _PreInd_LDR : A64I_LSpreind, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - -} - -// STRB/LDRB: First define the instructions -defm LS8 - : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>; - -// STRH/LDRH -defm LS16 - : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>; - - -// STR/LDR to/from a W register -defm LS32 - : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>; - -// STR/LDR to/from an X register -defm LS64 - : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>; - -let Predicates = [HasFPARMv8] in { -// STR/LDR to/from a B register -defm LSFP8 - : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>; - -// STR/LDR to/from an H register -defm LSFP16 - : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>; - -// STR/LDR to/from an S register -defm LSFP32 - : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>; -// STR/LDR to/from a D register -defm LSFP64 - : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>; -// STR/LDR to/from a Q register -defm LSFP128 - : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128, - qword_addrparams>; -} - -//===------------------------------ -// 2.3 Signed loads -//===------------------------------ - -// Byte and half-word signed loads can both go into either an X or a W register, -// so it's worth factoring out. Signed word loads don't fit because there is no -// W version. -multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, - string prefix> { - // Unsigned offset - def w : A64I_LSunsigimm, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", - (!cast(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>; - - def x : A64I_LSunsigimm, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", - (!cast(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>; - - // Register offset - let mayLoad = 1 in { - def w_Wm_RegOffset : A64I_LSregoff, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def w_Xm_RegOffset : A64I_LSregoff, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def x_Wm_RegOffset : A64I_LSregoff, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def x_Xm_RegOffset : A64I_LSregoff, - Sched<[WriteLd, ReadLd, ReadLd]>; - } - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]", - (!cast(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]", - (!cast(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - - let mayLoad = 1 in { - // Unaligned offset - def w_U : A64I_LSunalimm, - Sched<[WriteLd, ReadLd]>; - - def x_U : A64I_LSunalimm, - Sched<[WriteLd, ReadLd]>; - - - // Post-indexed - def w_PostInd : A64I_LSpostind, - Sched<[WriteLd, WriteLd, ReadLd]> { - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - def x_PostInd : A64I_LSpostind, - Sched<[WriteLd, WriteLd, ReadLd]> { - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - // Pre-indexed - def w_PreInd : A64I_LSpreind, - Sched<[WriteLd, WriteLd, ReadLd]> { - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - def x_PreInd : 
A64I_LSpreind, - Sched<[WriteLd, WriteLd, ReadLd]> { - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - } // let mayLoad = 1 -} - -// LDRSB -defm LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">; -// LDRSH -defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">; - -// LDRSW: load a 32-bit register, sign-extending to 64-bits. -def LDRSWx - : A64I_LSunsigimm<0b10, 0b0, 0b10, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, word_uimm12:$UImm12), - "ldrsw\t$Rt, [$Rn, $UImm12]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; -} -def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; - -let mayLoad = 1 in { - def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext), - "ldrsw\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext), - "ldrsw\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd, ReadLd]>; -} -def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]", - (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>; - - -def LDURSWx - : A64I_LSunalimm<0b10, 0b0, 0b10, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldursw\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; -} -def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; - -def LDRSWx_PostInd - : A64I_LSpostind<0b10, 0b0, 0b10, - (outs GPR64:$Rt, GPR64xsp:$Rn_wb), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldrsw\t$Rt, [$Rn], $SImm9", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; -} - -def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10, - (outs GPR64:$Rt, GPR64xsp:$Rn_wb), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldrsw\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; -} - -//===------------------------------ -// 2.4 Prefetch operations -//===------------------------------ - -def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs), - (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12), - "prfm\t$Rt, [$Rn, $UImm12]", - [], NoItinerary>, - Sched<[WritePreLd, ReadPreLd]> { - let mayLoad = 1; -} -def : InstAlias<"prfm $Rt, [$Rn]", - (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>; - -let mayLoad = 1 in { - def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs), - (ins prefetch_op:$Rt, GPR64xsp:$Rn, - GPR32:$Rm, dword_Wm_regext:$Ext), - "prfm\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WritePreLd, ReadPreLd]>; - def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs), - (ins prefetch_op:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, dword_Xm_regext:$Ext), - "prfm\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WritePreLd, ReadPreLd]>; -} - -def : InstAlias<"prfm $Rt, [$Rn, $Rm]", - (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - -def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs), - (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9), - "prfum\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WritePreLd, ReadPreLd]> { - let mayLoad = 1; -} -def : InstAlias<"prfum $Rt, [$Rn]", - (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>; - 
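As an aside on the prefetch definitions above (an illustrative assumption about typical codegen, not part of this patch):

    // Illustrative only: a compiler prefetch hint (__builtin_prefetch in
    // GCC/Clang) would be expected to select to PRFM, e.g. "prfm pldl1keep, [x0]".
    void warm(const void *p) { __builtin_prefetch(p); }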
-//===----------------------------------------------------------------------===// -// Load-store register (unprivileged) instructions -//===----------------------------------------------------------------------===// -// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH - -// These instructions very much mirror the "unscaled immediate" loads, but since -// there are no floating-point variants we need to split them out into their own -// section to avoid instantiation of "ldtr d0, [sp]" etc. - -multiclass A64I_LDTRSTTR size, string asmsuffix, RegisterClass GPR, - string prefix> { - def _UnPriv_STR : A64I_LSunpriv, - Sched<[WriteLd, ReadLd]> { - let mayStore = 1; - } - - def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]", - (!cast(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - def _UnPriv_LDR : A64I_LSunpriv, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - - def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]", - (!cast(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - -} - -// STTRB/LDTRB: First define the instructions -defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">; - -// STTRH/LDTRH -defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">; - -// STTR/LDTR to/from a W register -defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">; - -// STTR/LDTR to/from an X register -defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">; - -// Now a class for the signed instructions that can go to either 32 or 64 -// bits... -multiclass A64I_LDTR_signed size, string asmopcode, string prefix> { - let mayLoad = 1 in { - def w : A64I_LSunpriv, - Sched<[WriteLd, ReadLd]>; - - def x : A64I_LSunpriv, - Sched<[WriteLd, ReadLd]>; - } - - def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", - (!cast(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>; - - def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", - (!cast(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>; - -} - -// LDTRSB -defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">; -// LDTRSH -defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">; - -// And finally LDTRSW which only goes to 64 bits. -def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldtrsw\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; -} -def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; - -//===----------------------------------------------------------------------===// -// Load-store register pair (offset) instructions -//===----------------------------------------------------------------------===// -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register pair (post-indexed) instructions -//===----------------------------------------------------------------------===// -// Contains: STP, LDP, LDPSW -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register pair (pre-indexed) instructions -//===----------------------------------------------------------------------===// -// Contains: STP, LDP, LDPSW -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store non-temporal register pair (offset) instructions -//===----------------------------------------------------------------------===// -// Contains: STNP, LDNP - - -// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to -// know the access size via some means. 
An isolated operand does not have this -// information unless told from here, which means we need separate tablegen -// Operands for each access size. This multiclass takes care of instantiating -// the correct template functions in the rest of the backend. - -multiclass offsets_simm7 { - // The bare signed 7-bit immediate is used in post-indexed instructions, but - // because of the scaling performed a generic "simm7" operand isn't - // appropriate here either. - def simm7_asmoperand : AsmOperandClass { - let Name = "SImm7_Scaled" # MemSize; - let PredicateMethod = "isSImm7Scaled<" # MemSize # ">"; - let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">"; - let DiagnosticType = "LoadStoreSImm7_" # MemSize; - } - - def simm7 : Operand { - let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">"; - let ParserMatchClass = !cast(prefix # "simm7_asmoperand"); - } -} - -defm word_ : offsets_simm7<"4", "word_">; -defm dword_ : offsets_simm7<"8", "dword_">; -defm qword_ : offsets_simm7<"16", "qword_">; - -multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, - Operand simm7, string prefix> { - def _STR : A64I_LSPoffset, - Sched<[WriteLd, ReadLd]> { - let mayStore = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - def : InstAlias<"stp $Rt, $Rt2, [$Rn]", - (!cast(prefix # "_STR") SomeReg:$Rt, - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; - - def _LDR : A64I_LSPoffset, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - def : InstAlias<"ldp $Rt, $Rt2, [$Rn]", - (!cast(prefix # "_LDR") SomeReg:$Rt, - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; - - def _PostInd_STR : A64I_LSPpostind, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { - let mayStore = 1; - let Constraints = "$Rn = $Rn_wb"; - - // Decoder only needed for unpredictability checking (FIXME). 
- let DecoderMethod = "DecodeLDSTPairInstruction"; - } - - def _PostInd_LDR : A64I_LSPpostind, - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - - def _PreInd_STR : A64I_LSPpreind, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { - let mayStore = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - - def _PreInd_LDR : A64I_LSPpreind, - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - - def _NonTemp_STR : A64I_LSPnontemp, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { - let mayStore = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - def : InstAlias<"stnp $Rt, $Rt2, [$Rn]", - (!cast(prefix # "_NonTemp_STR") SomeReg:$Rt, - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; - - def _NonTemp_LDR : A64I_LSPnontemp, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]", - (!cast(prefix # "_NonTemp_LDR") SomeReg:$Rt, - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; - -} - - -defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">; -defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">; - -let Predicates = [HasFPARMv8] in { -defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">; -defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">; -defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7, - "LSFPPair128">; -} - - -def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1, - (outs GPR64:$Rt, GPR64:$Rt2), - (ins GPR64xsp:$Rn, word_simm7:$SImm7), - "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; -} -def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]", - (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>; - -def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1, - (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), - (ins GPR64xsp:$Rn, word_simm7:$SImm7), - "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; -} - -def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1, - (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), - (ins GPR64xsp:$Rn, word_simm7:$SImm7), - "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; -} - -//===----------------------------------------------------------------------===// -// Logical (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV - -multiclass logical_imm_operands { - def _asmoperand : AsmOperandClass { - let Name = "LogicalImm" # note # size; - let PredicateMethod = "isLogicalImm" # note # "<" # size # ">"; - let RenderMethod = "addLogicalImmOperands<" # size # ">"; - let DiagnosticType = "LogicalSecondSource"; - } - - def _operand - : Operand, ComplexPattern { - let ParserMatchClass = !cast(prefix # "_asmoperand"); - let PrintMethod = "printLogicalImmOperand<" # size # ">"; - let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">"; - } -} - -defm logical_imm32 : 
logical_imm_operands<"logical_imm32", "", 32, i32>; -defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>; - -// The mov versions only differ in assembly parsing, where they -// exclude values representable with either MOVZ or MOVN. -defm logical_imm32_mov - : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>; -defm logical_imm64_mov - : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>; - - -multiclass A64I_logimmSizes opc, string asmop, SDNode opnode> { - def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd), - (ins GPR32:$Rn, logical_imm32_operand:$Imm), - !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - [(set i32:$Rd, - (opnode i32:$Rn, logical_imm32_operand:$Imm))], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; - - def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd), - (ins GPR64:$Rn, logical_imm64_operand:$Imm), - !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - [(set i64:$Rd, - (opnode i64:$Rn, logical_imm64_operand:$Imm))], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; -} - -defm AND : A64I_logimmSizes<0b00, "and", and>; -defm ORR : A64I_logimmSizes<0b01, "orr", or>; -defm EOR : A64I_logimmSizes<0b10, "eor", xor>; - -let Defs = [NZCV] in { - def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd), - (ins GPR32:$Rn, logical_imm32_operand:$Imm), - "ands\t$Rd, $Rn, $Imm", - [], NoItinerary>, - Sched<[WriteALU, ReadALU]>; - - def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd), - (ins GPR64:$Rn, logical_imm64_operand:$Imm), - "ands\t$Rd, $Rn, $Imm", - [], NoItinerary>, - Sched<[WriteALU, ReadALU]>; -} - -def : InstAlias<"tst $Rn, $Imm", - (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>; -def : InstAlias<"tst $Rn, $Imm", - (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>; -// FIXME: these sometimes are canonical. -def : InstAlias<"mov $Rd, $Imm", - (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm), 0>; -def : InstAlias<"mov $Rd, $Imm", - (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm), 0>; - -//===----------------------------------------------------------------------===// -// Logical (shifted register) instructions -//===----------------------------------------------------------------------===// -// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV - -// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS" -// behaves differently for unsigned comparisons, so we defensively only allow -// signed or n/a as the operand. In practice "unsigned greater than 0" is "not -// equal to 0" and LLVM gives us this. -def signed_cond : PatLeaf<(cond), [{ - return !isUnsignedIntSetCC(N->get()); -}]>; - - -// These instructions share their "shift" operands with add/sub (shifted -// register instructions). They are defined there. - -// N.b. the commutable parameter is just !N. It will be first against the wall -// when the revolution comes. 
-multiclass logical_shifts opc, - bit N, bit commutable, - string asmop, SDPatternOperator opfrag, ValueType ty, - RegisterClass GPR, list defs> { - let isCommutable = commutable, Defs = defs in { - def _lsl : A64I_logicalshift("lsl_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (shl ty:$Rm, - !cast("lsl_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _lsr : A64I_logicalshift("lsr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm, - !cast("lsr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _asr : A64I_logicalshift("asr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm, - !cast("asr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _ror : A64I_logicalshift("ror_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (rotr ty:$Rm, - !cast("ror_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - } - - def _noshift - : InstAlias(prefix # "_lsl") GPR:$Rd, GPR:$Rn, - GPR:$Rm, 0)>; - - def : Pat<(opfrag ty:$Rn, ty:$Rm), - (!cast(prefix # "_lsl") $Rn, $Rm, 0)>; -} - -multiclass logical_sizes opc, bit N, bit commutable, - string asmop, SDPatternOperator opfrag, - list defs> { - defm xxx : logical_shifts; - defm www : logical_shifts; -} - - -defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>; -defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>; -defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>; -defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands", - PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), - [{ (void)N; return false; }]>, - [NZCV]>; - -defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic", - PatFrag<(ops node:$lhs, node:$rhs), - (and node:$lhs, (not node:$rhs))>, []>; -defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn", - PatFrag<(ops node:$lhs, node:$rhs), - (or node:$lhs, (not node:$rhs))>, []>; -defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon", - PatFrag<(ops node:$lhs, node:$rhs), - (xor node:$lhs, (not node:$rhs))>, []>; -defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics", - PatFrag<(ops node:$lhs, node:$rhs), - (and node:$lhs, (not node:$rhs)), - [{ (void)N; return false; }]>, - [NZCV]>; - -multiclass tst_shifts { - let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in { - def _lsl : A64I_logicalshift("lsl_operand_" # ty):$Imm6), - "tst\t$Rn, $Rm, $Imm6", - [(set NZCV, (A64setcc (and ty:$Rn, (shl ty:$Rm, - !cast("lsl_operand_" # ty):$Imm6)), - 0, signed_cond))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - - def _lsr : A64I_logicalshift("lsr_operand_" # ty):$Imm6), - "tst\t$Rn, $Rm, $Imm6", - [(set NZCV, (A64setcc (and ty:$Rn, (srl ty:$Rm, - !cast("lsr_operand_" # ty):$Imm6)), - 0, signed_cond))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _asr : A64I_logicalshift("asr_operand_" # ty):$Imm6), - "tst\t$Rn, $Rm, $Imm6", - [(set NZCV, (A64setcc (and ty:$Rn, (sra ty:$Rm, - !cast("asr_operand_" # ty):$Imm6)), - 0, signed_cond))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _ror : A64I_logicalshift("ror_operand_" # ty):$Imm6), - "tst\t$Rn, $Rm, $Imm6", - [(set NZCV, (A64setcc (and ty:$Rn, (rotr ty:$Rm, - !cast("ror_operand_" # ty):$Imm6)), - 0, signed_cond))], - 
NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - } - - def _noshift : InstAlias<"tst $Rn, $Rm", - (!cast(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; - - def : Pat<(A64setcc (and ty:$Rn, ty:$Rm), 0, signed_cond), - (!cast(prefix # "_lsl") $Rn, $Rm, 0)>; -} - -defm TSTxx : tst_shifts<"TSTxx", 0b1, i64, GPR64>; -defm TSTww : tst_shifts<"TSTww", 0b0, i32, GPR32>; - - -multiclass mvn_shifts { - let isCommutable = 0, Rn = 0b11111 in { - def _lsl : A64I_logicalshift("lsl_operand_" # ty):$Imm6), - "mvn\t$Rd, $Rm, $Imm6", - [(set ty:$Rd, (not (shl ty:$Rm, - !cast("lsl_operand_" # ty):$Imm6)))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - - def _lsr : A64I_logicalshift("lsr_operand_" # ty):$Imm6), - "mvn\t$Rd, $Rm, $Imm6", - [(set ty:$Rd, (not (srl ty:$Rm, - !cast("lsr_operand_" # ty):$Imm6)))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _asr : A64I_logicalshift("asr_operand_" # ty):$Imm6), - "mvn\t$Rd, $Rm, $Imm6", - [(set ty:$Rd, (not (sra ty:$Rm, - !cast("asr_operand_" # ty):$Imm6)))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _ror : A64I_logicalshift("ror_operand_" # ty):$Imm6), - "mvn\t$Rd, $Rm, $Imm6", - [(set ty:$Rd, (not (rotr ty:$Rm, - !cast("lsl_operand_" # ty):$Imm6)))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - } - - def _noshift : InstAlias<"mvn $Rn, $Rm", - (!cast(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; - - def : Pat<(not ty:$Rm), - (!cast(prefix # "_lsl") $Rm, 0)>; -} - -defm MVNxx : mvn_shifts<"MVNxx", 0b1, i64, GPR64>; -defm MVNww : mvn_shifts<"MVNww", 0b0, i32, GPR32>; - -def MOVxx :InstAlias<"mov $Rd, $Rm", (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; -def MOVww :InstAlias<"mov $Rd, $Rm", (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; - -//===----------------------------------------------------------------------===// -// Move wide (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: MOVN, MOVZ, MOVK + MOV aliases - -// A wide variety of different relocations are needed for variants of these -// instructions, so it turns out that we need a different operand for all of -// them. 
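As background for the operand definitions that follow: the move-wide group builds a constant 16 bits at a time. MOVZ zeroes the register and deposits one 16-bit chunk at a shift of 0, 16, 32 or 48; MOVK deposits a chunk while keeping the other bits; MOVN writes the bitwise NOT of a chunk. A minimal C++ sketch of materializing an arbitrary 64-bit value this way (an illustrative helper, not the encoder this backend uses; a real encoder would also consider MOVN and a logical-immediate ORR for shorter sequences):

#include <cstdint>
#include <cstdio>

// Print a MOVZ/MOVK sequence that materializes Imm in register Xd.
// Each instruction supplies one 16-bit chunk at an LSL of 0, 16, 32 or 48.
void materialize64(unsigned Xd, uint64_t Imm) {
  bool First = true;
  for (unsigned Shift = 0; Shift < 64; Shift += 16) {
    uint16_t Chunk = uint16_t(Imm >> Shift);
    if (Chunk == 0)
      continue; // MOVZ already zeroed this chunk.
    std::printf("%s x%u, #0x%x, lsl #%u\n",
                First ? "movz" : "movk", Xd, Chunk, Shift);
    First = false;
  }
  if (First) // Imm == 0 still needs one instruction.
    std::printf("movz x%u, #0\n", Xd);
}

The same idea, driven by relocations rather than known bits, is what the A64WrapperLarge pattern further down uses to form a full 64-bit address from one MOVZ and three MOVKs.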
-multiclass movw_operands { - def _imm_asmoperand : AsmOperandClass { - let Name = instname # width # "Shifted" # shift; - let PredicateMethod = "is" # instname # width # "Imm"; - let RenderMethod = "addMoveWideImmOperands"; - let ParserMethod = "ParseImmWithLSLOperand"; - let DiagnosticType = "MOVWUImm16"; - } - - def _imm : Operand { - let ParserMatchClass = !cast(prefix # "_imm_asmoperand"); - let PrintMethod = "printMoveWideImmOperand"; - let EncoderMethod = "getMoveWideImmOpValue"; - let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">"; - - let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); - } -} - -defm movn32 : movw_operands<"movn32", "MOVN", 32>; -defm movn64 : movw_operands<"movn64", "MOVN", 64>; -defm movz32 : movw_operands<"movz32", "MOVZ", 32>; -defm movz64 : movw_operands<"movz64", "MOVZ", 64>; -defm movk32 : movw_operands<"movk32", "MOVK", 32>; -defm movk64 : movw_operands<"movk64", "MOVK", 64>; - -multiclass A64I_movwSizes opc, string asmop, dag ins32bit, - dag ins64bit> { - - def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit, - !strconcat(asmop, "\t$Rd, $FullImm"), - [], NoItinerary>, - Sched<[WriteALU]> { - bits<18> FullImm; - let UImm16 = FullImm{15-0}; - let Shift = FullImm{17-16}; - } - - def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit, - !strconcat(asmop, "\t$Rd, $FullImm"), - [], NoItinerary>, - Sched<[WriteALU]> { - bits<18> FullImm; - let UImm16 = FullImm{15-0}; - let Shift = FullImm{17-16}; - } -} - -let isMoveImm = 1, isReMaterializable = 1, - isAsCheapAsAMove = 1, hasSideEffects = 0 in { - defm MOVN : A64I_movwSizes<0b00, "movn", - (ins movn32_imm:$FullImm), - (ins movn64_imm:$FullImm)>; - - // Some relocations are able to convert between a MOVZ and a MOVN. If these - // are applied the instruction must be emitted with the corresponding bits as - // 0, which means a MOVZ needs to override that bit from the default. - let PostEncoderMethod = "fixMOVZ" in - defm MOVZ : A64I_movwSizes<0b10, "movz", - (ins movz32_imm:$FullImm), - (ins movz64_imm:$FullImm)>; -} - -let Constraints = "$src = $Rd", - SchedRW = [WriteALU, ReadALU] in -defm MOVK : A64I_movwSizes<0b11, "movk", - (ins GPR32:$src, movk32_imm:$FullImm), - (ins GPR64:$src, movk64_imm:$FullImm)>; - - -// And now the "MOV" aliases. These also need their own operands because what -// they accept is completely different to what the base instructions accept. -multiclass movalias_operand { - def _asmoperand : AsmOperandClass { - let Name = basename # width # "MovAlias"; - let PredicateMethod - = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">"; - let RenderMethod - = "addMoveWideMovAliasOperands<" # width # ", " - # "A64Imms::" # immpredicate # ">"; - } - - def _movimm : Operand { - let ParserMatchClass = !cast(prefix # "_asmoperand"); - - let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); - } -} - -defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>; -defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>; -defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>; -defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>; - -// FIXME: these are officially canonical aliases, but TableGen is too limited to -// print them at the moment. I believe in this case an "AliasPredicate" method -// will need to be implemented. to allow it, as well as the more generally -// useful handling of non-register, non-constant operands. 
-class movalias - : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm), 0>; - -def : movalias; -def : movalias; -def : movalias; -def : movalias; - -def movw_addressref_g0 : ComplexPattern">; -def movw_addressref_g1 : ComplexPattern">; -def movw_addressref_g2 : ComplexPattern">; -def movw_addressref_g3 : ComplexPattern">; - -def : Pat<(A64WrapperLarge movw_addressref_g3:$G3, movw_addressref_g2:$G2, - movw_addressref_g1:$G1, movw_addressref_g0:$G0), - (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref_g3:$G3), - movw_addressref_g2:$G2), - movw_addressref_g1:$G1), - movw_addressref_g0:$G0)>; - -//===----------------------------------------------------------------------===// -// PC-relative addressing instructions -//===----------------------------------------------------------------------===// -// Contains: ADR, ADRP - -def adr_label : Operand { - let EncoderMethod = "getLabelOpValue"; - - // This label is a 21-bit offset from PC, unscaled - let PrintMethod = "printLabelOperand<21, 1>"; - let ParserMatchClass = label_asmoperand<21, 1>; - let OperandType = "OPERAND_PCREL"; -} - -def adrp_label_asmoperand : AsmOperandClass { - let Name = "AdrpLabel"; - let RenderMethod = "addLabelOperands<21, 4096>"; - let DiagnosticType = "Label"; -} - -def adrp_label : Operand { - let EncoderMethod = "getAdrpLabelOpValue"; - - // This label is a 21-bit offset from PC, scaled by the page-size: 4096. - let PrintMethod = "printLabelOperand<21, 4096>"; - let ParserMatchClass = adrp_label_asmoperand; - let OperandType = "OPERAND_PCREL"; -} - -let hasSideEffects = 0 in { - def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label), - "adr\t$Rd, $Label", [], NoItinerary>, - Sched<[WriteALUs]>; - - def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label), - "adrp\t$Rd, $Label", [], NoItinerary>, - Sched<[WriteALUs]>; -} - -//===----------------------------------------------------------------------===// -// System instructions -//===----------------------------------------------------------------------===// -// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS -// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL - -// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values. -def uimm3_asmoperand : AsmOperandClass { - let Name = "UImm3"; - let PredicateMethod = "isUImm<3>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm3"; -} - -def uimm3 : Operand { - let ParserMatchClass = uimm3_asmoperand; -} - -// The HINT alias can accept a simple unsigned 7-bit immediate. -def uimm7_asmoperand : AsmOperandClass { - let Name = "UImm7"; - let PredicateMethod = "isUImm<7>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm7"; -} - -def uimm7 : Operand { - let ParserMatchClass = uimm7_asmoperand; -} - -// Multiclass namedimm is defined with the prefetch operands. Most of these fit -// into the NamedImmMapper scheme well: they either accept a named operand or -// any immediate under a particular value (which may be 0, implying no immediate -// is allowed). 
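To make the NamedImmMapper idea concrete: each of these operand classes is essentially a two-way table between an accepted spelling and the small immediate that lands in the instruction (typically its CRm field). A hedged C++ sketch of such a mapper, using the data-barrier options as the example; only "ish" == 0xb is confirmed by this file's own "DMB ISH" comment further down, and the remaining entries follow the usual ARMv8 option names, so treat them as illustrative:

#include <cstdint>
#include <cstring>

// NamedImmMapper-style lookup: operand spelling <-> immediate value.
struct NamedImm { const char *Name; uint8_t Value; };

static const NamedImm DBarrierOptions[] = {
    {"oshld", 0x1}, {"oshst", 0x2}, {"osh", 0x3},
    {"nshld", 0x5}, {"nshst", 0x6}, {"nsh", 0x7},
    {"ishld", 0x9}, {"ishst", 0xa}, {"ish", 0xb},
    {"ld",    0xd}, {"st",    0xe}, {"sy",  0xf},
};

// Returns true and sets Val if Name is a recognized barrier option;
// otherwise the assembler falls back to accepting a raw #imm.
bool barrierFromString(const char *Name, uint8_t &Val) {
  for (const NamedImm &E : DBarrierOptions)
    if (std::strcmp(Name, E.Name) == 0) { Val = E.Value; return true; }
  return false;
}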
-defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">; -defm isb : namedimm<"isb", "A64ISB::ISBMapper">; -defm ic : namedimm<"ic", "A64IC::ICMapper">; -defm dc : namedimm<"dc", "A64DC::DCMapper">; -defm at : namedimm<"at", "A64AT::ATMapper">; -defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">; - -// However, MRS and MSR are more complicated for a few reasons: -// * There are ~1000 generic names S3____ which have an -// implementation-defined effect -// * Most registers are shared, but some are read-only or write-only. -// * There is a variant of MSR which accepts the same register name (SPSel), -// but which would have a different encoding. - -// In principle these could be resolved in with more complicated subclasses of -// NamedImmMapper, however that imposes an overhead on other "named -// immediates". Both in concrete terms with virtual tables and in unnecessary -// abstraction. - -// The solution adopted here is to take the MRS/MSR Mappers out of the usual -// hierarchy (they're not derived from NamedImmMapper) and to add logic for -// their special situation. -def mrs_asmoperand : AsmOperandClass { - let Name = "MRS"; - let ParserMethod = "ParseSysRegOperand"; - let DiagnosticType = "MRS"; -} - -def mrs_op : Operand { - let ParserMatchClass = mrs_asmoperand; - let PrintMethod = "printMRSOperand"; - let DecoderMethod = "DecodeMRSOperand"; -} - -def msr_asmoperand : AsmOperandClass { - let Name = "MSRWithReg"; - - // Note that SPSel is valid for both this and the pstate operands, but with - // different immediate encodings. This is why these operands provide a string - // AArch64Operand rather than an immediate. The overlap is small enough that - // it could be resolved with hackery now, but who can say in future? - let ParserMethod = "ParseSysRegOperand"; - let DiagnosticType = "MSR"; -} - -def msr_op : Operand { - let ParserMatchClass = msr_asmoperand; - let PrintMethod = "printMSROperand"; - let DecoderMethod = "DecodeMSROperand"; -} - -def pstate_asmoperand : AsmOperandClass { - let Name = "MSRPState"; - // See comment above about parser. - let ParserMethod = "ParseSysRegOperand"; - let DiagnosticType = "MSR"; -} - -def pstate_op : Operand { - let ParserMatchClass = pstate_asmoperand; - let PrintMethod = "printNamedImmOperand"; - let DecoderMethod = "DecodeNamedImmOperand"; -} - -// When is specified, an assembler should accept something like "C4", not -// the usual "#4" immediate. -def CRx_asmoperand : AsmOperandClass { - let Name = "CRx"; - let PredicateMethod = "isUImm<4>"; - let RenderMethod = "addImmOperands"; - let ParserMethod = "ParseCRxOperand"; - // Diagnostics are handled in all cases by ParseCRxOperand. -} - -def CRx : Operand { - let ParserMatchClass = CRx_asmoperand; - let PrintMethod = "printCRxOperand"; -} - - -// Finally, we can start defining the instructions. - -// HINT is straightforward, with a few aliases. -def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7", - [], NoItinerary> { - bits<7> UImm7; - let CRm = UImm7{6-3}; - let Op2 = UImm7{2-0}; - - let Op0 = 0b00; - let Op1 = 0b011; - let CRn = 0b0010; - let Rt = 0b11111; -} - -def : InstAlias<"nop", (HINTi 0)>; -def : InstAlias<"yield", (HINTi 1)>; -def : InstAlias<"wfe", (HINTi 2)>; -def : InstAlias<"wfi", (HINTi 3)>; -def : InstAlias<"sev", (HINTi 4)>; -def : InstAlias<"sevl", (HINTi 5)>; - -// Quite a few instructions then follow a similar pattern of fixing common -// fields in the bitpattern, we'll define a helper-class for them. 
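The fields in question are the architectural system-instruction selectors op0, op1, CRn, CRm and op2. The helper class below fixes most of them per instruction; for MRS/MSR, the operands further down instead pack all five into one 16-bit value (op0 in bits 15-14, op1 in 13-11, CRn in 10-7, CRm in 6-3, op2 in 2-0). A small C++ restatement of that packing, with hypothetical helper names:

#include <cstdint>

// Pack/unpack the 16-bit MRS/MSR system-register selector used below.
uint16_t packSysReg(unsigned Op0, unsigned Op1, unsigned CRn,
                    unsigned CRm, unsigned Op2) {
  return uint16_t((Op0 & 0x3) << 14 | (Op1 & 0x7) << 11 |
                  (CRn & 0xf) << 7  | (CRm & 0xf) << 3 | (Op2 & 0x7));
}

void unpackSysReg(uint16_t SysReg, unsigned &Op0, unsigned &Op1,
                  unsigned &CRn, unsigned &CRm, unsigned &Op2) {
  Op0 = (SysReg >> 14) & 0x3;
  Op1 = (SysReg >> 11) & 0x7;
  CRn = (SysReg >> 7) & 0xf;
  CRm = (SysReg >> 3) & 0xf;
  Op2 = SysReg & 0x7;
}

For example, the (MRSxi 0xde82) used for A64threadpointer later in the file unpacks to op0=3, op1=3, CRn=13, CRm=0, op2=2, which is TPIDR_EL0.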
-class simple_sys op0, bits<3> op1, bits<4> crn, bits<3> op2, - Operand operand, string asmop> - : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"), - [], NoItinerary> { - let Op0 = op0; - let Op1 = op1; - let CRn = crn; - let Op2 = op2; - let Rt = 0b11111; -} - - -def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">; -def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">; -def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">; -def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">; - -def : InstAlias<"clrex", (CLREXi 0b1111)>; -def : InstAlias<"isb", (ISBi 0b1111)>; - -// (DMBi 0xb) is a "DMB ISH" instruciton, appropriate for Linux SMP -// configurations at least. -def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>; - -// Any SYS bitpattern can be represented with a complex and opaque "SYS" -// instruction. -def SYSiccix : A64I_system<0b0, (outs), - (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, - uimm3:$Op2, GPR64:$Rt), - "sys\t$Op1, $CRn, $CRm, $Op2, $Rt", - [], NoItinerary> { - let Op0 = 0b01; -} - -// You can skip the Xt argument whether it makes sense or not for the generic -// SYS instruction. -def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2", - (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>; - - -// But many have aliases, which obviously don't fit into -class SYSalias - : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> { - let isAsmParserOnly = 1; - - bits<14> SysOp; - let Op0 = 0b01; - let Op1 = SysOp{13-11}; - let CRn = SysOp{10-7}; - let CRm = SysOp{6-3}; - let Op2 = SysOp{2-0}; -} - -def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">; - -def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> { - let Rt = 0b11111; -} - -def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">; -def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">; - -def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">; - -def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> { - let Rt = 0b11111; -} - - -def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt), - (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2), - "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2", - [], NoItinerary> { - let Op0 = 0b01; -} - -// The instructions themselves are rather simple for MSR and MRS. -def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt), - "msr\t$SysReg, $Rt", [], NoItinerary> { - bits<16> SysReg; - let Op0 = SysReg{15-14}; - let Op1 = SysReg{13-11}; - let CRn = SysReg{10-7}; - let CRm = SysReg{6-3}; - let Op2 = SysReg{2-0}; -} - -def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg), - "mrs\t$Rt, $SysReg", [], NoItinerary> { - bits<16> SysReg; - let Op0 = SysReg{15-14}; - let Op1 = SysReg{13-11}; - let CRn = SysReg{10-7}; - let CRm = SysReg{6-3}; - let Op2 = SysReg{2-0}; -} - -def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm), - "msr\t$PState, $CRm", [], NoItinerary> { - bits<6> PState; - - let Op0 = 0b00; - let Op1 = PState{5-3}; - let CRn = 0b0100; - let Op2 = PState{2-0}; - let Rt = 0b11111; -} - -//===----------------------------------------------------------------------===// -// Test & branch (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: TBZ, TBNZ - -// The bit to test is a simple unsigned 6-bit immediate in the X-register -// versions. 
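The selection patterns below (tstb64_pat/tstb32_pat) match an AND with a single power-of-two mask compared against zero; the immediate the instruction wants is simply the index of that bit. A hedged C++ sketch of the conversion (the helper name is illustrative, not the ComplexPattern referenced here):

#include <cstdint>
#include <optional>

// If Mask has exactly one bit set, return the bit index a TBZ/TBNZ would
// test; otherwise the (and x, Mask) == 0 idiom cannot use test-and-branch.
std::optional<unsigned> tbBitFromMask(uint64_t Mask) {
  if (Mask == 0 || (Mask & (Mask - 1)) != 0)
    return std::nullopt;            // zero bits set, or more than one
  unsigned Bit = 0;
  while (!(Mask & 1)) { Mask >>= 1; ++Bit; }
  return Bit;                       // tbz/tbnz xN, #Bit, label
}

For the W-register forms defined below only bits 0 to 31 are legal, which is why those definitions force Imm{5} to zero.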
-def uimm6 : Operand { - let ParserMatchClass = uimm6_asmoperand; -} - -def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>; - -def tbimm_target : Operand { - let EncoderMethod = "getLabelOpValue"; - - // This label is a 14-bit offset from PC, scaled by the instruction-width: 4. - let PrintMethod = "printLabelOperand<14, 4>"; - let ParserMatchClass = label_wid14_scal4_asmoperand; - - let OperandType = "OPERAND_PCREL"; -} - -def A64eq : ImmLeaf; -def A64ne : ImmLeaf; - -// These instructions correspond to patterns involving "and" with a power of -// two, which we need to be able to select. -def tstb64_pat : ComplexPattern">; -def tstb32_pat : ComplexPattern">; - -let isBranch = 1, isTerminator = 1 in { - def TBZxii : A64I_TBimm<0b0, (outs), - (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), - "tbz\t$Rt, $Imm, $Label", - [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0), - A64eq, bb:$Label)], - NoItinerary>, - Sched<[WriteBr]>; - - def TBNZxii : A64I_TBimm<0b1, (outs), - (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), - "tbnz\t$Rt, $Imm, $Label", - [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0), - A64ne, bb:$Label)], - NoItinerary>, - Sched<[WriteBr]>; - - - // Note, these instructions overlap with the above 64-bit patterns. This is - // intentional, "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" would both - // do the same thing and are both permitted assembly. They also both have - // sensible DAG patterns. - def TBZwii : A64I_TBimm<0b0, (outs), - (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label), - "tbz\t$Rt, $Imm, $Label", - [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0), - A64eq, bb:$Label)], - NoItinerary>, - Sched<[WriteBr]> { - let Imm{5} = 0b0; - } - - def TBNZwii : A64I_TBimm<0b1, (outs), - (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label), - "tbnz\t$Rt, $Imm, $Label", - [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0), - A64ne, bb:$Label)], - NoItinerary>, - Sched<[WriteBr]> { - let Imm{5} = 0b0; - } -} - -//===----------------------------------------------------------------------===// -// Unconditional branch (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: B, BL - -def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>; - -def bimm_target : Operand { - let EncoderMethod = "getLabelOpValue"; - - // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. - let PrintMethod = "printLabelOperand<26, 4>"; - let ParserMatchClass = label_wid26_scal4_asmoperand; - - let OperandType = "OPERAND_PCREL"; -} - -def blimm_target : Operand { - let EncoderMethod = "getLabelOpValue"; - - // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. 
- let PrintMethod = "printLabelOperand<26, 4>"; - let ParserMatchClass = label_wid26_scal4_asmoperand; - - let OperandType = "OPERAND_PCREL"; -} - -class A64I_BimmImpl patterns, Operand lbl_type> - : A64I_Bimm, - Sched<[WriteBr]>; - -let isBranch = 1 in { - def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> { - let isTerminator = 1; - let isBarrier = 1; - } - - let SchedRW = [WriteBrL] in { - def BLimm : A64I_BimmImpl<0b1, "bl", - [(AArch64Call tglobaladdr:$Label)], blimm_target> { - let isCall = 1; - let Defs = [X30]; - } - } -} - -def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>; - -//===----------------------------------------------------------------------===// -// Unconditional branch (register) instructions -//===----------------------------------------------------------------------===// -// Contains: BR, BLR, RET, ERET, DRP. - -// Most of the notional opcode fields in the A64I_Breg format are fixed in A64 -// at the moment. -class A64I_BregImpl opc, - dag outs, dag ins, string asmstr, list patterns, - InstrItinClass itin = NoItinerary> - : A64I_Breg, - Sched<[WriteBr]> { - let isBranch = 1; - let isIndirectBranch = 1; -} - -// Note that these are not marked isCall or isReturn because as far as LLVM is -// concerned they're not. "ret" is just another jump unless it has been selected -// by LLVM as the function's return. - -let isBranch = 1 in { - def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn), - "br\t$Rn", [(brind i64:$Rn)]> { - let isBarrier = 1; - let isTerminator = 1; - } - - let SchedRW = [WriteBrL] in { - def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn), - "blr\t$Rn", [(AArch64Call i64:$Rn)]> { - let isBarrier = 0; - let isCall = 1; - let Defs = [X30]; - } - } - - def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn), - "ret\t$Rn", []> { - let isBarrier = 1; - let isTerminator = 1; - let isReturn = 1; - } - - // Create a separate pseudo-instruction for codegen to use so that we don't - // flag x30 as used in every function. It'll be restored before the RET by the - // epilogue if it's legitimately used. - def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> { - let isTerminator = 1; - let isBarrier = 1; - let isReturn = 1; - } - - def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> { - let Rn = 0b11111; - let isBarrier = 1; - let isTerminator = 1; - let isReturn = 1; - } - - def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> { - let Rn = 0b11111; - let isBarrier = 1; - } -} - -def RETAlias : InstAlias<"ret", (RETx X30)>; - - -//===----------------------------------------------------------------------===// -// Address generation patterns -//===----------------------------------------------------------------------===// - -// Primary method of address generation for the small/absolute memory model is -// an ADRP/ADR pair: -// ADRP x0, some_variable -// ADD x0, x0, #:lo12:some_variable -// -// The load/store elision of the ADD is accomplished when selecting -// addressing-modes. This just mops up the cases where that doesn't work and we -// really need an address in some register. - -// This wrapper applies a LO12 modifier to the address. Otherwise we could just -// use the same address. 
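Numerically, ADRP produces the 4 KiB-aligned page address of the symbol (encoded as a page delta from the current PC) and the :lo12: ADD supplies the remaining low 12 bits. A small C++ sketch of the split, ignoring relocations entirely:

#include <cstdint>

uint64_t pageOf(uint64_t Addr) { return Addr & ~uint64_t(0xFFF); }
uint64_t lo12Of(uint64_t Addr) { return Addr & 0xFFF; }

// adrp x0, Target  ;  add x0, x0, #:lo12:Target
uint64_t materializeAddress(uint64_t PC, uint64_t Target) {
  uint64_t X0 = pageOf(PC) + (pageOf(Target) - pageOf(PC)); // adrp
  X0 += lo12Of(Target);                                     // add :lo12:
  return X0;                                                // == Target
}

When the low bits are folded into a load/store immediate instead of an ADD, that immediate is scaled by the access size, which is where the alignment checks in the load/store patterns further down come from.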
- -class ADRP_ADD - : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)), - (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>; - -def : ADRP_ADD; -def : ADRP_ADD; -def : ADRP_ADD; -def : ADRP_ADD; -def : ADRP_ADD; -def : ADRP_ADD; - -//===----------------------------------------------------------------------===// -// GOT access patterns -//===----------------------------------------------------------------------===// - -class GOTLoadSmall - : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)), - (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>; - -def : GOTLoadSmall; -def : GOTLoadSmall; -def : GOTLoadSmall; - -//===----------------------------------------------------------------------===// -// Tail call handling -//===----------------------------------------------------------------------===// - -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in { - def TC_RETURNdi - : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff), - [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>; - - def TC_RETURNxi - : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), - [(AArch64tcret i64:$dst, (i32 timm:$FPDiff))]>; -} - -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - Uses = [XSP] in { - def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [], - (Bimm bimm_target:$Label)>; - - def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [], - (BRx GPR64:$Rd)>; -} - - -def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), - (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>; - -//===----------------------------------------------------------------------===// -// Thread local storage -//===----------------------------------------------------------------------===// - -// This is a pseudo-instruction representing the ".tlsdesccall" directive in -// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the -// current location. It should always be immediately followed by a BLR -// instruction, and is intended solely for relaxation by the linker. - -def : Pat<(A64threadpointer), (MRSxi 0xde82)>; - -def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> { - let hasSideEffects = 1; -} - -def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var), - [(A64tlsdesc_blr i64:$Rn, tglobaltlsaddr:$Var)]> { - let isCall = 1; - let Defs = [X30]; -} - -def : Pat<(A64tlsdesc_blr i64:$Rn, texternalsym:$Var), - (TLSDESC_BLRx $Rn, texternalsym:$Var)>; - -//===----------------------------------------------------------------------===// -// Bitfield patterns -//===----------------------------------------------------------------------===// - -def bfi32_lsb_to_immr : SDNodeXFormgetTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64); -}]>; - -def bfi64_lsb_to_immr : SDNodeXFormgetTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64); -}]>; - -def bfi_width_to_imms : SDNodeXFormgetTargetConstant(N->getZExtValue() - 1, MVT::i64); -}]>; - - -// The simpler patterns deal with cases where no AND mask is actually needed -// (either all bits are used or the low 32 bits are used). 
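The three SDNodeXForms above re-encode the (lsb, width) pair that the A64Bfi node carries into the rotate/size immediates the BFI encoding actually uses: ImmR = (regsize - lsb) % regsize and ImmS = width - 1. The same arithmetic in C++, purely for illustration:

#include <cassert>

// Convert a bitfield insert described as (lsb, width) into the ImmR/ImmS
// fields of the 32- or 64-bit BFI encoding, mirroring the XForms above.
void bfiImms(unsigned RegSize, unsigned Lsb, unsigned Width,
             unsigned &ImmR, unsigned &ImmS) {
  assert((RegSize == 32 || RegSize == 64) && Lsb < RegSize &&
         Width >= 1 && Lsb + Width <= RegSize);
  ImmR = (RegSize - Lsb) % RegSize; // right-rotate amount
  ImmS = Width - 1;                 // highest bit index of the field
}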
-let AddedComplexity = 10 in { - -def : Pat<(A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS), - (BFIxxii $src, $Rn, - (bfi64_lsb_to_immr (i64 imm:$ImmR)), - (bfi_width_to_imms (i64 imm:$ImmS)))>; - -def : Pat<(A64Bfi i32:$src, i32:$Rn, imm:$ImmR, imm:$ImmS), - (BFIwwii $src, $Rn, - (bfi32_lsb_to_immr (i64 imm:$ImmR)), - (bfi_width_to_imms (i64 imm:$ImmS)))>; - - -def : Pat<(and (A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS), - (i64 4294967295)), - (SUBREG_TO_REG (i64 0), - (BFIwwii (EXTRACT_SUBREG $src, sub_32), - (EXTRACT_SUBREG $Rn, sub_32), - (bfi32_lsb_to_immr (i64 imm:$ImmR)), - (bfi_width_to_imms (i64 imm:$ImmS))), - sub_32)>; - -} - -//===----------------------------------------------------------------------===// -// Miscellaneous patterns -//===----------------------------------------------------------------------===// - -// Truncation from 64 to 32-bits just involves renaming your register. -def : Pat<(i32 (trunc i64:$val)), (EXTRACT_SUBREG $val, sub_32)>; - -// Similarly, extension where we don't care about the high bits is -// just a rename. -def : Pat<(i64 (anyext i32:$val)), - (INSERT_SUBREG (IMPLICIT_DEF), $val, sub_32)>; - -// SELECT instructions providing f128 types need to be handled by a -// pseudo-instruction since the eventual code will need to introduce basic -// blocks and control flow. -def F128CSEL : PseudoInst<(outs FPR128:$Rd), - (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond), - [(set f128:$Rd, (simple_select f128:$Rn, f128:$Rm))]> { - let Uses = [NZCV]; - let usesCustomInserter = 1; -} - -//===----------------------------------------------------------------------===// -// Load/store patterns -//===----------------------------------------------------------------------===// - -// There are lots of patterns here, because we need to allow at least three -// parameters to vary independently. -// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ... -// 2. LLVM source: zextloadi8, anyextloadi8, ... -// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ... -// -// The biggest problem turns out to be the address-generation variable. At the -// point of instantiation we need to produce two DAGs, one for the pattern and -// one for the instruction. Doing this at the lowest level of classes doesn't -// work. -// -// Consider the simple uimm12 addressing mode, and the desire to match both (add -// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the -// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or -// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this -// operation, and PatFrags are for selection not output. -// -// As a result, the address-generation patterns are the final -// instantiations. However, we do still need to vary the operand for the address -// further down (At the point we're deciding A64WrapperSmall, we don't know -// the memory width of the operation). - -//===------------------------------ -// 1. Basic infrastructural defs -//===------------------------------ - -// First, some simple classes for !foreach and !subst to use: -class Decls { - dag pattern; -} - -def decls : Decls; -def ALIGN; -def INST; -def OFFSET; -def SHIFT; - -// You can't use !subst on an actual immediate, but you *can* use it on an -// operand record that happens to match a single immediate. So we do. 
-def imm_eq0 : ImmLeaf; -def imm_eq1 : ImmLeaf; -def imm_eq2 : ImmLeaf; -def imm_eq3 : ImmLeaf; -def imm_eq4 : ImmLeaf; - -// If the low bits of a pointer are known to be 0 then an "or" is just as good -// as addition for computing an offset. This fragment forwards that check for -// TableGen's use. -def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs), -[{ - return CurDAG->isBaseWithConstantOffset(SDValue(N, 0)); -}]>; - -// Load/store (unsigned immediate) operations with relocations against global -// symbols (for lo12) are only valid if those symbols have correct alignment -// (since the immediate offset is divided by the access scale, it can't have a -// remainder). -// -// The guaranteed alignment is provided as part of the WrapperSmall -// operation, and checked against one of these. -def any_align : ImmLeaf; -def min_align2 : ImmLeaf= 2; }]>; -def min_align4 : ImmLeaf= 4; }]>; -def min_align8 : ImmLeaf= 8; }]>; -def min_align16 : ImmLeaf= 16; }]>; - -// "Normal" load/store instructions can be used on atomic operations, provided -// the ordering parameter is at most "monotonic". Anything above that needs -// special handling with acquire/release instructions. -class simple_load - : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - return cast(N)->getOrdering() <= Monotonic; -}]>; - -def atomic_load_simple_i8 : simple_load; -def atomic_load_simple_i16 : simple_load; -def atomic_load_simple_i32 : simple_load; -def atomic_load_simple_i64 : simple_load; - -class simple_store - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - return cast(N)->getOrdering() <= Monotonic; -}]>; - -def atomic_store_simple_i8 : simple_store; -def atomic_store_simple_i16 : simple_store; -def atomic_store_simple_i32 : simple_store; -def atomic_store_simple_i64 : simple_store; - -//===------------------------------ -// 2. UImm12 and SImm9 -//===------------------------------ - -// These instructions have two operands providing the address so they can be -// treated similarly for most purposes. - -//===------------------------------ -// 2.1 Base patterns covering extend/truncate semantics -//===------------------------------ - -// Atomic patterns can be shared between integer operations of all sizes, a -// quick multiclass here allows reuse. -multiclass ls_atomic_pats { - def : Pat<(!cast("atomic_load_simple_" # sty) address), - (LOAD Base, Offset)>; - - def : Pat<(!cast("atomic_store_simple_" # sty) address, transty:$Rt), - (STORE $Rt, Base, Offset)>; -} - -// Instructions accessing a memory chunk smaller than a register (or, in a -// pinch, the same size) have a characteristic set of patterns they want to -// match: extending loads and truncating stores. This class deals with the -// sign-neutral version of those patterns. -// -// It will be instantiated across multiple addressing-modes. -multiclass ls_small_pats - : ls_atomic_pats { - def : Pat<(!cast(zextload # sty) address), (LOAD Base, Offset)>; - - def : Pat<(!cast(extload # sty) address), (LOAD Base, Offset)>; - - // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit - // register was actually set. 
- def : Pat<(i64 (!cast(zextload # sty) address)), - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>; - - def : Pat<(i64 (!cast(extload # sty) address)), - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>; - - def : Pat<(!cast(truncstore # sty) i32:$Rt, address), - (STORE $Rt, Base, Offset)>; - - // For truncating store from 64-bits, we have to manually tell LLVM to - // ignore the high bits of the x register. - def : Pat<(!cast(truncstore # sty) i64:$Rt, address), - (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>; -} - -// Next come patterns for sign-extending loads. -multiclass load_signed_pats { - def : Pat<(i32 (!cast("sextload" # sty) address)), - (!cast("LDRS" # T # "w" # U) Base, Offset)>; - - def : Pat<(i64 (!cast("sextload" # sty) address)), - (!cast("LDRS" # T # "x" # U) Base, Offset)>; - -} - -// and finally "natural-width" loads and stores come next. -multiclass ls_neutral_pats { - def : Pat<(sty (load address)), (LOAD Base, Offset)>; - def : Pat<(store sty:$Rt, address), (STORE $Rt, Base, Offset)>; -} - -// Integer operations also get atomic instructions to select for. -multiclass ls_int_neutral_pats - : ls_neutral_pats, - ls_atomic_pats; - -//===------------------------------ -// 2.2. Addressing-mode instantiations -//===------------------------------ - -multiclass uimm12_pats { - defm : ls_small_pats; - defm : ls_small_pats; - defm : ls_small_pats; - - defm : ls_int_neutral_pats; - - defm : ls_int_neutral_pats; - - defm : ls_neutral_pats; - - defm : ls_neutral_pats; - - defm : ls_neutral_pats; - - defm : ls_neutral_pats; - - defm : load_signed_pats<"B", "", Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, byte_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, byte_uimm12, - !subst(ALIGN, any_align, decls.pattern))), - i8>; - - defm : load_signed_pats<"H", "", Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, hword_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, hword_uimm12, - !subst(ALIGN, min_align2, decls.pattern))), - i16>; - - def : Pat<(sextloadi32 !foreach(decls.pattern, address, - !subst(OFFSET, word_uimm12, - !subst(ALIGN, min_align4, decls.pattern)))), - (LDRSWx Base, !foreach(decls.pattern, Offset, - !subst(OFFSET, word_uimm12, decls.pattern)))>; -} - -// Straightforward patterns of last resort: a pointer with or without an -// appropriate offset. -defm : uimm12_pats<(i64 i64:$Rn), (i64 i64:$Rn), (i64 0)>; -defm : uimm12_pats<(add i64:$Rn, OFFSET:$UImm12), - (i64 i64:$Rn), (i64 OFFSET:$UImm12)>; - -// The offset could be hidden behind an "or", of course: -defm : uimm12_pats<(add_like_or i64:$Rn, OFFSET:$UImm12), - (i64 i64:$Rn), (i64 OFFSET:$UImm12)>; - -// Global addresses under the small-absolute model should use these -// instructions. There are ELF relocations specifically for it. -defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN), - (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>; - -defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12, - ALIGN), - (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>; - -// External symbols that make it this far should also get standard relocations. 
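One more note on the ALIGN/min_align operands threaded through these patterns: the unsigned-immediate load/store forms (and a :lo12: offset folded into them) encode offset divided by access size in a 12-bit field, so the raw offset has to divide evenly and the quotient has to fit. A sketch of that encodability check, with hypothetical helper names:

#include <cstdint>

// Scaled unsigned 12-bit offset: must be a multiple of the access size,
// with the scaled value below 4096.
bool isLegalUImm12Offset(uint64_t Offset, unsigned AccessSize) {
  return Offset % AccessSize == 0 && Offset / AccessSize < 4096;
}

// The unscaled LDUR/STUR forms handled by the simm9 patterns below accept
// any byte offset in [-256, 255] instead.
bool isLegalSImm9Offset(int64_t Offset) {
  return Offset >= -256 && Offset <= 255;
}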
-defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12, - ALIGN), - (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>; - -defm : uimm12_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN), - (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>; - -// We also want to use uimm12 instructions for local variables at the moment. -def tframeindex_XFORM : SDNodeXForm(N)->getIndex(); - return CurDAG->getTargetFrameIndex(FI, MVT::i64); -}]>; - -defm : uimm12_pats<(i64 frameindex:$Rn), - (tframeindex_XFORM tframeindex:$Rn), (i64 0)>; - -// These can be much simpler than uimm12 because we don't to change the operand -// type (e.g. LDURB and LDURH take the same operands). -multiclass simm9_pats { - defm : ls_small_pats; - defm : ls_small_pats; - - defm : ls_int_neutral_pats; - defm : ls_int_neutral_pats; - - defm : ls_neutral_pats; - defm : ls_neutral_pats; - defm : ls_neutral_pats; - defm : ls_neutral_pats; - - def : Pat<(i64 (zextloadi32 address)), - (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>; - - def : Pat<(truncstorei32 i64:$Rt, address), - (LS32_STUR (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>; - - defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>; - defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>; - def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>; -} - -defm : simm9_pats<(add i64:$Rn, simm9:$SImm9), - (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>; - -defm : simm9_pats<(add_like_or i64:$Rn, simm9:$SImm9), - (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>; - - -//===------------------------------ -// 3. Register offset patterns -//===------------------------------ - -// Atomic patterns can be shared between integer operations of all sizes, a -// quick multiclass here allows reuse. -multiclass ro_atomic_pats { - def : Pat<(!cast("atomic_load_simple_" # sty) address), - (LOAD Base, Offset, Extend)>; - - def : Pat<(!cast("atomic_store_simple_" # sty) address, transty:$Rt), - (STORE $Rt, Base, Offset, Extend)>; -} - -// The register offset instructions take three operands giving the instruction, -// and have an annoying split between instructions where Rm is 32-bit and -// 64-bit. So we need a special hierarchy to describe them. Other than that the -// same operations should be supported as for simm9 and uimm12 addressing. - -multiclass ro_small_pats - : ro_atomic_pats { - def : Pat<(!cast(zextload # sty) address), - (LOAD Base, Offset, Extend)>; - - def : Pat<(!cast(extload # sty) address), - (LOAD Base, Offset, Extend)>; - - // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit - // register was actually set. - def : Pat<(i64 (!cast(zextload # sty) address)), - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>; - - def : Pat<(i64 (!cast(extload # sty) address)), - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>; - - def : Pat<(!cast(truncstore # sty) i32:$Rt, address), - (STORE $Rt, Base, Offset, Extend)>; - - // For truncating store from 64-bits, we have to manually tell LLVM to - // ignore the high bits of the x register. - def : Pat<(!cast(truncstore # sty) i64:$Rt, address), - (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset, Extend)>; - -} - -// Next come patterns for sign-extending loads. 
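These correspond to the LDRSB/LDRSH/LDRSW family, where the load itself performs the sign-extension into a 32- or 64-bit register, so no separate extend instruction is needed. In C terms (each function typically compiles to a single sign-extending load):

#include <cstdint>

int32_t loadS8to32(const int8_t *P)   { return *P; } // ldrsb w0, [x0]
int64_t loadS8to64(const int8_t *P)   { return *P; } // ldrsb x0, [x0]
int64_t loadS32to64(const int32_t *P) { return *P; } // ldrsw x0, [x0]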
-multiclass ro_signed_pats { - def : Pat<(i32 (!cast("sextload" # sty) address)), - (!cast("LDRS" # T # "w_" # Rm # "_RegOffset") - Base, Offset, Extend)>; - - def : Pat<(i64 (!cast("sextload" # sty) address)), - (!cast("LDRS" # T # "x_" # Rm # "_RegOffset") - Base, Offset, Extend)>; -} - -// and finally "natural-width" loads and stores come next. -multiclass ro_neutral_pats { - def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>; - def : Pat<(store sty:$Rt, address), - (STORE $Rt, Base, Offset, Extend)>; -} - -multiclass ro_int_neutral_pats - : ro_neutral_pats, - ro_atomic_pats; - -multiclass regoff_pats { - defm : ro_small_pats("LS8_" # Rm # "_RegOffset_LDR"), - !cast("LS8_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq0, decls.pattern)), - i8>; - defm : ro_small_pats("LS16_" # Rm # "_RegOffset_LDR"), - !cast("LS16_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq1, decls.pattern)), - i16>; - defm : ro_small_pats("LS32_" # Rm # "_RegOffset_LDR"), - !cast("LS32_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq2, decls.pattern)), - i32>; - - defm : ro_int_neutral_pats< - !cast("LS32_" # Rm # "_RegOffset_LDR"), - !cast("LS32_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq2, decls.pattern)), - i32>; - - defm : ro_int_neutral_pats< - !cast("LS64_" # Rm # "_RegOffset_LDR"), - !cast("LS64_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq3, decls.pattern)), - i64>; - - defm : ro_neutral_pats("LSFP16_" # Rm # "_RegOffset_LDR"), - !cast("LSFP16_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq1, decls.pattern)), - f16>; - - defm : ro_neutral_pats("LSFP32_" # Rm # "_RegOffset_LDR"), - !cast("LSFP32_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq2, decls.pattern)), - f32>; - - defm : ro_neutral_pats("LSFP64_" # Rm # "_RegOffset_LDR"), - !cast("LSFP64_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq3, decls.pattern)), - f64>; - - defm : ro_neutral_pats("LSFP128_" # Rm # "_RegOffset_LDR"), - !cast("LSFP128_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq4, decls.pattern)), - f128>; - - defm : ro_signed_pats<"B", Rm, Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq0, decls.pattern)), - i8>; - - defm : ro_signed_pats<"H", Rm, Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq1, decls.pattern)), - i16>; - - def : Pat<(sextloadi32 !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq2, decls.pattern))), - (!cast("LDRSWx_" # Rm # "_RegOffset") - Base, Offset, Extend)>; -} - - -// Finally we're in a position to tell LLVM exactly what addresses are reachable -// using register-offset instructions. Essentially a base plus a possibly -// extended, possibly shifted (by access size) offset. 
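Concretely, the reachable forms are [Xn, Xm], [Xn, Xm, lsl #s] and the 32-bit-index variants [Xn, Wm, uxtw|sxtw {#s}], where s is either 0 or log2 of the access size (the imm_eqN operands substituted for SHIFT above). A C++ sketch of the effective-address computation, for illustration only:

#include <cstdint>

// Effective address for the register-offset forms matched below.
uint64_t regOffsetAddr(uint64_t Xn, uint64_t Xm, bool Index32, bool IsSigned,
                       unsigned Shift) {
  uint64_t Off = Xm;
  if (Index32)
    Off = IsSigned ? uint64_t(int64_t(int32_t(Xm)))  // sxtw
                   : uint64_t(uint32_t(Xm));         // uxtw
  return Xn + (Off << Shift);
}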
- -defm : regoff_pats<"Wm", (add i64:$Rn, (sext i32:$Rm)), - (i64 i64:$Rn), (i32 i32:$Rm), (i64 6)>; - -defm : regoff_pats<"Wm", (add i64:$Rn, (shl (sext i32:$Rm), SHIFT)), - (i64 i64:$Rn), (i32 i32:$Rm), (i64 7)>; - -defm : regoff_pats<"Wm", (add i64:$Rn, (zext i32:$Rm)), - (i64 i64:$Rn), (i32 i32:$Rm), (i64 2)>; - -defm : regoff_pats<"Wm", (add i64:$Rn, (shl (zext i32:$Rm), SHIFT)), - (i64 i64:$Rn), (i32 i32:$Rm), (i64 3)>; - -defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm), - (i64 i64:$Rn), (i64 i64:$Rm), (i64 2)>; - -defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)), - (i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD (NEON) Support -// - -include "AArch64InstrNEON.td" diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td deleted file mode 100644 index 01a59a1a6a8b..000000000000 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ /dev/null @@ -1,9474 +0,0 @@ -//===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the AArch64 NEON instruction set. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// NEON-specific DAG Nodes. -//===----------------------------------------------------------------------===// - -// (outs Result), (ins Imm, OpCmode) -def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>; - -def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>; - -def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>; - -// (outs Result), (ins Imm) -def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1, - [SDTCisVec<0>, SDTCisVT<1, i32>]>>; - -// (outs Result), (ins LHS, RHS, CondCode) -def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3, - [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>; - -// (outs Result), (ins LHS, 0/0.0 constant, CondCode) -def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3, - [SDTCisVec<0>, SDTCisVec<1>]>>; - -// (outs Result), (ins LHS, RHS) -def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2, - [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>; - -def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisVT<2, i32>]>; -def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>; -def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>; - -def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>]>; -def Neon_uzp1 : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>; -def Neon_uzp2 : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>; -def Neon_zip1 : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>; -def Neon_zip2 : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>; -def Neon_trn1 : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>; -def Neon_trn2 : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>; - -def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; -def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>; -def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>; -def Neon_rev16 : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>; -def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", 
SDTypeProfile<1, 1, - [SDTCisVec<0>]>>; -def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2, - [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>; -def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3, - [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>; - -//===----------------------------------------------------------------------===// -// Addressing-mode instantiations -//===----------------------------------------------------------------------===// - -multiclass ls_64_pats { -defm : ls_neutral_pats; -} - -multiclass ls_128_pats { -defm : ls_neutral_pats; -} - -multiclass uimm12_neon_pats { - defm : ls_64_pats; - defm : ls_64_pats; - defm : ls_64_pats; - defm : ls_64_pats; - defm : ls_64_pats; - defm : ls_64_pats; - - defm : ls_128_pats; - defm : ls_128_pats; - defm : ls_128_pats; - defm : ls_128_pats; - defm : ls_128_pats; - defm : ls_128_pats; -} - -defm : uimm12_neon_pats<(A64WrapperSmall - tconstpool:$Hi, tconstpool:$Lo12, ALIGN), - (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>; - -//===----------------------------------------------------------------------===// -// Multiclasses -//===----------------------------------------------------------------------===// - -multiclass NeonI_3VSame_B_sizes size, bits<5> opcode, - string asmop, SDPatternOperator opnode8B, - SDPatternOperator opnode16B, - bit Commutable = 0> { - let isCommutable = Commutable in { - def _8B : NeonI_3VSame<0b0, u, size, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b", - [(set (v8i8 VPR64:$Rd), - (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _16B : NeonI_3VSame<0b1, u, size, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b", - [(set (v16i8 VPR128:$Rd), - (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } - -} - -multiclass NeonI_3VSame_HS_sizes opcode, - string asmop, SDPatternOperator opnode, - bit Commutable = 0> { - let isCommutable = Commutable in { - def _4H : NeonI_3VSame<0b0, u, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h", - [(set (v4i16 VPR64:$Rd), - (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _8H : NeonI_3VSame<0b1, u, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h", - [(set (v8i16 VPR128:$Rd), - (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _2S : NeonI_3VSame<0b0, u, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", - [(set (v2i32 VPR64:$Rd), - (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _4S : NeonI_3VSame<0b1, u, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", - [(set (v4i32 VPR128:$Rd), - (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} -multiclass NeonI_3VSame_BHS_sizes opcode, - string asmop, SDPatternOperator opnode, - bit Commutable = 0> - : NeonI_3VSame_HS_sizes { - let isCommutable = Commutable in { - def _8B : NeonI_3VSame<0b0, u, 0b00, opcode, - (outs 
VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b", - [(set (v8i8 VPR64:$Rd), - (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _16B : NeonI_3VSame<0b1, u, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b", - [(set (v16i8 VPR128:$Rd), - (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -multiclass NeonI_3VSame_BHSD_sizes opcode, - string asmop, SDPatternOperator opnode, - bit Commutable = 0> - : NeonI_3VSame_BHS_sizes { - let isCommutable = Commutable in { - def _2D : NeonI_3VSame<0b1, u, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d", - [(set (v2i64 VPR128:$Rd), - (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types, -// but Result types can be integer or floating point types. -multiclass NeonI_3VSame_SD_sizes opcode, - string asmop, SDPatternOperator opnode, - ValueType ResTy2S, ValueType ResTy4S, - ValueType ResTy2D, bit Commutable = 0> { - let isCommutable = Commutable in { - def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", - [(set (ResTy2S VPR64:$Rd), - (ResTy2S (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", - [(set (ResTy4S VPR128:$Rd), - (ResTy4S (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d", - [(set (ResTy2D VPR128:$Rd), - (ResTy2D (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -//===----------------------------------------------------------------------===// -// Instruction Definitions -//===----------------------------------------------------------------------===// - -// Vector Arithmetic Instructions - -// Vector Add (Integer and Floating-Point) - -defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>; -defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, - v2f32, v4f32, v2f64, 1>; - -// Patterns to match add of v1i8/v1i16/v1i32 types -def : Pat<(v1i8 (add FPR8:$Rn, FPR8:$Rm)), - (EXTRACT_SUBREG - (ADDvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)), - sub_8)>; -def : Pat<(v1i16 (add FPR16:$Rn, FPR16:$Rm)), - (EXTRACT_SUBREG - (ADDvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)), - sub_16)>; -def : Pat<(v1i32 (add FPR32:$Rn, FPR32:$Rm)), - (EXTRACT_SUBREG - (ADDvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - sub_32)>; - -// Vector Sub (Integer and Floating-Point) - -defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>; -defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, - v2f32, v4f32, v2f64, 0>; - -// Patterns to match sub of v1i8/v1i16/v1i32 types -def : Pat<(v1i8 
(sub FPR8:$Rn, FPR8:$Rm)), - (EXTRACT_SUBREG - (SUBvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)), - sub_8)>; -def : Pat<(v1i16 (sub FPR16:$Rn, FPR16:$Rm)), - (EXTRACT_SUBREG - (SUBvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)), - sub_16)>; -def : Pat<(v1i32 (sub FPR32:$Rn, FPR32:$Rm)), - (EXTRACT_SUBREG - (SUBvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - sub_32)>; - -// Vector Multiply (Integer and Floating-Point) - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>; -defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, - v2f32, v4f32, v2f64, 1>; -} - -// Patterns to match mul of v1i8/v1i16/v1i32 types -def : Pat<(v1i8 (mul FPR8:$Rn, FPR8:$Rm)), - (EXTRACT_SUBREG - (MULvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)), - sub_8)>; -def : Pat<(v1i16 (mul FPR16:$Rn, FPR16:$Rm)), - (EXTRACT_SUBREG - (MULvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)), - sub_16)>; -def : Pat<(v1i32 (mul FPR32:$Rn, FPR32:$Rm)), - (EXTRACT_SUBREG - (MULvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - sub_32)>; - -// Vector Multiply (Polynomial) - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul", - int_arm_neon_vmulp, int_arm_neon_vmulp, 1>; -} - -// Vector Multiply-accumulate and Multiply-subtract (Integer) - -// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and -// two operands constraints. -class NeonI_3VSame_Constraint_impl size, - bits<5> opcode, SDPatternOperator opnode> - : NeonI_3VSame, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (add node:$Ra, (mul node:$Rn, node:$Rm))>; - -def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (sub node:$Ra, (mul node:$Rn, node:$Rm))>; - - -let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC] in { -def MLAvvv_8B: NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8, - 0b0, 0b0, 0b00, 0b10010, Neon_mla>; -def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8, - 0b1, 0b0, 0b00, 0b10010, Neon_mla>; -def MLAvvv_4H: NeonI_3VSame_Constraint_impl<"mla", ".4h", VPR64, v4i16, - 0b0, 0b0, 0b01, 0b10010, Neon_mla>; -def MLAvvv_8H: NeonI_3VSame_Constraint_impl<"mla", ".8h", VPR128, v8i16, - 0b1, 0b0, 0b01, 0b10010, Neon_mla>; -def MLAvvv_2S: NeonI_3VSame_Constraint_impl<"mla", ".2s", VPR64, v2i32, - 0b0, 0b0, 0b10, 0b10010, Neon_mla>; -def MLAvvv_4S: NeonI_3VSame_Constraint_impl<"mla", ".4s", VPR128, v4i32, - 0b1, 0b0, 0b10, 0b10010, Neon_mla>; - -def MLSvvv_8B: NeonI_3VSame_Constraint_impl<"mls", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b00, 0b10010, Neon_mls>; -def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b00, 0b10010, Neon_mls>; -def MLSvvv_4H: NeonI_3VSame_Constraint_impl<"mls", ".4h", VPR64, v4i16, - 0b0, 0b1, 0b01, 0b10010, Neon_mls>; -def MLSvvv_8H: NeonI_3VSame_Constraint_impl<"mls", ".8h", VPR128, v8i16, - 0b1, 0b1, 0b01, 0b10010, Neon_mls>; -def MLSvvv_2S: NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32, - 0b0, 0b1, 0b10, 0b10010, Neon_mls>; -def MLSvvv_4S: NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32, - 0b1, 0b1, 0b10, 
0b10010, Neon_mls>; -} - -// Vector Multiply-accumulate and Multiply-subtract (Floating Point) - -def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (fadd node:$Ra, (fmul_su node:$Rn, node:$Rm))>; - -def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>; - -let Predicates = [HasNEON, UseFusedMAC], - SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC] in { -def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32, - 0b0, 0b0, 0b00, 0b11001, Neon_fmla>; -def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32, - 0b1, 0b0, 0b00, 0b11001, Neon_fmla>; -def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64, - 0b1, 0b0, 0b01, 0b11001, Neon_fmla>; - -def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64, v2f32, - 0b0, 0b0, 0b10, 0b11001, Neon_fmls>; -def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32, - 0b1, 0b0, 0b10, 0b11001, Neon_fmls>; -def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64, - 0b1, 0b0, 0b11, 0b11001, Neon_fmls>; -} - -// We're also allowed to match the fma instruction regardless of compile -// options. -def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)), - (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>; -def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)), - (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; -def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)), - (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; - -def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)), - (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>; -def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)), - (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; -def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)), - (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; - -// Vector Divide (Floating-Point) - -let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in { -defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, - v2f32, v4f32, v2f64, 0>; -} - -// Vector Bitwise Operations - -// Vector Bitwise AND - -defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>; - -// Vector Bitwise Exclusive OR - -defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>; - -// Vector Bitwise OR - -defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>; - -// ORR disassembled as MOV if Vn==Vm - -// Vector Move - register -// Alias for ORR if Vn=Vm. -def : NeonInstAlias<"mov $Rd.8b, $Rn.8b", - (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn)>; -def : NeonInstAlias<"mov $Rd.16b, $Rn.16b", - (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn)>; - -// The MOVI instruction takes two immediate operands. The first is the -// immediate encoding, while the second is the cmode. A cmode of 14, or -// 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC. 
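To make the cmode comment above concrete, here is a minimal C++ sketch. Assumptions: expandMoviPerByte is an invented helper name, not an LLVM API, and only the per-byte case (cmode == 0b1110) is modelled. It shows that this encoding simply replicates the 8-bit immediate into every byte, which is why (Neon_movi 0, 14) and (Neon_movi 255, 14) below serve as the all-zero and all-one byte masks.

    // Illustrative only: expand the (Imm, cmode) pair used by Neon_movi for
    // the per-byte case (cmode == 0b1110), the one the PatFrags below rely on.
    // expandMoviPerByte is a hypothetical helper, not part of LLVM.
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    static uint64_t expandMoviPerByte(uint8_t Imm8, unsigned CMode) {
      assert(CMode == 0xE && "only the per-byte MOVI encoding is sketched here");
      uint64_t V = 0;
      for (int i = 0; i < 8; ++i)        // replicate the 8-bit immediate into
        V |= uint64_t(Imm8) << (8 * i);  // every byte of a 64-bit lane
      return V;
    }

    int main() {
      // Neon_AllZero == (Neon_movi 0, 14)   -> 0x0000000000000000
      // Neon_AllOne  == (Neon_movi 255, 14) -> 0xffffffffffffffff
      printf("%016llx\n", (unsigned long long)expandMoviPerByte(0, 14));
      printf("%016llx\n", (unsigned long long)expandMoviPerByte(255, 14));
      return 0;
    }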
-def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>; -def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>; - -def Neon_not8B : PatFrag<(ops node:$in), - (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>; -def Neon_not16B : PatFrag<(ops node:$in), - (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>; - -def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm), - (or node:$Rn, (Neon_not8B node:$Rm))>; - -def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm), - (or node:$Rn, (Neon_not16B node:$Rm))>; - -def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm), - (and node:$Rn, (Neon_not8B node:$Rm))>; - -def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm), - (and node:$Rn, (Neon_not16B node:$Rm))>; - - -// Vector Bitwise OR NOT - register - -defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn", - Neon_orn8B, Neon_orn16B, 0>; - -// Vector Bitwise Bit Clear (AND NOT) - register - -defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic", - Neon_bic8B, Neon_bic16B, 0>; - -multiclass Neon_bitwise2V_patterns { - def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$Rn, VPR128:$Rm)>; -} - -// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN -defm : Neon_bitwise2V_patterns; -defm : Neon_bitwise2V_patterns; -defm : Neon_bitwise2V_patterns; -defm : Neon_bitwise2V_patterns; -defm : Neon_bitwise2V_patterns; - -// Vector Bitwise Select -def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b01, 0b00011, vselect>; - -def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b01, 0b00011, vselect>; - -multiclass Neon_bitwise3V_patterns { - // Disassociate type from instruction definition - def : Pat<(v8i8 (opnode (v8i8 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2f32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1f64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v8i16 (opnode (v8i16 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2i64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2f64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4f32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, 
VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - - // Allow to match BSL instruction pattern with non-constant operand - def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd), - (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), - (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd), - (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), - (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd), - (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), - (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd), - (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), - (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd), - (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), - (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd), - (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), - (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd), - (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), - (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd), - (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), - (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; - - // Allow to match llvm.arm.* intrinsics. - def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src), - (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src), - (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src), - (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src), - (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src), - (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src), - (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src), - (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src), - (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src), - (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src), - (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src), - (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src), - (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; -} - -// Additional patterns for bitwise instruction BSL -defm: Neon_bitwise3V_patterns; - -def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm), - (vselect node:$src, node:$Rn, node:$Rm), - [{ (void)N; return false; }]>; - -// Vector Bitwise Insert if True - -def BITvvv_8B : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>; -def BITvvv_16B : 
NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>; - -// Vector Bitwise Insert if False - -def BIFvvv_8B : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>; -def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>; - -// Vector Absolute Difference and Accumulate (Signed, Unsigned) - -def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>; -def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>; - -// Vector Absolute Difference and Accumulate (Unsigned) -def UABAvvv_8B : NeonI_3VSame_Constraint_impl<"uaba", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b00, 0b01111, Neon_uaba>; -def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b00, 0b01111, Neon_uaba>; -def UABAvvv_4H : NeonI_3VSame_Constraint_impl<"uaba", ".4h", VPR64, v4i16, - 0b0, 0b1, 0b01, 0b01111, Neon_uaba>; -def UABAvvv_8H : NeonI_3VSame_Constraint_impl<"uaba", ".8h", VPR128, v8i16, - 0b1, 0b1, 0b01, 0b01111, Neon_uaba>; -def UABAvvv_2S : NeonI_3VSame_Constraint_impl<"uaba", ".2s", VPR64, v2i32, - 0b0, 0b1, 0b10, 0b01111, Neon_uaba>; -def UABAvvv_4S : NeonI_3VSame_Constraint_impl<"uaba", ".4s", VPR128, v4i32, - 0b1, 0b1, 0b10, 0b01111, Neon_uaba>; - -// Vector Absolute Difference and Accumulate (Signed) -def SABAvvv_8B : NeonI_3VSame_Constraint_impl<"saba", ".8b", VPR64, v8i8, - 0b0, 0b0, 0b00, 0b01111, Neon_saba>; -def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8, - 0b1, 0b0, 0b00, 0b01111, Neon_saba>; -def SABAvvv_4H : NeonI_3VSame_Constraint_impl<"saba", ".4h", VPR64, v4i16, - 0b0, 0b0, 0b01, 0b01111, Neon_saba>; -def SABAvvv_8H : NeonI_3VSame_Constraint_impl<"saba", ".8h", VPR128, v8i16, - 0b1, 0b0, 0b01, 0b01111, Neon_saba>; -def SABAvvv_2S : NeonI_3VSame_Constraint_impl<"saba", ".2s", VPR64, v2i32, - 0b0, 0b0, 0b10, 0b01111, Neon_saba>; -def SABAvvv_4S : NeonI_3VSame_Constraint_impl<"saba", ".4s", VPR128, v4i32, - 0b1, 0b0, 0b10, 0b01111, Neon_saba>; - - -// Vector Absolute Difference (Signed, Unsigned) -defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>; -defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>; - -// Vector Absolute Difference (Floating Point) -defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd", - int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>; - -// Vector Reciprocal Step (Floating Point) -defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps", - int_arm_neon_vrecps, - v2f32, v4f32, v2f64, 0>; - -// Vector Reciprocal Square Root Step (Floating Point) -defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", - int_arm_neon_vrsqrts, - v2f32, v4f32, v2f64, 0>; - -// Vector Comparisons - -def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETEQ)>; -def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETUGE)>; -def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETGE)>; -def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETUGT)>; -def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETGT)>; - -// NeonI_compare_aliases class: swaps register operands to implement -// comparison aliases, e.g., CMLE is alias for CMGE 
with operands reversed. -class NeonI_compare_aliases - : NeonInstAlias; - -// Vector Comparisons (Integer) - -// Vector Compare Mask Equal (Integer) -let isCommutable =1 in { -defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>; -} - -// Vector Compare Mask Higher or Same (Unsigned Integer) -defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>; - -// Vector Compare Mask Greater Than or Equal (Integer) -defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>; - -// Vector Compare Mask Higher (Unsigned Integer) -defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>; - -// Vector Compare Mask Greater Than (Integer) -defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>; - -// Vector Compare Mask Bitwise Test (Integer) -defm CMTSTvvv: NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>; - -// Vector Compare Mask Less or Same (Unsigned Integer) -// CMLS is alias for CMHS with operands reversed. -def CMLSvvv_8B : NeonI_compare_aliases<"cmls", ".8b", CMHSvvv_8B, VPR64>; -def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>; -def CMLSvvv_4H : NeonI_compare_aliases<"cmls", ".4h", CMHSvvv_4H, VPR64>; -def CMLSvvv_8H : NeonI_compare_aliases<"cmls", ".8h", CMHSvvv_8H, VPR128>; -def CMLSvvv_2S : NeonI_compare_aliases<"cmls", ".2s", CMHSvvv_2S, VPR64>; -def CMLSvvv_4S : NeonI_compare_aliases<"cmls", ".4s", CMHSvvv_4S, VPR128>; -def CMLSvvv_2D : NeonI_compare_aliases<"cmls", ".2d", CMHSvvv_2D, VPR128>; - -// Vector Compare Mask Less Than or Equal (Integer) -// CMLE is alias for CMGE with operands reversed. -def CMLEvvv_8B : NeonI_compare_aliases<"cmle", ".8b", CMGEvvv_8B, VPR64>; -def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>; -def CMLEvvv_4H : NeonI_compare_aliases<"cmle", ".4h", CMGEvvv_4H, VPR64>; -def CMLEvvv_8H : NeonI_compare_aliases<"cmle", ".8h", CMGEvvv_8H, VPR128>; -def CMLEvvv_2S : NeonI_compare_aliases<"cmle", ".2s", CMGEvvv_2S, VPR64>; -def CMLEvvv_4S : NeonI_compare_aliases<"cmle", ".4s", CMGEvvv_4S, VPR128>; -def CMLEvvv_2D : NeonI_compare_aliases<"cmle", ".2d", CMGEvvv_2D, VPR128>; - -// Vector Compare Mask Lower (Unsigned Integer) -// CMLO is alias for CMHI with operands reversed. -def CMLOvvv_8B : NeonI_compare_aliases<"cmlo", ".8b", CMHIvvv_8B, VPR64>; -def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>; -def CMLOvvv_4H : NeonI_compare_aliases<"cmlo", ".4h", CMHIvvv_4H, VPR64>; -def CMLOvvv_8H : NeonI_compare_aliases<"cmlo", ".8h", CMHIvvv_8H, VPR128>; -def CMLOvvv_2S : NeonI_compare_aliases<"cmlo", ".2s", CMHIvvv_2S, VPR64>; -def CMLOvvv_4S : NeonI_compare_aliases<"cmlo", ".4s", CMHIvvv_4S, VPR128>; -def CMLOvvv_2D : NeonI_compare_aliases<"cmlo", ".2d", CMHIvvv_2D, VPR128>; - -// Vector Compare Mask Less Than (Integer) -// CMLT is alias for CMGT with operands reversed. 
-def CMLTvvv_8B : NeonI_compare_aliases<"cmlt", ".8b", CMGTvvv_8B, VPR64>; -def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>; -def CMLTvvv_4H : NeonI_compare_aliases<"cmlt", ".4h", CMGTvvv_4H, VPR64>; -def CMLTvvv_8H : NeonI_compare_aliases<"cmlt", ".8h", CMGTvvv_8H, VPR128>; -def CMLTvvv_2S : NeonI_compare_aliases<"cmlt", ".2s", CMGTvvv_2S, VPR64>; -def CMLTvvv_4S : NeonI_compare_aliases<"cmlt", ".4s", CMGTvvv_4S, VPR128>; -def CMLTvvv_2D : NeonI_compare_aliases<"cmlt", ".2d", CMGTvvv_2D, VPR128>; - - -def neon_uimm0_asmoperand : AsmOperandClass -{ - let Name = "UImm0"; - let PredicateMethod = "isUImm<0>"; - let RenderMethod = "addImmOperands"; -} - -def neon_uimm0 : Operand, ImmLeaf { - let ParserMatchClass = neon_uimm0_asmoperand; - let PrintMethod = "printNeonUImm0Operand"; - -} - -multiclass NeonI_cmpz_sizes opcode, string asmop, CondCode CC> -{ - def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.8b, $Rn.8b, $Imm", - [(set (v8i8 VPR64:$Rd), - (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.16b, $Rn.16b, $Imm", - [(set (v16i8 VPR128:$Rd), - (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.4h, $Rn.4h, $Imm", - [(set (v4i16 VPR64:$Rd), - (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.8h, $Rn.8h, $Imm", - [(set (v8i16 VPR128:$Rd), - (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.2s, $Rn.2s, $Imm", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.4s, $Rn.4s, $Imm", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.2d, $Rn.2d, $Imm", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -// Vector Compare Mask Equal to Zero (Integer) -defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>; - -// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer) -defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>; - -// Vector Compare Mask Greater Than Zero (Signed Integer) -defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>; - -// Vector Compare Mask Less Than or Equal To Zero (Signed Integer) -defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>; - -// Vector Compare Mask Less Than Zero (Signed Integer) -defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>; - -// Vector Comparisons (Floating Point) - -// Vector Compare 
Mask Equal (Floating Point) -let isCommutable =1 in { -defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq, - v2i32, v4i32, v2i64, 0>; -} - -// Vector Compare Mask Greater Than Or Equal (Floating Point) -defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge, - v2i32, v4i32, v2i64, 0>; - -// Vector Compare Mask Greater Than (Floating Point) -defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt, - v2i32, v4i32, v2i64, 0>; - -// Vector Compare Mask Less Than Or Equal (Floating Point) -// FCMLE is alias for FCMGE with operands reversed. -def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>; -def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>; -def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>; - -// Vector Compare Mask Less Than (Floating Point) -// FCMLT is alias for FCMGT with operands reversed. -def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>; -def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>; -def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>; - -def fpzero_izero_asmoperand : AsmOperandClass { - let Name = "FPZeroIZero"; - let ParserMethod = "ParseFPImm0AndImm0Operand"; - let DiagnosticType = "FPZero"; -} - -def fpzz32 : Operand, - ComplexPattern { - let ParserMatchClass = fpzero_izero_asmoperand; - let PrintMethod = "printFPZeroOperand"; - let DecoderMethod = "DecodeFPZeroOperand"; -} - -multiclass NeonI_fpcmpz_sizes opcode, - string asmop, CondCode CC> -{ - def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, fpzz32:$FPImm), - asmop # "\t$Rd.2s, $Rn.2s, $FPImm", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm), - asmop # "\t$Rd.4s, $Rn.4s, $FPImm", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm), - asmop # "\t$Rd.2d, $Rn.2d, $FPImm", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -// Vector Compare Mask Equal to Zero (Floating Point) -defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>; - -// Vector Compare Mask Greater Than or Equal to Zero (Floating Point) -defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>; - -// Vector Compare Mask Greater Than Zero (Floating Point) -defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>; - -// Vector Compare Mask Less Than or Equal To Zero (Floating Point) -defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>; - -// Vector Compare Mask Less Than Zero (Floating Point) -defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>; - -// Vector Absolute Comparisons (Floating Point) - -// Vector Absolute Compare Mask Greater Than Or Equal (Floating Point) -defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge", - int_arm_neon_vacge, - v2i32, v4i32, v2i64, 0>; - -// Vector Absolute Compare Mask Greater Than (Floating Point) -defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt", - 
int_arm_neon_vacgt, - v2i32, v4i32, v2i64, 0>; - -// Vector Absolute Compare Mask Less Than Or Equal (Floating Point) -// FACLE is alias for FACGE with operands reversed. -def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>; -def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>; -def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>; - -// Vector Absolute Compare Mask Less Than (Floating Point) -// FACLT is alias for FACGT with operands reversed. -def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>; -def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>; -def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>; - -// Vector halving add (Integer Signed, Unsigned) -defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd", - int_arm_neon_vhadds, 1>; -defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd", - int_arm_neon_vhaddu, 1>; - -// Vector halving sub (Integer Signed, Unsigned) -defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub", - int_arm_neon_vhsubs, 0>; -defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub", - int_arm_neon_vhsubu, 0>; - -// Vector rouding halving add (Integer Signed, Unsigned) -defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd", - int_arm_neon_vrhadds, 1>; -defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd", - int_arm_neon_vrhaddu, 1>; - -// Vector Saturating add (Integer Signed, Unsigned) -defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd", - int_arm_neon_vqadds, 1>; -defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd", - int_arm_neon_vqaddu, 1>; - -// Vector Saturating sub (Integer Signed, Unsigned) -defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub", - int_arm_neon_vqsubs, 1>; -defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub", - int_arm_neon_vqsubu, 1>; - -// Vector Shift Left (Signed and Unsigned Integer) -defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl", - int_arm_neon_vshifts, 1>; -defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl", - int_arm_neon_vshiftu, 1>; - -// Vector Saturating Shift Left (Signed and Unsigned Integer) -defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl", - int_arm_neon_vqshifts, 1>; -defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl", - int_arm_neon_vqshiftu, 1>; - -// Vector Rouding Shift Left (Signed and Unsigned Integer) -defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl", - int_arm_neon_vrshifts, 1>; -defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl", - int_arm_neon_vrshiftu, 1>; - -// Vector Saturating Rouding Shift Left (Signed and Unsigned Integer) -defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl", - int_arm_neon_vqrshifts, 1>; -defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl", - int_arm_neon_vqrshiftu, 1>; - -// Vector Maximum (Signed and Unsigned Integer) -defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>; -defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>; - -// Vector Minimum (Signed and Unsigned Integer) -defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>; -defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>; - -// Vector Maximum (Floating Point) -defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax", - int_arm_neon_vmaxs, - v2f32, v4f32, v2f64, 1>; - -// Vector Minimum 
(Floating Point) -defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin", - int_arm_neon_vmins, - v2f32, v4f32, v2f64, 1>; - -// Vector maxNum (Floating Point) - prefer a number over a quiet NaN) -defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm", - int_aarch64_neon_vmaxnm, - v2f32, v4f32, v2f64, 1>; - -// Vector minNum (Floating Point) - prefer a number over a quiet NaN) -defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm", - int_aarch64_neon_vminnm, - v2f32, v4f32, v2f64, 1>; - -// Vector Maximum Pairwise (Signed and Unsigned Integer) -defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>; -defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>; - -// Vector Minimum Pairwise (Signed and Unsigned Integer) -defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>; -defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>; - -// Vector Maximum Pairwise (Floating Point) -defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp", - int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>; - -// Vector Minimum Pairwise (Floating Point) -defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp", - int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>; - -// Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN) -defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp", - int_aarch64_neon_vpmaxnm, - v2f32, v4f32, v2f64, 1>; - -// Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN) -defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp", - int_aarch64_neon_vpminnm, - v2f32, v4f32, v2f64, 1>; - -// Vector Addition Pairwise (Integer) -defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>; - -// Vector Addition Pairwise (Floating Point) -defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp", - int_arm_neon_vpadd, - v2f32, v4f32, v2f64, 1>; - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -// Vector Saturating Doubling Multiply High -defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh", - int_arm_neon_vqdmulh, 1>; - -// Vector Saturating Rouding Doubling Multiply High -defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh", - int_arm_neon_vqrdmulh, 1>; - -// Vector Multiply Extended (Floating Point) -defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx", - int_aarch64_neon_vmulx, - v2f32, v4f32, v2f64, 1>; -} - -// Patterns to match llvm.aarch64.* intrinsic for -// ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output -class Neon_VectorPair_v2i32_pattern - : Pat<(v1i32 (opnode (v2i32 VPR64:$Rn))), - (EXTRACT_SUBREG - (v2i32 (INST (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rn))), - sub_32)>; - -def : Neon_VectorPair_v2i32_pattern; -def : Neon_VectorPair_v2i32_pattern; -def : Neon_VectorPair_v2i32_pattern; -def : Neon_VectorPair_v2i32_pattern; -def : Neon_VectorPair_v2i32_pattern; - -// Vector Immediate Instructions - -multiclass neon_mov_imm_shift_asmoperands -{ - def _asmoperand : AsmOperandClass - { - let Name = "NeonMovImmShift" # PREFIX; - let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands"; - let PredicateMethod = "isNeonMovImmShift" # PREFIX; - } -} - -// Definition of vector immediates shift operands - -// The selectable use-cases extract the shift operation -// information from the OpCmode fields encoded in the immediate. 
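As a rough illustration of how the OpCmode field carries the shift information that the operand classes below extract, here is a self-contained C++ sketch. It only models the LSL / LSLH / MSL cases that the "let cmode = ..." lines further down encode; it is not the actual A64Imms::decodeNeonModShiftImm implementation, and decodeModShift is an invented name.

    // Rough model of the OpCmode -> shift mapping implied by the
    // NeonI_1VModImm definitions below. Not LLVM code.
    #include <cstdio>

    struct ModShift {
      bool Valid;
      unsigned ShiftImm;   // shift amount in bits
      bool ShiftOnesIn;    // true for MSL (shift ones in), false for LSL
    };

    static ModShift decodeModShift(unsigned OpCmode) {
      if ((OpCmode & 0x9) == 0x0)               // 0xx0: per-word LSL #0/8/16/24
        return {true, ((OpCmode >> 1) & 0x3) * 8, false};
      if ((OpCmode & 0xD) == 0x8)               // 10x0: per-halfword LSL #0/8
        return {true, ((OpCmode >> 1) & 0x1) * 8, false};
      if ((OpCmode & 0xE) == 0xC)               // 110x: per-word MSL #8/16
        return {true, ((OpCmode & 0x1) + 1) * 8, true};
      return {false, 0, false};                 // other cmodes carry no shift
    }

    int main() {
      for (unsigned CM = 0; CM < 16; ++CM) {
        ModShift MS = decodeModShift(CM);
        if (MS.Valid)
          printf("cmode=%2u -> %s #%u\n", CM, MS.ShiftOnesIn ? "MSL" : "LSL",
                 MS.ShiftImm);
      }
      return 0;
    }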
-def neon_mod_shift_imm_XFORM : SDNodeXFormgetZExtValue(); - unsigned ShiftImm; - unsigned ShiftOnesIn; - unsigned HasShift = - A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn); - if (!HasShift) return SDValue(); - return CurDAG->getTargetConstant(ShiftImm, MVT::i32); -}]>; - -// Vector immediates shift operands which accept LSL and MSL -// shift operators with shift value in the range of 0, 8, 16, 24 (LSL), -// or 0, 8 (LSLH) or 8, 16 (MSL). -defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">; -defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">; -// LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24 -defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">; - -multiclass neon_mov_imm_shift_operands -{ - def _operand : Operand, ImmLeaf - { - let PrintMethod = - "printNeonMovImmShiftOperand"; - let DecoderMethod = - "DecodeNeonMovImmShiftOperand"; - let ParserMatchClass = - !cast("neon_mov_imm_" # PREFIX # HALF # "_asmoperand"); - } -} - -defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{ - unsigned ShiftImm; - unsigned ShiftOnesIn; - unsigned HasShift = - A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); - return (HasShift && !ShiftOnesIn); -}]>; - -defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{ - unsigned ShiftImm; - unsigned ShiftOnesIn; - unsigned HasShift = - A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); - return (HasShift && ShiftOnesIn); -}]>; - -defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{ - unsigned ShiftImm; - unsigned ShiftOnesIn; - unsigned HasShift = - A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); - return (HasShift && !ShiftOnesIn); -}]>; - -def neon_uimm1_asmoperand : AsmOperandClass -{ - let Name = "UImm1"; - let PredicateMethod = "isUImm<1>"; - let RenderMethod = "addImmOperands"; -} - -def neon_uimm2_asmoperand : AsmOperandClass -{ - let Name = "UImm2"; - let PredicateMethod = "isUImm<2>"; - let RenderMethod = "addImmOperands"; -} - -def neon_uimm8_asmoperand : AsmOperandClass -{ - let Name = "UImm8"; - let PredicateMethod = "isUImm<8>"; - let RenderMethod = "addImmOperands"; -} - -def neon_uimm8 : Operand, ImmLeaf { - let ParserMatchClass = neon_uimm8_asmoperand; - let PrintMethod = "printUImmHexOperand"; -} - -def neon_uimm64_mask_asmoperand : AsmOperandClass -{ - let Name = "NeonUImm64Mask"; - let PredicateMethod = "isNeonUImm64Mask"; - let RenderMethod = "addNeonUImm64MaskOperands"; -} - -// MCOperand for 64-bit bytemask with each byte having only the -// value 0x00 and 0xff is encoded as an unsigned 8-bit value -def neon_uimm64_mask : Operand, ImmLeaf { - let ParserMatchClass = neon_uimm64_mask_asmoperand; - let PrintMethod = "printNeonUImm64MaskOperand"; -} - -multiclass NeonI_mov_imm_lsl_sizes -{ - // shift zeros, per word - def _2S : NeonI_1VModImm<0b0, op, - (outs VPR64:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_LSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), - [(set (v2i32 VPR64:$Rd), - (v2i32 (opnode (timm:$Imm), - (neon_mov_imm_LSL_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bits<2> Simm; - let cmode = {0b0, Simm{1}, Simm{0}, 0b0}; - } - - def _4S : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_LSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), - [(set (v4i32 VPR128:$Rd), - (v4i32 (opnode (timm:$Imm), - (neon_mov_imm_LSL_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bits<2> Simm; - let cmode = 
{0b0, Simm{1}, Simm{0}, 0b0}; - } - - // shift zeros, per halfword - def _4H : NeonI_1VModImm<0b0, op, - (outs VPR64:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm), - !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"), - [(set (v4i16 VPR64:$Rd), - (v4i16 (opnode (timm:$Imm), - (neon_mov_imm_LSLH_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bit Simm; - let cmode = {0b1, 0b0, Simm, 0b0}; - } - - def _8H : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm), - !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"), - [(set (v8i16 VPR128:$Rd), - (v8i16 (opnode (timm:$Imm), - (neon_mov_imm_LSLH_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bit Simm; - let cmode = {0b1, 0b0, Simm, 0b0}; - } -} - -multiclass NeonI_mov_imm_with_constraint_lsl_sizes -{ - let Constraints = "$src = $Rd" in { - // shift zeros, per word - def _2S : NeonI_1VModImm<0b0, op, - (outs VPR64:$Rd), - (ins VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), - [(set (v2i32 VPR64:$Rd), - (v2i32 (opnode (v2i32 VPR64:$src), - (v2i32 (neonopnode timm:$Imm, - neon_mov_imm_LSL_operand:$Simm)))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bits<2> Simm; - let cmode = {0b0, Simm{1}, Simm{0}, 0b1}; - } - - def _4S : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), - [(set (v4i32 VPR128:$Rd), - (v4i32 (opnode (v4i32 VPR128:$src), - (v4i32 (neonopnode timm:$Imm, - neon_mov_imm_LSL_operand:$Simm)))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bits<2> Simm; - let cmode = {0b0, Simm{1}, Simm{0}, 0b1}; - } - - // shift zeros, per halfword - def _4H : NeonI_1VModImm<0b0, op, - (outs VPR64:$Rd), - (ins VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm), - !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"), - [(set (v4i16 VPR64:$Rd), - (v4i16 (opnode (v4i16 VPR64:$src), - (v4i16 (neonopnode timm:$Imm, - neon_mov_imm_LSL_operand:$Simm)))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bit Simm; - let cmode = {0b1, 0b0, Simm, 0b1}; - } - - def _8H : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm), - !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"), - [(set (v8i16 VPR128:$Rd), - (v8i16 (opnode (v8i16 VPR128:$src), - (v8i16 (neonopnode timm:$Imm, - neon_mov_imm_LSL_operand:$Simm)))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bit Simm; - let cmode = {0b1, 0b0, Simm, 0b1}; - } - } -} - -multiclass NeonI_mov_imm_msl_sizes -{ - // shift ones, per word - def _2S : NeonI_1VModImm<0b0, op, - (outs VPR64:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_MSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), - [(set (v2i32 VPR64:$Rd), - (v2i32 (opnode (timm:$Imm), - (neon_mov_imm_MSL_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bit Simm; - let cmode = {0b1, 0b1, 0b0, Simm}; - } - - def _4S : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_MSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), - [(set (v4i32 VPR128:$Rd), - (v4i32 (opnode (timm:$Imm), - (neon_mov_imm_MSL_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bit Simm; - let cmode = {0b1, 0b1, 0b0, Simm}; - } -} - -// Vector Move Immediate Shifted -let isReMaterializable = 1 in { -defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>; -} - -// Vector Move 
Inverted Immediate Shifted -let isReMaterializable = 1 in { -defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>; -} - -// Vector Bitwise Bit Clear (AND NOT) - immediate -let isReMaterializable = 1 in { -defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1, - and, Neon_mvni>; -} - -// Vector Bitwise OR - immedidate - -let isReMaterializable = 1 in { -defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0, - or, Neon_movi>; -} - -// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immedidate -// LowerBUILD_VECTOR favors lowering MOVI over MVNI. -// BIC immediate instructions selection requires additional patterns to -// transform Neon_movi operands into BIC immediate operands - -def neon_mov_imm_LSLH_transform_XFORM : SDNodeXFormgetZExtValue(); - unsigned ShiftImm; - unsigned ShiftOnesIn; - (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn); - // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1 - // Transform encoded shift amount 0 to 1 and 1 to 0. - return CurDAG->getTargetConstant(!ShiftImm, MVT::i32); -}]>; - -def neon_mov_imm_LSLH_transform_operand - : ImmLeaf; - -// Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0xff, LSL 8) -// Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0xff) -def : Pat<(v4i16 (and VPR64:$src, - (v4i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm)))), - (BICvi_lsl_4H VPR64:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; - -// Transform (and A, (8h Neon_movi 8h 0xff)) -> BIC 8h (A, 0xff, LSL 8) -// Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0xff) -def : Pat<(v8i16 (and VPR128:$src, - (v8i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm)))), - (BICvi_lsl_8H VPR128:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; - -def : Pat<(v8i8 (and VPR64:$src, - (bitconvert(v4i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_4H VPR64:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; -def : Pat<(v2i32 (and VPR64:$src, - (bitconvert(v4i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_4H VPR64:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; -def : Pat<(v1i64 (and VPR64:$src, - (bitconvert(v4i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_4H VPR64:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; - -def : Pat<(v16i8 (and VPR128:$src, - (bitconvert(v8i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_8H VPR128:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; -def : Pat<(v4i32 (and VPR128:$src, - (bitconvert(v8i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_8H VPR128:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; -def : Pat<(v2i64 (and VPR128:$src, - (bitconvert(v8i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_8H VPR128:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; - -multiclass Neon_bitwiseVi_patterns { - def : Pat<(v8i8 (opnode VPR64:$src, - (bitconvert(v4i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4H VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v2i32 (opnode VPR64:$src, - (bitconvert(v4i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4H VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v1i64 (opnode VPR64:$src, - (bitconvert(v4i16 
(neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4H VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - - def : Pat<(v16i8 (opnode VPR128:$src, - (bitconvert(v8i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST8H VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v4i32 (opnode VPR128:$src, - (bitconvert(v8i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST8H VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v2i64 (opnode VPR128:$src, - (bitconvert(v8i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST8H VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - - def : Pat<(v8i8 (opnode VPR64:$src, - (bitconvert(v2i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST2S VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v4i16 (opnode VPR64:$src, - (bitconvert(v2i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST2S VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v1i64 (opnode VPR64:$src, - (bitconvert(v2i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST2S VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - - def : Pat<(v16i8 (opnode VPR128:$src, - (bitconvert(v4i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4S VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v8i16 (opnode VPR128:$src, - (bitconvert(v4i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4S VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v2i64 (opnode VPR128:$src, - (bitconvert(v4i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4S VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; -} - -// Additional patterns for Vector Vector Bitwise Bit Clear (AND NOT) - immediate -defm : Neon_bitwiseVi_patterns; - -// Additional patterns for Vector Bitwise OR - immedidate -defm : Neon_bitwiseVi_patterns; - - -// Vector Move Immediate Masked -let isReMaterializable = 1 in { -defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>; -} - -// Vector Move Inverted Immediate Masked -let isReMaterializable = 1 in { -defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>; -} - -class NeonI_mov_imm_lsl_aliases - : NeonInstAlias; - -// Aliases for Vector Move Immediate Shifted -def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>; -def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>; - -// Aliases for Vector Move Inverted Immediate Shifted -def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>; -def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>; - -// Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate -def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>; -def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>; - -// Aliases for Vector Bitwise OR - 
immedidate -def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>; -def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>; - -// Vector Move Immediate - per byte -let isReMaterializable = 1 in { -def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0, - (outs VPR64:$Rd), (ins neon_uimm8:$Imm), - "movi\t$Rd.8b, $Imm", - [(set (v8i8 VPR64:$Rd), - (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - let cmode = 0b1110; -} - -def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0, - (outs VPR128:$Rd), (ins neon_uimm8:$Imm), - "movi\t$Rd.16b, $Imm", - [(set (v16i8 VPR128:$Rd), - (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - let cmode = 0b1110; -} -} - -// Vector Move Immediate - bytemask, per double word -let isReMaterializable = 1 in { -def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1, - (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm), - "movi\t $Rd.2d, $Imm", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - let cmode = 0b1110; -} -} - -// Vector Move Immediate - bytemask, one doubleword - -let isReMaterializable = 1 in { -def MOVIdi : NeonI_1VModImm<0b0, 0b1, - (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm), - "movi\t $Rd, $Imm", - [(set (v1i64 FPR64:$Rd), - (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - let cmode = 0b1110; -} -} - -// Vector Floating Point Move Immediate - -class NeonI_FMOV_impl - : NeonI_1VModImm, - Sched<[WriteFPALU]> { - let cmode = 0b1111; - } - -let isReMaterializable = 1 in { -def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>; -def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>; -def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>; -} - -// Vector Shift (Immediate) - -// Shift Right/Left Immediate - The immh:immb field of these shifts are encoded -// as follows: -// -// Offset Encoding -// 8 immh:immb<6:3> = '0001xxx', is encoded in immh:immb<2:0> -// 16 immh:immb<6:4> = '001xxxx', is encoded in immh:immb<3:0> -// 32 immh:immb<6:5> = '01xxxxx', is encoded in immh:immb<4:0> -// 64 immh:immb<6> = '1xxxxxx', is encoded in immh:immb<5:0> -// -// The shift right immediate amount, in the range 1 to element bits, is computed -// as Offset - UInt(immh:immb). The shift left immediate amount, in the range 0 -// to element bits - 1, is computed as UInt(immh:immb) - Offset. 
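Stated directly, the rule above amounts to: for element size N, a left shift of s (0 .. N-1) is encoded as N + s in immh:immb, and a right shift of r (1 .. N) as 2*N - r, which is exactly why the 8-bit encodings all match '0001xxx' and the 64-bit ones '1xxxxxx'. A minimal C++ sketch follows; the helper names are illustrative only and do not correspond to the EncoderMethod/DecoderMethod hooks named below.

    // Stand-alone model of the immh:immb shift-immediate encoding described
    // in the comment above (Armv8 AdvSIMD shift by immediate). Not LLVM code.
    #include <cassert>
    #include <cstdio>

    // Left shift by 0 .. ElemBits-1 is encoded as ElemBits + Shift.
    static unsigned encodeShiftLeft(unsigned ElemBits, unsigned Shift) {
      assert(Shift < ElemBits);
      return ElemBits + Shift;
    }

    // Right shift by 1 .. ElemBits is encoded as 2*ElemBits - Shift.
    static unsigned encodeShiftRight(unsigned ElemBits, unsigned Shift) {
      assert(Shift >= 1 && Shift <= ElemBits);
      return 2 * ElemBits - Shift;
    }

    int main() {
      // 8-bit elements: every encoding lies in 0b0001000 .. 0b0001111
      // ('0001xxx'), so only immh:immb<2:0> varies, as the table states.
      printf("shl.8b  #3 -> immh:immb = 0x%02x\n", encodeShiftLeft(8, 3));  // 0x0b
      printf("sshr.8b #3 -> immh:immb = 0x%02x\n", encodeShiftRight(8, 3)); // 0x0d
      // 64-bit elements: '1xxxxxx', the shift occupies immh:immb<5:0>.
      printf("shl.2d  #5 -> immh:immb = 0x%02x\n", encodeShiftLeft(64, 5)); // 0x45
      return 0;
    }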
- -class shr_imm_asmoperands : AsmOperandClass { - let Name = "ShrImm" # OFFSET; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "ShrImm" # OFFSET; -} - -class shr_imm : Operand { - let EncoderMethod = "getShiftRightImm" # OFFSET; - let DecoderMethod = "DecodeShiftRightImm" # OFFSET; - let ParserMatchClass = - !cast("shr_imm" # OFFSET # "_asmoperand"); -} - -def shr_imm8_asmoperand : shr_imm_asmoperands<"8">; -def shr_imm16_asmoperand : shr_imm_asmoperands<"16">; -def shr_imm32_asmoperand : shr_imm_asmoperands<"32">; -def shr_imm64_asmoperand : shr_imm_asmoperands<"64">; - -def shr_imm8 : shr_imm<"8">, ImmLeaf 0 && Imm <= 8;}]>; -def shr_imm16 : shr_imm<"16">, ImmLeaf 0 && Imm <= 16;}]>; -def shr_imm32 : shr_imm<"32">, ImmLeaf 0 && Imm <= 32;}]>; -def shr_imm64 : shr_imm<"64">, ImmLeaf 0 && Imm <= 64;}]>; - -class shl_imm_asmoperands : AsmOperandClass { - let Name = "ShlImm" # OFFSET; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "ShlImm" # OFFSET; -} - -class shl_imm : Operand { - let EncoderMethod = "getShiftLeftImm" # OFFSET; - let DecoderMethod = "DecodeShiftLeftImm" # OFFSET; - let ParserMatchClass = - !cast("shl_imm" # OFFSET # "_asmoperand"); -} - -def shl_imm8_asmoperand : shl_imm_asmoperands<"8">; -def shl_imm16_asmoperand : shl_imm_asmoperands<"16">; -def shl_imm32_asmoperand : shl_imm_asmoperands<"32">; -def shl_imm64_asmoperand : shl_imm_asmoperands<"64">; - -def shl_imm8 : shl_imm<"8">, ImmLeaf= 0 && Imm < 8;}]>; -def shl_imm16 : shl_imm<"16">, ImmLeaf= 0 && Imm < 16;}]>; -def shl_imm32 : shl_imm<"32">, ImmLeaf= 0 && Imm < 32;}]>; -def shl_imm64 : shl_imm<"64">, ImmLeaf= 0 && Imm < 64;}]>; - -class N2VShift opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_N2VShL opcode, string asmop> { - // 64-bit vector types. - def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> { - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - } - - def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> { - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - } - - def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> { - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - } - - // 128-bit vector types. 
- def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> { - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - } - - def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> { - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - } - - def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> { - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - } - - def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> { - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - } -} - -multiclass NeonI_N2VShR opcode, string asmop, SDNode OpNode> { - def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -// Shift left - -defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">; - -// Additional patterns to match vector shift left by immediate. -// (v1i8/v1i16/v1i32 types) -def : Pat<(v1i8 (shl (v1i8 FPR8:$Rn), - (v1i8 (Neon_vdup (i32 (shl_imm8:$Imm)))))), - (EXTRACT_SUBREG - (SHLvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - shl_imm8:$Imm), - sub_8)>; -def : Pat<(v1i16 (shl (v1i16 FPR16:$Rn), - (v1i16 (Neon_vdup (i32 (shl_imm16:$Imm)))))), - (EXTRACT_SUBREG - (SHLvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - shl_imm16:$Imm), - sub_16)>; -def : Pat<(v1i32 (shl (v1i32 FPR32:$Rn), - (v1i32 (Neon_vdup (i32 (shl_imm32:$Imm)))))), - (EXTRACT_SUBREG - (SHLvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - shl_imm32:$Imm), - sub_32)>; - -// Shift right -defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>; -defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>; - -// Additional patterns to match vector shift right by immediate. 
-// (v1i8/v1i16/v1i32 types) -def : Pat<(v1i8 (sra (v1i8 FPR8:$Rn), - (v1i8 (Neon_vdup (i32 (shr_imm8:$Imm)))))), - (EXTRACT_SUBREG - (SSHRvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - shr_imm8:$Imm), - sub_8)>; -def : Pat<(v1i16 (sra (v1i16 FPR16:$Rn), - (v1i16 (Neon_vdup (i32 (shr_imm16:$Imm)))))), - (EXTRACT_SUBREG - (SSHRvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - shr_imm16:$Imm), - sub_16)>; -def : Pat<(v1i32 (sra (v1i32 FPR32:$Rn), - (v1i32 (Neon_vdup (i32 (shr_imm32:$Imm)))))), - (EXTRACT_SUBREG - (SSHRvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - shr_imm32:$Imm), - sub_32)>; -def : Pat<(v1i8 (srl (v1i8 FPR8:$Rn), - (v1i8 (Neon_vdup (i32 (shr_imm8:$Imm)))))), - (EXTRACT_SUBREG - (USHRvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - shr_imm8:$Imm), - sub_8)>; -def : Pat<(v1i16 (srl (v1i16 FPR16:$Rn), - (v1i16 (Neon_vdup (i32 (shr_imm16:$Imm)))))), - (EXTRACT_SUBREG - (USHRvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - shr_imm16:$Imm), - sub_16)>; -def : Pat<(v1i32 (srl (v1i32 FPR32:$Rn), - (v1i32 (Neon_vdup (i32 (shr_imm32:$Imm)))))), - (EXTRACT_SUBREG - (USHRvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - shr_imm32:$Imm), - sub_32)>; - -def Neon_High16B : PatFrag<(ops node:$in), - (extract_subvector (v16i8 node:$in), (iPTR 8))>; -def Neon_High8H : PatFrag<(ops node:$in), - (extract_subvector (v8i16 node:$in), (iPTR 4))>; -def Neon_High4S : PatFrag<(ops node:$in), - (extract_subvector (v4i32 node:$in), (iPTR 2))>; -def Neon_High2D : PatFrag<(ops node:$in), - (extract_subvector (v2i64 node:$in), (iPTR 1))>; -def Neon_High4float : PatFrag<(ops node:$in), - (extract_subvector (v4f32 node:$in), (iPTR 2))>; -def Neon_High2double : PatFrag<(ops node:$in), - (extract_subvector (v2f64 node:$in), (iPTR 1))>; - -def Neon_Low16B : PatFrag<(ops node:$in), - (v8i8 (extract_subvector (v16i8 node:$in), - (iPTR 0)))>; -def Neon_Low8H : PatFrag<(ops node:$in), - (v4i16 (extract_subvector (v8i16 node:$in), - (iPTR 0)))>; -def Neon_Low4S : PatFrag<(ops node:$in), - (v2i32 (extract_subvector (v4i32 node:$in), - (iPTR 0)))>; -def Neon_Low2D : PatFrag<(ops node:$in), - (v1i64 (extract_subvector (v2i64 node:$in), - (iPTR 0)))>; -def Neon_Low4float : PatFrag<(ops node:$in), - (v2f32 (extract_subvector (v4f32 node:$in), - (iPTR 0)))>; -def Neon_Low2double : PatFrag<(ops node:$in), - (v1f64 (extract_subvector (v2f64 node:$in), - (iPTR 0)))>; - -class N2VShiftLong opcode, string asmop, string DestT, - string SrcT, ValueType DestTy, ValueType SrcTy, - Operand ImmTy, SDPatternOperator ExtOp> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -class N2VShiftLongHigh opcode, string asmop, string DestT, - string SrcT, ValueType DestTy, ValueType SrcTy, - int StartIndex, Operand ImmTy, - SDPatternOperator ExtOp, PatFrag getTop> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_N2VShLL opcode, string asmop, - SDNode ExtOp> { - // 64-bit vector types. 
- def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8, - shl_imm8, ExtOp> { - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - } - - def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16, - shl_imm16, ExtOp> { - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - } - - def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32, - shl_imm32, ExtOp> { - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - } - - // 128-bit vector types - def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8, - 8, shl_imm8, ExtOp, Neon_High16B> { - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - } - - def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16, - 4, shl_imm16, ExtOp, Neon_High8H> { - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - } - - def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32, - 2, shl_imm32, ExtOp, Neon_High4S> { - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - } - - // Use other patterns to match when the immediate is 0. - def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))), - (!cast(prefix # "_8B") VPR64:$Rn, 0)>; - - def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))), - (!cast(prefix # "_4H") VPR64:$Rn, 0)>; - - def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))), - (!cast(prefix # "_2S") VPR64:$Rn, 0)>; - - def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))), - (!cast(prefix # "_16B") VPR128:$Rn, 0)>; - - def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))), - (!cast(prefix # "_8H") VPR128:$Rn, 0)>; - - def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))), - (!cast(prefix # "_4S") VPR128:$Rn, 0)>; -} - -// Shift left long -defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>; -defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>; - -class NeonI_ext_len_alias - : NeonInstAlias; - -// Signed integer lengthen (vector) is alias for SSHLL Vd, Vn, #0 -// Signed integer lengthen (vector, second part) is alias for SSHLL2 Vd, Vn, #0 -// FIXME: This is actually the preferred syntax but TableGen can't deal with -// custom printing of aliases. -def SXTLvv_8B : NeonI_ext_len_alias<"sxtl", ".8h", ".8b", SSHLLvvi_8B, VPR128, VPR64>; -def SXTLvv_4H : NeonI_ext_len_alias<"sxtl", ".4s", ".4h", SSHLLvvi_4H, VPR128, VPR64>; -def SXTLvv_2S : NeonI_ext_len_alias<"sxtl", ".2d", ".2s", SSHLLvvi_2S, VPR128, VPR64>; -def SXTL2vv_16B : NeonI_ext_len_alias<"sxtl2", ".8h", ".16b", SSHLLvvi_16B, VPR128, VPR128>; -def SXTL2vv_8H : NeonI_ext_len_alias<"sxtl2", ".4s", ".8h", SSHLLvvi_8H, VPR128, VPR128>; -def SXTL2vv_4S : NeonI_ext_len_alias<"sxtl2", ".2d", ".4s", SSHLLvvi_4S, VPR128, VPR128>; - -// Unsigned integer lengthen (vector) is alias for USHLL Vd, Vn, #0 -// Unsigned integer lengthen (vector, second part) is alias for USHLL2 Vd, Vn, #0 -// FIXME: This is actually the preferred syntax but TableGen can't deal with -// custom printing of aliases. 
-def UXTLvv_8B : NeonI_ext_len_alias<"uxtl", ".8h", ".8b", USHLLvvi_8B, VPR128, VPR64>; -def UXTLvv_4H : NeonI_ext_len_alias<"uxtl", ".4s", ".4h", USHLLvvi_4H, VPR128, VPR64>; -def UXTLvv_2S : NeonI_ext_len_alias<"uxtl", ".2d", ".2s", USHLLvvi_2S, VPR128, VPR64>; -def UXTL2vv_16B : NeonI_ext_len_alias<"uxtl2", ".8h", ".16b", USHLLvvi_16B, VPR128, VPR128>; -def UXTL2vv_8H : NeonI_ext_len_alias<"uxtl2", ".4s", ".8h", USHLLvvi_8H, VPR128, VPR128>; -def UXTL2vv_4S : NeonI_ext_len_alias<"uxtl2", ".2d", ".4s", USHLLvvi_4S, VPR128, VPR128>; - -def : Pat<(v8i16 (anyext (v8i8 VPR64:$Rn))), (USHLLvvi_8B VPR64:$Rn, 0)>; -def : Pat<(v4i32 (anyext (v4i16 VPR64:$Rn))), (USHLLvvi_4H VPR64:$Rn, 0)>; -def : Pat<(v2i64 (anyext (v2i32 VPR64:$Rn))), (USHLLvvi_2S VPR64:$Rn, 0)>; - -// Rounding/Saturating shift -class N2VShift_RQ opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, - SDPatternOperator OpNode> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -// shift right (vector by immediate) -multiclass NeonI_N2VShR_RQ opcode, string asmop, - SDPatternOperator OpNode> { - def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -multiclass NeonI_N2VShL_Q opcode, string asmop, - SDPatternOperator OpNode> { - // 64-bit vector types. - def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - // 128-bit vector types. 
- def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -// Rounding shift right -defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr", - int_aarch64_neon_vsrshr>; -defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr", - int_aarch64_neon_vurshr>; - -// Saturating shift left unsigned -defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>; - -// Saturating shift left -defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>; -defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>; - -class N2VShiftAdd opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, - SDNode OpNode> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -// Shift Right accumulate -multiclass NeonI_N2VShRAdd opcode, string asmop, SDNode OpNode> { - def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -// Shift right and accumulate -defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>; -defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>; - -// Rounding shift accumulate -class N2VShiftAdd_R opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, - SDPatternOperator OpNode> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -multiclass NeonI_N2VShRAdd_R opcode, string asmop, - SDPatternOperator OpNode> { - def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -// 
Rounding shift right and accumulate -defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>; -defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>; - -// Shift insert by immediate -class N2VShiftIns opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, - SDPatternOperator OpNode> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -// shift left insert (vector by immediate) -multiclass NeonI_N2VShLIns opcode, string asmop> { - def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, - int_aarch64_neon_vsli> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, - int_aarch64_neon_vsli> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, - int_aarch64_neon_vsli> { - let Inst{22-21} = 0b01; - } - - // 128-bit vector types - def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, - int_aarch64_neon_vsli> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, - int_aarch64_neon_vsli> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, - int_aarch64_neon_vsli> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, - int_aarch64_neon_vsli> { - let Inst{22} = 0b1; - } -} - -// shift right insert (vector by immediate) -multiclass NeonI_N2VShRIns opcode, string asmop> { - // 64-bit vector types. - def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - int_aarch64_neon_vsri> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - int_aarch64_neon_vsri> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - int_aarch64_neon_vsri> { - let Inst{22-21} = 0b01; - } - - // 128-bit vector types - def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - int_aarch64_neon_vsri> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - int_aarch64_neon_vsri> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - int_aarch64_neon_vsri> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - int_aarch64_neon_vsri> { - let Inst{22} = 0b1; - } -} - -// Shift left and insert -defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">; - -// Shift right and insert -defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">; - -class N2VShR_Narrow opcode, string asmop, string DestT, - string SrcT, Operand ImmTy> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -class N2VShR_Narrow_Hi opcode, string asmop, string DestT, - string SrcT, Operand ImmTy> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -// left long shift by immediate -multiclass NeonI_N2VShR_Narrow opcode, string asmop> { - def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", 
shr_imm32> { - let Inst{22-21} = 0b01; - } - - // Shift Narrow High - def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h", - shr_imm8> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s", - shr_imm16> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d", - shr_imm32> { - let Inst{22-21} = 0b01; - } -} - -// Shift right narrow -defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">; - -// Shift right narrow (prefix Q is saturating, prefix R is rounding) -defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">; -defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">; -defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">; -defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">; -defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">; -defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">; -defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">; - -def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn), - (v2i64 (concat_vectors (v1i64 node:$Rm), - (v1i64 node:$Rn)))>; -def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn), - (v8i16 (concat_vectors (v4i16 node:$Rm), - (v4i16 node:$Rn)))>; -def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn), - (v4i32 (concat_vectors (v2i32 node:$Rm), - (v2i32 node:$Rn)))>; -def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn), - (v4f32 (concat_vectors (v2f32 node:$Rm), - (v2f32 node:$Rn)))>; -def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn), - (v2f64 (concat_vectors (v1f64 node:$Rm), - (v1f64 node:$Rn)))>; - -def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs), - (v8i16 (srl (v8i16 node:$lhs), - (v8i16 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs), - (v4i32 (srl (v4i32 node:$lhs), - (v4i32 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs), - (v2i64 (srl (v2i64 node:$lhs), - (v2i64 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs), - (v8i16 (sra (v8i16 node:$lhs), - (v8i16 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs), - (v4i32 (sra (v4i32 node:$lhs), - (v4i32 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs), - (v2i64 (sra (v2i64 node:$lhs), - (v2i64 (Neon_vdup (i32 node:$rhs)))))>; - -// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors) -multiclass Neon_shiftNarrow_patterns { - def : Pat<(v8i8 (trunc (!cast("Neon_" # shr # "Imm8H") VPR128:$Rn, - (i32 shr_imm8:$Imm)))), - (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>; - def : Pat<(v4i16 (trunc (!cast("Neon_" # shr # "Imm4S") VPR128:$Rn, - (i32 shr_imm16:$Imm)))), - (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>; - def : Pat<(v2i32 (trunc (!cast("Neon_" # shr # "Imm2D") VPR128:$Rn, - (i32 shr_imm32:$Imm)))), - (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>; - - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert - (v8i8 (trunc (!cast("Neon_" # shr # "Imm8H") - VPR128:$Rn, (i32 shr_imm8:$Imm))))))), - (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), - VPR128:$Rn, imm:$Imm)>; - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert - (v4i16 (trunc (!cast("Neon_" # shr # "Imm4S") - VPR128:$Rn, (i32 shr_imm16:$Imm))))))), - (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, imm:$Imm)>; - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert - (v2i32 (trunc 
(!cast("Neon_" # shr # "Imm2D") - VPR128:$Rn, (i32 shr_imm32:$Imm))))))), - (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, imm:$Imm)>; -} - -multiclass Neon_shiftNarrow_QR_patterns { - def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm)), - (!cast(prefix # "_8B") VPR128:$Rn, imm:$Imm)>; - def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)), - (!cast(prefix # "_4H") VPR128:$Rn, imm:$Imm)>; - def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)), - (!cast(prefix # "_2S") VPR128:$Rn, imm:$Imm)>; - - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), - (v1i64 (bitconvert (v8i8 - (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))), - (!cast(prefix # "_16B") - (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, imm:$Imm)>; - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), - (v1i64 (bitconvert (v4i16 - (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))), - (!cast(prefix # "_8H") - (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, imm:$Imm)>; - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), - (v1i64 (bitconvert (v2i32 - (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))), - (!cast(prefix # "_4S") - (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, imm:$Imm)>; -} - -defm : Neon_shiftNarrow_patterns<"lshr">; -defm : Neon_shiftNarrow_patterns<"ashr">; - -defm : Neon_shiftNarrow_QR_patterns; -defm : Neon_shiftNarrow_QR_patterns; -defm : Neon_shiftNarrow_QR_patterns; -defm : Neon_shiftNarrow_QR_patterns; -defm : Neon_shiftNarrow_QR_patterns; -defm : Neon_shiftNarrow_QR_patterns; -defm : Neon_shiftNarrow_QR_patterns; - -// Convert fix-point and float-pointing -class N2VCvt_Fx opcode, string asmop, string T, - RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy, - Operand ImmTy, SDPatternOperator IntOp> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_N2VCvt_Fx2fp opcode, string asmop, - SDPatternOperator IntOp> { - def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32, - shr_imm32, IntOp> { - let Inst{22-21} = 0b01; - } - - def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32, - shr_imm32, IntOp> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64, - shr_imm64, IntOp> { - let Inst{22} = 0b1; - } -} - -multiclass NeonI_N2VCvt_Fp2fx opcode, string asmop, - SDPatternOperator IntOp> { - def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32, - shr_imm32, IntOp> { - let Inst{22-21} = 0b01; - } - - def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32, - shr_imm32, IntOp> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64, - shr_imm64, IntOp> { - let Inst{22} = 0b1; - } -} - -// Convert fixed-point to floating-point -defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf", - int_arm_neon_vcvtfxs2fp>; -defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf", - int_arm_neon_vcvtfxu2fp>; - -// Convert floating-point to fixed-point -defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs", - int_arm_neon_vcvtfp2fxs>; -defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu", - int_arm_neon_vcvtfp2fxu>; - -multiclass Neon_sshll2_0 -{ - def _v8i8 : PatFrag<(ops node:$Rn), - (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>; - def _v4i16 : PatFrag<(ops node:$Rn), - (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>; - def _v2i32 : PatFrag<(ops node:$Rn), - (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>; -} - -defm NI_sext_high : Neon_sshll2_0; -defm NI_zext_high : Neon_sshll2_0; - - 
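For readers not steeped in TableGen: the shift-by-immediate, shift-accumulate, shift-insert, shift-narrow and fixed-point conversion classes in the hunk above correspond to the standard ACLE NEON intrinsics. A minimal C sketch follows; it is not part of this patch, assumes arm_neon.h on an AArch64 target, and the function and variable names are illustrative only.

#include <arm_neon.h>

/* Illustrative only: each intrinsic below maps onto one of the
   shift-by-immediate or fixed-point-conversion instruction classes
   whose selection patterns appear in the hunk above. */
int32x4_t shift_examples(int32x4_t v, int16x8_t w) {
  int32x4_t shl  = vshlq_n_s32(v, 3);          /* SHL   Vd.4s, Vn.4s, #3        */
  int32x4_t sshr = vshrq_n_s32(v, 7);          /* SSHR  Vd.4s, Vn.4s, #7        */
  int32x4_t ssra = vsraq_n_s32(shl, v, 2);     /* SSRA: shift right, accumulate */
  int32x4_t sxtl = vmovl_s16(vget_low_s16(w)); /* SSHLL #0, aliased as SXTL     */
  int16x4_t shrn = vshrn_n_s32(v, 8);          /* SHRN: shift right and narrow  */
  float32x4_t f  = vcvtq_n_f32_s32(v, 16);     /* SCVTF with fixed-point #16    */
  int32x4_t fx   = vcvtq_n_s32_f32(f, 16);     /* FCVTZS with fixed-point #16   */
  (void)sshr; (void)sxtl; (void)shrn;
  return vaddq_s32(ssra, fx);
}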
-//===----------------------------------------------------------------------===// -// Multiclasses for NeonI_Across -//===----------------------------------------------------------------------===// - -// Variant 1 - -multiclass NeonI_2VAcross_1 opcode, - string asmop, SDPatternOperator opnode> -{ - def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode, - (outs FPR16:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd, $Rn.8b", - [(set (v1i16 FPR16:$Rd), - (v1i16 (opnode (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode, - (outs FPR16:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.16b", - [(set (v1i16 FPR16:$Rd), - (v1i16 (opnode (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode, - (outs FPR32:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd, $Rn.4h", - [(set (v1i32 FPR32:$Rd), - (v1i32 (opnode (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode, - (outs FPR32:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.8h", - [(set (v1i32 FPR32:$Rd), - (v1i32 (opnode (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - // _1d2s doesn't exist! - - def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode, - (outs FPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.4s", - [(set (v1i64 FPR64:$Rd), - (v1i64 (opnode (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>; -defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>; - -// Variant 2 - -multiclass NeonI_2VAcross_2 opcode, - string asmop, SDPatternOperator opnode> -{ - def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode, - (outs FPR8:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd, $Rn.8b", - [(set (v1i8 FPR8:$Rd), - (v1i8 (opnode (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode, - (outs FPR8:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.16b", - [(set (v1i8 FPR8:$Rd), - (v1i8 (opnode (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode, - (outs FPR16:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd, $Rn.4h", - [(set (v1i16 FPR16:$Rd), - (v1i16 (opnode (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode, - (outs FPR16:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.8h", - [(set (v1i16 FPR16:$Rd), - (v1i16 (opnode (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - // _1s2s doesn't exist! 
- - def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode, - (outs FPR32:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.4s", - [(set (v1i32 FPR32:$Rd), - (v1i32 (opnode (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>; -defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>; - -defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>; -defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>; - -defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>; - -// Variant 3 - -multiclass NeonI_2VAcross_3 opcode, bits<2> size, - string asmop, SDPatternOperator opnode> { - def _1s4s: NeonI_2VAcross<0b1, u, size, opcode, - (outs FPR32:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.4s", - [(set (f32 FPR32:$Rd), - (f32 (opnode (v4f32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv", - int_aarch64_neon_vmaxnmv>; -defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv", - int_aarch64_neon_vminnmv>; - -defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv", - int_aarch64_neon_vmaxv>; -defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv", - int_aarch64_neon_vminv>; - -// The followings are for instruction class (Perm) - -class NeonI_Permute size, bits<3> opcode, - string asmop, RegisterOperand OpVPR, string OpS, - SDPatternOperator opnode, ValueType Ty> - : NeonI_Perm, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_Perm_pat opcode, string asmop, - SDPatternOperator opnode> { - def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop, - VPR64, "8b", opnode, v8i8>; - def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop, - VPR128, "16b",opnode, v16i8>; - def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop, - VPR64, "4h", opnode, v4i16>; - def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop, - VPR128, "8h", opnode, v8i16>; - def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop, - VPR64, "2s", opnode, v2i32>; - def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop, - VPR128, "4s", opnode, v4i32>; - def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop, - VPR128, "2d", opnode, v2i64>; -} - -defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>; -defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>; -defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>; -defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>; -defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>; -defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>; - -multiclass NeonI_Perm_float_pat { - def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))), - (!cast(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>; - - def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))), - (!cast(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>; - - def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))), - (!cast(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>; -} - -defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>; -defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>; -defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>; -defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>; -defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>; -defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>; - -// The followings are for instruction class (3V Diff) - -// normal long/long2 pattern -class NeonI_3VDL size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, SDPatternOperator ext, - 
RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_3VDL_s opcode, - string asmop, SDPatternOperator opnode, - bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, sext, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, sext, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, sext, VPR64, v2i64, v2i32>; - } -} - -multiclass NeonI_3VDL2_s opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>; - def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>; - } -} - -multiclass NeonI_3VDL_u opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, zext, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, zext, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, zext, VPR64, v2i64, v2i32>; - } -} - -multiclass NeonI_3VDL2_u opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>; - def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>; - } -} - -defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>; -defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>; - -defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>; -defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>; - -defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>; -defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>; - -defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>; -defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>; - -// normal wide/wide2 pattern -class NeonI_3VDW size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, SDPatternOperator ext, - RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_3VDW_s opcode, string asmop, - SDPatternOperator opnode> { - def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, sext, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, sext, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, sext, VPR64, v2i64, v2i32>; -} - -defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>; -defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>; - -multiclass NeonI_3VDW2_s opcode, string asmop, - SDPatternOperator opnode> { - def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>; - def _4s8h : 
NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>; -} - -defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>; -defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>; - -multiclass NeonI_3VDW_u opcode, string asmop, - SDPatternOperator opnode> { - def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, zext, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, zext, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, zext, VPR64, v2i64, v2i32>; -} - -defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>; -defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>; - -multiclass NeonI_3VDW2_u opcode, string asmop, - SDPatternOperator opnode> { - def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>; - def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>; -} - -defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>; -defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>; - -// Get the high half part of the vector element. -multiclass NeonI_get_high { - def _8h : PatFrag<(ops node:$Rn), - (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn), - (v8i16 (Neon_vdup (i32 8)))))))>; - def _4s : PatFrag<(ops node:$Rn), - (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn), - (v4i32 (Neon_vdup (i32 16)))))))>; - def _2d : PatFrag<(ops node:$Rn), - (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn), - (v2i64 (Neon_vdup (i32 32)))))))>; -} - -defm NI_get_hi : NeonI_get_high; - -// pattern for addhn/subhn with 2 operands -class NeonI_3VDN_addhn_2Op size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, SDPatternOperator get_hi, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_3VDN_addhn_2Op opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h", - opnode, NI_get_hi_8h, v8i8, v8i16>; - def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s", - opnode, NI_get_hi_4s, v4i16, v4i32>; - def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d", - opnode, NI_get_hi_2d, v2i32, v2i64>; - } -} - -defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>; -defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>; - -// pattern for operation with 2 operands -class NeonI_3VD_2Op size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, - RegisterOperand ResVPR, RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -// normal narrow pattern -multiclass NeonI_3VDN_2Op opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h", - opnode, VPR64, VPR128, v8i8, v8i16>; - def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s", - opnode, VPR64, VPR128, v4i16, v4i32>; - def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, 
opcode, asmop, "2s", "2d", - opnode, VPR64, VPR128, v2i32, v2i64>; - } -} - -defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>; -defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>; - -// pattern for acle intrinsic with 3 operands -class NeonI_3VDN_3Op size, bits<4> opcode, - string asmop, string ResS, string OpS> - : NeonI_3VDiff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let neverHasSideEffects = 1; -} - -multiclass NeonI_3VDN_3Op_v1 opcode, string asmop> { - def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">; - def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">; - def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">; -} - -defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">; -defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">; - -defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">; -defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">; - -// Patterns have to be separate because there's a SUBREG_TO_REG in the output -// part. -class NarrowHighHalfPat - : Pat<(Neon_combine_2D (v1i64 VPR64:$src), - (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn), - (SrcTy VPR128:$Rm)))))), - (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, VPR128:$Rm)>; - -// addhn2 patterns -def : NarrowHighHalfPat>; -def : NarrowHighHalfPat>; -def : NarrowHighHalfPat>; - -// subhn2 patterns -def : NarrowHighHalfPat>; -def : NarrowHighHalfPat>; -def : NarrowHighHalfPat>; - -// raddhn2 patterns -def : NarrowHighHalfPat; -def : NarrowHighHalfPat; -def : NarrowHighHalfPat; - -// rsubhn2 patterns -def : NarrowHighHalfPat; -def : NarrowHighHalfPat; -def : NarrowHighHalfPat; - -// pattern that need to extend result -class NeonI_3VDL_Ext size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, - RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy, ValueType OpSTy> - : NeonI_3VDiff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_3VDL_zext opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, VPR64, v8i16, v8i8, v8i8>; - def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, VPR64, v4i32, v4i16, v4i16>; - def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, VPR64, v2i64, v2i32, v2i32>; - } -} - -defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>; -defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>; - -multiclass NeonI_Op_High { - def _16B : PatFrag<(ops node:$Rn, node:$Rm), - (op (v8i8 (Neon_High16B node:$Rn)), - (v8i8 (Neon_High16B node:$Rm)))>; - def _8H : PatFrag<(ops node:$Rn, node:$Rm), - (op (v4i16 (Neon_High8H node:$Rn)), - (v4i16 (Neon_High8H node:$Rm)))>; - def _4S : PatFrag<(ops node:$Rn, node:$Rm), - (op (v2i32 (Neon_High4S node:$Rn)), - (v2i32 (Neon_High4S node:$Rm)))>; -} - -defm NI_sabdl_hi : NeonI_Op_High; -defm NI_uabdl_hi : NeonI_Op_High; -defm NI_smull_hi : NeonI_Op_High; -defm NI_umull_hi : NeonI_Op_High; -defm NI_qdmull_hi : NeonI_Op_High; -defm NI_pmull_hi : NeonI_Op_High; - -multiclass NeonI_3VDL_Abd_u opcode, string asmop, string opnode, - bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b", - !cast(opnode # "_16B"), - VPR128, v8i16, v16i8, 
v8i8>; - def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h", - !cast(opnode # "_8H"), - VPR128, v4i32, v8i16, v4i16>; - def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s", - !cast(opnode # "_4S"), - VPR128, v2i64, v4i32, v2i32>; - } -} - -defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>; -defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>; - -// For pattern that need two operators being chained. -class NeonI_3VDL_Aba size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, SDPatternOperator subop, - RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy, ValueType OpSTy> - : NeonI_3VDiff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -multiclass NeonI_3VDL_Aba_v1 opcode, string asmop, - SDPatternOperator opnode, SDPatternOperator subop>{ - def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, subop, VPR64, v8i16, v8i8, v8i8>; - def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, subop, VPR64, v4i32, v4i16, v4i16>; - def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, subop, VPR64, v2i64, v2i32, v2i32>; -} - -defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal", - add, int_arm_neon_vabds>; -defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal", - add, int_arm_neon_vabdu>; - -multiclass NeonI_3VDL2_Aba_v1 opcode, string asmop, - SDPatternOperator opnode, string subop> { - def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, !cast(subop # "_16B"), - VPR128, v8i16, v16i8, v8i8>; - def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, !cast(subop # "_8H"), - VPR128, v4i32, v8i16, v4i16>; - def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, !cast(subop # "_4S"), - VPR128, v2i64, v4i32, v2i32>; -} - -defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add, - "NI_sabdl_hi">; -defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add, - "NI_uabdl_hi">; - -// Long pattern with 2 operands -multiclass NeonI_3VDL_2Op opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable, - SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { - def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, VPR128, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, VPR128, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, VPR128, VPR64, v2i64, v2i32>; - } -} - -defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>; -defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>; - -class NeonI_3VDL2_2Op_mull size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff, - Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>; - -multiclass NeonI_3VDL2_2Op_mull_v1 opcode, string asmop, - string opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b", - !cast(opnode # "_16B"), - v8i16, v16i8>; - def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h", - !cast(opnode # "_8H"), - v4i32, v8i16>; - def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s", - !cast(opnode # "_4S"), - v2i64, v4i32>; - } -} - -defm 
SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2", - "NI_smull_hi", 1>; -defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2", - "NI_umull_hi", 1>; - -// Long pattern with 3 operands -class NeonI_3VDL_3Op size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff, - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { - let Constraints = "$src = $Rd"; -} - -multiclass NeonI_3VDL_3Op_v1 opcode, string asmop, - SDPatternOperator opnode> { - def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, v8i16, v8i8>; - def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, v4i32, v4i16>; - def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, v2i64, v2i32>; -} - -def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), - (add node:$Rd, - (int_arm_neon_vmulls node:$Rn, node:$Rm))>; - -def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), - (add node:$Rd, - (int_arm_neon_vmullu node:$Rn, node:$Rm))>; - -def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), - (sub node:$Rd, - (int_arm_neon_vmulls node:$Rn, node:$Rm))>; - -def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), - (sub node:$Rd, - (int_arm_neon_vmullu node:$Rn, node:$Rm))>; - -defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>; -defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>; - -defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>; -defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>; - -class NeonI_3VDL2_3Op_mlas size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator subop, SDPatternOperator opnode, - RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff, - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { - let Constraints = "$src = $Rd"; -} - -multiclass NeonI_3VDL2_3Op_mlas_v1 opcode, string asmop, - SDPatternOperator subop, string opnode> { - def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b", - subop, !cast(opnode # "_16B"), - VPR128, v8i16, v16i8>; - def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h", - subop, !cast(opnode # "_8H"), - VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s", - subop, !cast(opnode # "_4S"), - VPR128, v2i64, v4i32>; -} - -defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2", - add, "NI_smull_hi">; -defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2", - add, "NI_umull_hi">; - -defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2", - sub, "NI_smull_hi">; -defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2", - sub, "NI_umull_hi">; - -multiclass NeonI_3VDL_qdmlal_3Op_v2 opcode, string asmop, - SDPatternOperator opnode> { - def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, int_arm_neon_vqdmull, - VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, int_arm_neon_vqdmull, - VPR64, v2i64, v2i32>; -} - -defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal", - int_arm_neon_vqadds>; -defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl", - int_arm_neon_vqsubs>; - -multiclass NeonI_3VDL_v2 opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", 
"4h", - opnode, VPR128, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, VPR128, VPR64, v2i64, v2i32>; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull", - int_arm_neon_vqdmull, 1>; -} - -multiclass NeonI_3VDL2_2Op_mull_v2 opcode, string asmop, - string opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h", - !cast(opnode # "_8H"), - v4i32, v8i16>; - def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s", - !cast(opnode # "_4S"), - v2i64, v4i32>; - } -} - -defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2", - "NI_qdmull_hi", 1>; - -multiclass NeonI_3VDL2_3Op_qdmlal_v2 opcode, string asmop, - SDPatternOperator opnode> { - def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_qdmull_hi_8H, - VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_qdmull_hi_4S, - VPR128, v2i64, v4i32>; -} - -defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2", - int_arm_neon_vqadds>; -defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2", - int_arm_neon_vqsubs>; - -multiclass NeonI_3VDL_v3 opcode, string asmop, - SDPatternOperator opnode_8h8b, - SDPatternOperator opnode_1q1d, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode_8h8b, VPR128, VPR64, v8i16, v8i8>; - - def _1q1d : NeonI_3VD_2Op<0b0, u, 0b11, opcode, asmop, "1q", "1d", - opnode_1q1d, VPR128, VPR64, v16i8, v1i64>; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in -defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, - int_aarch64_neon_vmull_p64, 1>; - -multiclass NeonI_3VDL2_2Op_mull_v3 opcode, string asmop, - string opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b", - !cast(opnode # "_16B"), - v8i16, v16i8>; - - def _1q2d : - NeonI_3VDiff<0b1, u, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d", - [(set (v16i8 VPR128:$Rd), - (v16i8 (int_aarch64_neon_vmull_p64 - (v1i64 (scalar_to_vector - (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))), - (v1i64 (scalar_to_vector - (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))], - NoItinerary>, - Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>; - } - - def : Pat<(v16i8 (int_aarch64_neon_vmull_p64 - (v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 1))), - (v1i64 (extract_subvector (v2i64 VPR128:$Rm), (i64 1))))), - (!cast(NAME # "_1q2d") VPR128:$Rn, VPR128:$Rm)>; -} - -defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi", - 1>; - -// End of implementation for instruction class (3V Diff) - -// The followings are vector load/store multiple N-element structure -// (class SIMD lselem). - -// ld1: load multiple 1-element structure to 1/2/3/4 registers. -// ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4). -// The structure consists of a sequence of sets of N values. -// The first element of the structure is placed in the first lane -// of the first first vector, the second element in the first lane -// of the second vector, and so on. -// E.g. 
LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into -// the three 64-bit vectors list {BA, DC, FE}. -// E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three -// 64-bit vectors list {DA, EB, FC}. -// Store instructions store multiple structure to N registers like load. - - -class NeonI_LDVList opcode, bits<2> size, - RegisterOperand VecList, string asmop> - : NeonI_LdStMult, - Sched<[WriteVecLd, ReadVecLd]> { - let mayLoad = 1; - let neverHasSideEffects = 1; -} - -multiclass LDVList_BHSD opcode, string List, string asmop> { - def _8B : NeonI_LDVList<0, opcode, 0b00, - !cast(List # "8B_operand"), asmop>; - - def _4H : NeonI_LDVList<0, opcode, 0b01, - !cast(List # "4H_operand"), asmop>; - - def _2S : NeonI_LDVList<0, opcode, 0b10, - !cast(List # "2S_operand"), asmop>; - - def _16B : NeonI_LDVList<1, opcode, 0b00, - !cast(List # "16B_operand"), asmop>; - - def _8H : NeonI_LDVList<1, opcode, 0b01, - !cast(List # "8H_operand"), asmop>; - - def _4S : NeonI_LDVList<1, opcode, 0b10, - !cast(List # "4S_operand"), asmop>; - - def _2D : NeonI_LDVList<1, opcode, 0b11, - !cast(List # "2D_operand"), asmop>; -} - -// Load multiple N-element structure to N consecutive registers (N = 1,2,3,4) -defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">; -def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">; - -defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">; - -defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">; - -defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">; - -// Load multiple 1-element structure to N consecutive registers (N = 2,3,4) -defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">; -def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">; - -defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">; -def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">; - -defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">; -def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">; - -class NeonI_STVList opcode, bits<2> size, - RegisterOperand VecList, string asmop> - : NeonI_LdStMult, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { - let mayStore = 1; - let neverHasSideEffects = 1; -} - -multiclass STVList_BHSD opcode, string List, string asmop> { - def _8B : NeonI_STVList<0, opcode, 0b00, - !cast(List # "8B_operand"), asmop>; - - def _4H : NeonI_STVList<0, opcode, 0b01, - !cast(List # "4H_operand"), asmop>; - - def _2S : NeonI_STVList<0, opcode, 0b10, - !cast(List # "2S_operand"), asmop>; - - def _16B : NeonI_STVList<1, opcode, 0b00, - !cast(List # "16B_operand"), asmop>; - - def _8H : NeonI_STVList<1, opcode, 0b01, - !cast(List # "8H_operand"), asmop>; - - def _4S : NeonI_STVList<1, opcode, 0b10, - !cast(List # "4S_operand"), asmop>; - - def _2D : NeonI_STVList<1, opcode, 0b11, - !cast(List # "2D_operand"), asmop>; -} - -// Store multiple N-element structures from N registers (N = 1,2,3,4) -defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">; -def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">; - -defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">; - -defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">; - -defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">; - -// Store multiple 1-element structures from N consecutive registers (N = 2,3,4) -defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">; -def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">; - -defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">; -def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">; - -defm ST1x4 : STVList_BHSD<0b0010, "VQuad", 
"st1">; -def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">; - -def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>; -def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>; - -def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>; -def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>; - -def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>; -def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>; - -def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>; -def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>; - -def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>; -def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>; - -def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>; -def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>; - -def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr), - (ST1_2D GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr), - (ST1_2D GPR64xsp:$addr, VPR128:$value)>; - -def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr), - (ST1_4S GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr), - (ST1_4S GPR64xsp:$addr, VPR128:$value)>; - -def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr), - (ST1_8H GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr), - (ST1_16B GPR64xsp:$addr, VPR128:$value)>; - -def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr), - (ST1_1D GPR64xsp:$addr, VPR64:$value)>; -def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr), - (ST1_1D GPR64xsp:$addr, VPR64:$value)>; - -def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr), - (ST1_2S GPR64xsp:$addr, VPR64:$value)>; -def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr), - (ST1_2S GPR64xsp:$addr, VPR64:$value)>; - -def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr), - (ST1_4H GPR64xsp:$addr, VPR64:$value)>; -def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr), - (ST1_8B GPR64xsp:$addr, VPR64:$value)>; - -// Match load/store of v1i8/v1i16/v1i32 type to FPR8/FPR16/FPR32 load/store. -// FIXME: for now we have v1i8, v1i16, v1i32 legal types, if they are illegal, -// these patterns are not needed any more. 
-def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>; -def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>; -def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>; - -def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr), - (LSFP8_STR $value, $addr, 0)>; -def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr), - (LSFP16_STR $value, $addr, 0)>; -def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr), - (LSFP32_STR $value, $addr, 0)>; - - -// End of vector load/store multiple N-element structure(class SIMD lselem) - -// The followings are post-index vector load/store multiple N-element -// structure(class SIMD lselem-post) -def exact1_asmoperand : AsmOperandClass { - let Name = "Exact1"; - let PredicateMethod = "isExactImm<1>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact1 : Operand, ImmLeaf { - let ParserMatchClass = exact1_asmoperand; -} - -def exact2_asmoperand : AsmOperandClass { - let Name = "Exact2"; - let PredicateMethod = "isExactImm<2>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact2 : Operand, ImmLeaf { - let ParserMatchClass = exact2_asmoperand; -} - -def exact3_asmoperand : AsmOperandClass { - let Name = "Exact3"; - let PredicateMethod = "isExactImm<3>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact3 : Operand, ImmLeaf { - let ParserMatchClass = exact3_asmoperand; -} - -def exact4_asmoperand : AsmOperandClass { - let Name = "Exact4"; - let PredicateMethod = "isExactImm<4>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact4 : Operand, ImmLeaf { - let ParserMatchClass = exact4_asmoperand; -} - -def exact6_asmoperand : AsmOperandClass { - let Name = "Exact6"; - let PredicateMethod = "isExactImm<6>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact6 : Operand, ImmLeaf { - let ParserMatchClass = exact6_asmoperand; -} - -def exact8_asmoperand : AsmOperandClass { - let Name = "Exact8"; - let PredicateMethod = "isExactImm<8>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact8 : Operand, ImmLeaf { - let ParserMatchClass = exact8_asmoperand; -} - -def exact12_asmoperand : AsmOperandClass { - let Name = "Exact12"; - let PredicateMethod = "isExactImm<12>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact12 : Operand, ImmLeaf { - let ParserMatchClass = exact12_asmoperand; -} - -def exact16_asmoperand : AsmOperandClass { - let Name = "Exact16"; - let PredicateMethod = "isExactImm<16>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact16 : Operand, ImmLeaf { - let ParserMatchClass = exact16_asmoperand; -} - -def exact24_asmoperand : AsmOperandClass { - let Name = "Exact24"; - let PredicateMethod = "isExactImm<24>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact24 : Operand, ImmLeaf { - let ParserMatchClass = exact24_asmoperand; -} - -def exact32_asmoperand : AsmOperandClass { - let Name = "Exact32"; - let PredicateMethod = "isExactImm<32>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact32 : Operand, ImmLeaf { - let ParserMatchClass = exact32_asmoperand; -} - -def exact48_asmoperand : AsmOperandClass { - let Name = "Exact48"; - let PredicateMethod = "isExactImm<48>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact48 : Operand, ImmLeaf { - let ParserMatchClass = exact48_asmoperand; -} - -def exact64_asmoperand : AsmOperandClass { - let Name = "Exact64"; - let PredicateMethod = "isExactImm<64>"; - let RenderMethod = "addImmOperands"; -} -def uimm_exact64 : Operand, ImmLeaf { - let ParserMatchClass = exact64_asmoperand; -} - -multiclass NeonI_LDWB_VList 
opcode, bits<2> size, - RegisterOperand VecList, Operand ImmTy, - string asmop> { - let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1, - DecoderMethod = "DecodeVLDSTPostInstruction" in { - def _fixed : NeonI_LdStMult_Post, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> { - let Rm = 0b11111; - } - - def _register : NeonI_LdStMult_Post, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>; - } -} - -multiclass LDWB_VList_BHSD opcode, string List, Operand ImmTy, - Operand ImmTy2, string asmop> { - defm _8B : NeonI_LDWB_VList<0, opcode, 0b00, - !cast(List # "8B_operand"), - ImmTy, asmop>; - - defm _4H : NeonI_LDWB_VList<0, opcode, 0b01, - !cast(List # "4H_operand"), - ImmTy, asmop>; - - defm _2S : NeonI_LDWB_VList<0, opcode, 0b10, - !cast(List # "2S_operand"), - ImmTy, asmop>; - - defm _16B : NeonI_LDWB_VList<1, opcode, 0b00, - !cast(List # "16B_operand"), - ImmTy2, asmop>; - - defm _8H : NeonI_LDWB_VList<1, opcode, 0b01, - !cast(List # "8H_operand"), - ImmTy2, asmop>; - - defm _4S : NeonI_LDWB_VList<1, opcode, 0b10, - !cast(List # "4S_operand"), - ImmTy2, asmop>; - - defm _2D : NeonI_LDWB_VList<1, opcode, 0b11, - !cast(List # "2D_operand"), - ImmTy2, asmop>; -} - -// Post-index load multiple N-element structures from N registers (N = 1,2,3,4) -defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">; -defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8, - "ld1">; - -defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">; - -defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48, - "ld3">; - -defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">; - -// Post-index load multiple 1-element structures from N consecutive registers -// (N = 2,3,4) -defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32, - "ld1">; -defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand, - uimm_exact16, "ld1">; - -defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48, - "ld1">; -defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand, - uimm_exact24, "ld1">; - -defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64, - "ld1">; -defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand, - uimm_exact32, "ld1">; - -multiclass NeonI_STWB_VList opcode, bits<2> size, - RegisterOperand VecList, Operand ImmTy, - string asmop> { - let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1, - DecoderMethod = "DecodeVLDSTPostInstruction" in { - def _fixed : NeonI_LdStMult_Post, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { - let Rm = 0b11111; - } - - def _register : NeonI_LdStMult_Post, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>; - } -} - -multiclass STWB_VList_BHSD opcode, string List, Operand ImmTy, - Operand ImmTy2, string asmop> { - defm _8B : NeonI_STWB_VList<0, opcode, 0b00, - !cast(List # "8B_operand"), ImmTy, asmop>; - - defm _4H : NeonI_STWB_VList<0, opcode, 0b01, - !cast(List # "4H_operand"), - ImmTy, asmop>; - - defm _2S : NeonI_STWB_VList<0, opcode, 0b10, - !cast(List # "2S_operand"), - ImmTy, asmop>; - - defm _16B : NeonI_STWB_VList<1, opcode, 0b00, - !cast(List # "16B_operand"), - ImmTy2, asmop>; - - defm _8H : NeonI_STWB_VList<1, opcode, 0b01, - !cast(List # "8H_operand"), - ImmTy2, asmop>; - - defm _4S : NeonI_STWB_VList<1, opcode, 0b10, - !cast(List # "4S_operand"), - ImmTy2, asmop>; - - defm _2D : NeonI_STWB_VList<1, opcode, 0b11, - !cast(List # 
"2D_operand"), - ImmTy2, asmop>; -} - -// Post-index load multiple N-element structures from N registers (N = 1,2,3,4) -defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">; -defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8, - "st1">; - -defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">; - -defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48, - "st3">; - -defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">; - -// Post-index load multiple 1-element structures from N consecutive registers -// (N = 2,3,4) -defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32, - "st1">; -defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand, - uimm_exact16, "st1">; - -defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48, - "st1">; -defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand, - uimm_exact24, "st1">; - -defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64, - "st1">; -defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand, - uimm_exact32, "st1">; - -// End of post-index vector load/store multiple N-element structure -// (class SIMD lselem-post) - -// The followings are vector load/store single N-element structure -// (class SIMD lsone). -def neon_uimm0_bare : Operand, - ImmLeaf { - let ParserMatchClass = neon_uimm0_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm1_bare : Operand, - ImmLeaf { - let ParserMatchClass = neon_uimm1_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm2_bare : Operand, - ImmLeaf { - let ParserMatchClass = neon_uimm2_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm3_bare : Operand, - ImmLeaf { - let ParserMatchClass = uimm3_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm4_bare : Operand, - ImmLeaf { - let ParserMatchClass = uimm4_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -class NeonI_LDN_Dup opcode, bits<2> size, - RegisterOperand VecList, string asmop> - : NeonI_LdOne_Dup, - Sched<[WriteVecLd, ReadVecLd]> { - let mayLoad = 1; - let neverHasSideEffects = 1; -} - -multiclass LDN_Dup_BHSD opcode, string List, string asmop> { - def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00, - !cast(List # "8B_operand"), asmop>; - - def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01, - !cast(List # "4H_operand"), asmop>; - - def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10, - !cast(List # "2S_operand"), asmop>; - - def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11, - !cast(List # "1D_operand"), asmop>; - - def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00, - !cast(List # "16B_operand"), asmop>; - - def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01, - !cast(List # "8H_operand"), asmop>; - - def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10, - !cast(List # "4S_operand"), asmop>; - - def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11, - !cast(List # "2D_operand"), asmop>; -} - -// Load single 1-element structure to all lanes of 1 register -defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">; - -// Load single N-element structure to all lanes of N consecutive -// registers (N = 2,3,4) -defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">; -defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">; -defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">; - - -class LD1R_pattern - : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))), - (VTy (INST GPR64xsp:$Rn))>; - -// Match all LD1R 
instructions -def : LD1R_pattern; - -def : LD1R_pattern; - -def : LD1R_pattern; - -def : LD1R_pattern; - -def : LD1R_pattern; -def : LD1R_pattern; - -def : LD1R_pattern; -def : LD1R_pattern; - -def : LD1R_pattern; -def : LD1R_pattern; - -class LD1R_pattern_v1 - : Pat<(VTy (scalar_to_vector (DTy (LoadOp GPR64xsp:$Rn)))), - (VTy (INST GPR64xsp:$Rn))>; - -def : LD1R_pattern_v1; -def : LD1R_pattern_v1; - -multiclass VectorList_Bare_BHSD { - defm B : VectorList_operands; - defm H : VectorList_operands; - defm S : VectorList_operands; - defm D : VectorList_operands; -} - -// Special vector list operand of 128-bit vectors with bare layout. -// i.e. only show ".b", ".h", ".s", ".d" -defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>; -defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>; -defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>; -defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>; - -class NeonI_LDN_Lane op2_1, bit op0, RegisterOperand VList, - Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane<1, r, op2_1, op0, - (outs VList:$Rt), - (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn]", - [], - NoItinerary>, - Sched<[WriteVecLd, ReadVecLd, ReadVecLd]> { - let mayLoad = 1; - let neverHasSideEffects = 1; - let hasExtraDefRegAllocReq = 1; - let Constraints = "$src = $Rt"; -} - -multiclass LDN_Lane_BHSD { - def _B : NeonI_LDN_Lane(List # "B_operand"), - neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _H : NeonI_LDN_Lane(List # "H_operand"), - neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _S : NeonI_LDN_Lane(List # "S_operand"), - neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _D : NeonI_LDN_Lane(List # "D_operand"), - neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } -} - -// Load single 1-element structure to one lane of 1 register. 
-defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">; - -// Load single N-element structure to one lane of N consecutive registers -// (N = 2,3,4) -defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">; -defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">; -defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">; - -multiclass LD1LN_patterns { - def : Pat<(VTy (vector_insert (VTy VPR64:$src), - (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))), - (VTy (EXTRACT_SUBREG - (INST GPR64xsp:$Rn, - (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - ImmOp:$lane), - sub_64))>; - - def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src), - (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))), - (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>; -} - -// Match all LD1LN instructions -defm : LD1LN_patterns; - -defm : LD1LN_patterns; - -defm : LD1LN_patterns; -defm : LD1LN_patterns; - -defm : LD1LN_patterns; -defm : LD1LN_patterns; - -class NeonI_STN_Lane op2_1, bit op0, RegisterOperand VList, - Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane<0, r, op2_1, op0, - (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn]", - [], - NoItinerary>, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { - let mayStore = 1; - let neverHasSideEffects = 1; - let hasExtraDefRegAllocReq = 1; -} - -multiclass STN_Lane_BHSD { - def _B : NeonI_STN_Lane(List # "B_operand"), - neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _H : NeonI_STN_Lane(List # "H_operand"), - neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _S : NeonI_STN_Lane(List # "S_operand"), - neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _D : NeonI_STN_Lane(List # "D_operand"), - neon_uimm1_bare, asmop>{ - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } -} - -// Store single 1-element structure from one lane of 1 register. -defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">; - -// Store single N-element structure from one lane of N consecutive registers -// (N = 2,3,4) -defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">; -defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">; -defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">; - -multiclass ST1LN_patterns { - def : Pat<(StoreOp (DTy (vector_extract (VTy VPR64:$Rt), ImmOp:$lane)), - GPR64xsp:$Rn), - (INST GPR64xsp:$Rn, - (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64), - ImmOp:$lane)>; - - def : Pat<(StoreOp (DTy (vector_extract (VTy2 VPR128:$Rt), ImmOp2:$lane)), - GPR64xsp:$Rn), - (INST GPR64xsp:$Rn, VPR128:$Rt, ImmOp2:$lane)>; -} - -// Match all ST1LN instructions -defm : ST1LN_patterns; - -defm : ST1LN_patterns; - -defm : ST1LN_patterns; -defm : ST1LN_patterns; - -defm : ST1LN_patterns; -defm : ST1LN_patterns; - -// End of vector load/store single N-element structure (class SIMD lsone). 
- - -// The following are post-index load/store single N-element instructions -// (class SIMD lsone-post) - -multiclass NeonI_LDN_WB_Dup opcode, bits<2> size, - RegisterOperand VecList, Operand ImmTy, - string asmop> { - let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn", - DecoderMethod = "DecodeVLDSTLanePostInstruction" in { - def _fixed : NeonI_LdOne_Dup_Post, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> { - let Rm = 0b11111; - } - - def _register : NeonI_LdOne_Dup_Post, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>; - } -} - -multiclass LDWB_Dup_BHSD opcode, string List, string asmop, - Operand uimm_b, Operand uimm_h, - Operand uimm_s, Operand uimm_d> { - defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00, - !cast(List # "8B_operand"), - uimm_b, asmop>; - - defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01, - !cast(List # "4H_operand"), - uimm_h, asmop>; - - defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10, - !cast(List # "2S_operand"), - uimm_s, asmop>; - - defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11, - !cast(List # "1D_operand"), - uimm_d, asmop>; - - defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00, - !cast(List # "16B_operand"), - uimm_b, asmop>; - - defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01, - !cast(List # "8H_operand"), - uimm_h, asmop>; - - defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10, - !cast(List # "4S_operand"), - uimm_s, asmop>; - - defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11, - !cast(List # "2D_operand"), - uimm_d, asmop>; -} - -// Post-index load single 1-element structure to all lanes of 1 register -defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1, - uimm_exact2, uimm_exact4, uimm_exact8>; - -// Post-index load single N-element structure to all lanes of N consecutive -// registers (N = 2,3,4) -defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2, - uimm_exact4, uimm_exact8, uimm_exact16>; -defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3, - uimm_exact6, uimm_exact12, uimm_exact24>; -defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4, - uimm_exact8, uimm_exact16, uimm_exact32>; - -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, - Constraints = "$Rn = $wb, $Rt = $src", - DecoderMethod = "DecodeVLDSTLanePostInstruction" in { - class LDN_WBFx_Lane op2_1, bit op0, RegisterOperand VList, - Operand ImmTy, Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0, - (outs VList:$Rt, GPR64xsp:$wb), - (ins GPR64xsp:$Rn, ImmTy:$amt, - VList:$src, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn], $amt", - [], - NoItinerary>, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]> { - let Rm = 0b11111; - } - - class LDN_WBReg_Lane op2_1, bit op0, RegisterOperand VList, - Operand ImmTy, Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0, - (outs VList:$Rt, GPR64xsp:$wb), - (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, - VList:$src, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn], $Rm", - [], - NoItinerary>, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd, ReadVecLd]>; -} - -multiclass LD_Lane_WB_BHSD { - def _B_fixed : LDN_WBFx_Lane(List # "B_operand"), - uimm_b, neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _B_register : LDN_WBReg_Lane(List # "B_operand"), - uimm_b, neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _H_fixed : LDN_WBFx_Lane(List # "H_operand"), - uimm_h, neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 
0b0}; - let Inst{30} = lane{2}; - } - - def _H_register : LDN_WBReg_Lane(List # "H_operand"), - uimm_h, neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _S_fixed : LDN_WBFx_Lane(List # "S_operand"), - uimm_s, neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _S_register : LDN_WBReg_Lane(List # "S_operand"), - uimm_s, neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _D_fixed : LDN_WBFx_Lane(List # "D_operand"), - uimm_d, neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } - - def _D_register : LDN_WBReg_Lane(List # "D_operand"), - uimm_d, neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } -} - -// Post-index load single 1-element structure to one lane of 1 register. -defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1, - uimm_exact2, uimm_exact4, uimm_exact8>; - -// Post-index load single N-element structure to one lane of N consecutive -// registers -// (N = 2,3,4) -defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2, - uimm_exact4, uimm_exact8, uimm_exact16>; -defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3, - uimm_exact6, uimm_exact12, uimm_exact24>; -defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4, - uimm_exact8, uimm_exact16, uimm_exact32>; - -let mayStore = 1, neverHasSideEffects = 1, - hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb", - DecoderMethod = "DecodeVLDSTLanePostInstruction" in { - class STN_WBFx_Lane op2_1, bit op0, RegisterOperand VList, - Operand ImmTy, Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0, - (outs GPR64xsp:$wb), - (ins GPR64xsp:$Rn, ImmTy:$amt, - VList:$Rt, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn], $amt", - [], - NoItinerary>, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { - let Rm = 0b11111; - } - - class STN_WBReg_Lane op2_1, bit op0, RegisterOperand VList, - Operand ImmTy, Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0, - (outs GPR64xsp:$wb), - (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt, - ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn], $Rm", - [], - NoItinerary>, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>; -} - -multiclass ST_Lane_WB_BHSD { - def _B_fixed : STN_WBFx_Lane(List # "B_operand"), - uimm_b, neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _B_register : STN_WBReg_Lane(List # "B_operand"), - uimm_b, neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _H_fixed : STN_WBFx_Lane(List # "H_operand"), - uimm_h, neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _H_register : STN_WBReg_Lane(List # "H_operand"), - uimm_h, neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _S_fixed : STN_WBFx_Lane(List # "S_operand"), - uimm_s, neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _S_register : STN_WBReg_Lane(List # "S_operand"), - uimm_s, neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _D_fixed : STN_WBFx_Lane(List # "D_operand"), - uimm_d, neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } - - def _D_register : STN_WBReg_Lane(List # 
"D_operand"), - uimm_d, neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } -} - -// Post-index store single 1-element structure from one lane of 1 register. -defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1, - uimm_exact2, uimm_exact4, uimm_exact8>; - -// Post-index store single N-element structure from one lane of N consecutive -// registers (N = 2,3,4) -defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2, - uimm_exact4, uimm_exact8, uimm_exact16>; -defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3, - uimm_exact6, uimm_exact12, uimm_exact24>; -defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4, - uimm_exact8, uimm_exact16, uimm_exact32>; - -// End of post-index load/store single N-element instructions -// (class SIMD lsone-post) - -// Neon Scalar instructions implementation -// Scalar Three Same - -class NeonI_Scalar3Same_size size, bits<5> opcode, string asmop, - RegisterClass FPRC> - : NeonI_Scalar3Same, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -class NeonI_Scalar3Same_D_size opcode, string asmop> - : NeonI_Scalar3Same_size; - -multiclass NeonI_Scalar3Same_HS_sizes opcode, string asmop, - bit Commutable = 0> { - let isCommutable = Commutable in { - def hhh : NeonI_Scalar3Same_size; - def sss : NeonI_Scalar3Same_size; - } -} - -multiclass NeonI_Scalar3Same_SD_sizes opcode, - string asmop, bit Commutable = 0> { - let isCommutable = Commutable in { - def sss : NeonI_Scalar3Same_size; - def ddd : NeonI_Scalar3Same_size; - } -} - -multiclass NeonI_Scalar3Same_BHSD_sizes opcode, - string asmop, bit Commutable = 0> { - let isCommutable = Commutable in { - def bbb : NeonI_Scalar3Same_size; - def hhh : NeonI_Scalar3Same_size; - def sss : NeonI_Scalar3Same_size; - def ddd : NeonI_Scalar3Same_size; - } -} - -multiclass Neon_Scalar3Same_D_size_patterns { - def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (INSTD FPR64:$Rn, FPR64:$Rm)>; -} - -multiclass Neon_Scalar3Same_BHSD_size_patterns - : Neon_Scalar3Same_D_size_patterns { - def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), - (INSTB FPR8:$Rn, FPR8:$Rm)>; - def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (INSTH FPR16:$Rn, FPR16:$Rm)>; - def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; -} - -multiclass Neon_Scalar3Same_HS_size_patterns { - def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (INSTH FPR16:$Rn, FPR16:$Rm)>; - def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; -} - -multiclass Neon_Scalar3Same_SD_size_patterns { - def : Pat<(SResTy (opnode (STy FPR32:$Rn), (STy FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; - def : Pat<(DResTy (opnode (DTy FPR64:$Rn), (DTy FPR64:$Rm))), - (INSTD FPR64:$Rn, FPR64:$Rm)>; -} - -class Neon_Scalar3Same_cmp_V1_D_size_patterns - : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)), - (INSTD FPR64:$Rn, FPR64:$Rm)>; - -// Scalar Three Different - -class NeonI_Scalar3Diff_size size, bits<4> opcode, string asmop, - RegisterClass FPRCD, RegisterClass FPRCS> - : NeonI_Scalar3Diff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_Scalar3Diff_HS_size opcode, string asmop> { - def shh : NeonI_Scalar3Diff_size; - def dss : NeonI_Scalar3Diff_size; -} - -multiclass NeonI_Scalar3Diff_ml_HS_size opcode, string asmop> { - let Constraints = "$Src = $Rd" in { - def shh : NeonI_Scalar3Diff, - Sched<[WriteFPALU, ReadFPALU, 
ReadFPALU, ReadFPALU]>; - def dss : NeonI_Scalar3Diff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]>; - } -} - -multiclass Neon_Scalar3Diff_HS_size_patterns { - def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (INSTH FPR16:$Rn, FPR16:$Rm)>; - def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; -} - -multiclass Neon_Scalar3Diff_ml_HS_size_patterns { - def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>; - def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>; -} - -// Scalar Two Registers Miscellaneous - -class NeonI_Scalar2SameMisc_size size, bits<5> opcode, string asmop, - RegisterClass FPRCD, RegisterClass FPRCS> - : NeonI_Scalar2SameMisc, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_Scalar2SameMisc_SD_size opcode, - string asmop> { - def ss : NeonI_Scalar2SameMisc_size; - def dd : NeonI_Scalar2SameMisc_size; -} - -multiclass NeonI_Scalar2SameMisc_D_size opcode, string asmop> { - def dd : NeonI_Scalar2SameMisc_size; -} - -multiclass NeonI_Scalar2SameMisc_BHSD_size opcode, string asmop> - : NeonI_Scalar2SameMisc_D_size { - def bb : NeonI_Scalar2SameMisc_size; - def hh : NeonI_Scalar2SameMisc_size; - def ss : NeonI_Scalar2SameMisc_size; -} - -class NeonI_Scalar2SameMisc_fcvtxn_D_size opcode, string asmop> - : NeonI_Scalar2SameMisc_size; - -multiclass NeonI_Scalar2SameMisc_narrow_HSD_size opcode, - string asmop> { - def bh : NeonI_Scalar2SameMisc_size; - def hs : NeonI_Scalar2SameMisc_size; - def sd : NeonI_Scalar2SameMisc_size; -} - -class NeonI_Scalar2SameMisc_accum_size size, bits<5> opcode, - string asmop, RegisterClass FPRC> - : NeonI_Scalar2SameMisc, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_Scalar2SameMisc_accum_BHSD_size opcode, - string asmop> { - - let Constraints = "$Src = $Rd" in { - def bb : NeonI_Scalar2SameMisc_accum_size; - def hh : NeonI_Scalar2SameMisc_accum_size; - def ss : NeonI_Scalar2SameMisc_accum_size; - def dd : NeonI_Scalar2SameMisc_accum_size; - } -} - -class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns - : Pat<(f32 (opnode (f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; - -multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns { - def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; - def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; -} - -class Neon_Scalar2SameMisc_vcvt_D_size_patterns - : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; - -multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns { - def : Pat<(f32 (opnode (v1i32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; - def : Pat<(f64 (opnode (v1i64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; -} - -multiclass Neon_Scalar2SameMisc_SD_size_patterns { - def : Pat<(f32 (opnode (f32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; - def : Pat<(f64 (opnode (f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; -} - -class Neon_Scalar2SameMisc_V1_D_size_patterns - : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; - -class NeonI_Scalar2SameMisc_cmpz_D_size opcode, string asmop> - : NeonI_Scalar2SameMisc, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_Scalar2SameMisc_cmpz_SD_size opcode, - string asmop> { - def ssi : NeonI_Scalar2SameMisc, - Sched<[WriteFPALU, ReadFPALU]>; - def ddi : NeonI_Scalar2SameMisc, - Sched<[WriteFPALU, ReadFPALU]>; -} - -class Neon_Scalar2SameMisc_cmpz_D_size_patterns - : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), - (v1i64 (bitconvert 
(v8i8 Neon_AllZero))))), - (INSTD FPR64:$Rn, 0)>; - -class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns - : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn), - (i32 neon_uimm0:$Imm), CC)), - (INSTD FPR64:$Rn, neon_uimm0:$Imm)>; - -multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns { - def : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (f32 fpzz32:$FPImm))), - (INSTS FPR32:$Rn, fpzz32:$FPImm)>; - def : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (f32 fpzz32:$FPImm))), - (INSTD FPR64:$Rn, fpzz32:$FPImm)>; - def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpzz32:$FPImm), CC)), - (INSTD FPR64:$Rn, fpzz32:$FPImm)>; -} - -multiclass Neon_Scalar2SameMisc_D_size_patterns { - def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; -} - -multiclass Neon_Scalar2SameMisc_BHSD_size_patterns - : Neon_Scalar2SameMisc_D_size_patterns { - def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))), - (INSTB FPR8:$Rn)>; - def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))), - (INSTH FPR16:$Rn)>; - def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; -} - -multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns< - SDPatternOperator opnode, - Instruction INSTH, - Instruction INSTS, - Instruction INSTD> { - def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))), - (INSTH FPR16:$Rn)>; - def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; - def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; - -} - -multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns< - SDPatternOperator opnode, - Instruction INSTB, - Instruction INSTH, - Instruction INSTS, - Instruction INSTD> { - def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))), - (INSTB FPR8:$Src, FPR8:$Rn)>; - def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))), - (INSTH FPR16:$Src, FPR16:$Rn)>; - def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))), - (INSTS FPR32:$Src, FPR32:$Rn)>; - def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))), - (INSTD FPR64:$Src, FPR64:$Rn)>; -} - -// Scalar Shift By Immediate - -class NeonI_ScalarShiftImm_size opcode, string asmop, - RegisterClass FPRC, Operand ImmTy> - : NeonI_ScalarShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_ScalarShiftRightImm_D_size opcode, - string asmop> { - def ddi : NeonI_ScalarShiftImm_size { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - } -} - -multiclass NeonI_ScalarShiftRightImm_BHSD_size opcode, - string asmop> - : NeonI_ScalarShiftRightImm_D_size { - def bbi : NeonI_ScalarShiftImm_size { - bits<3> Imm; - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - let Inst{18-16} = Imm; - } - def hhi : NeonI_ScalarShiftImm_size { - bits<4> Imm; - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - let Inst{19-16} = Imm; - } - def ssi : NeonI_ScalarShiftImm_size { - bits<5> Imm; - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - let Inst{20-16} = Imm; - } -} - -multiclass NeonI_ScalarShiftLeftImm_D_size opcode, - string asmop> { - def ddi : NeonI_ScalarShiftImm_size { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - } -} - -multiclass NeonI_ScalarShiftLeftImm_BHSD_size opcode, - string asmop> - : NeonI_ScalarShiftLeftImm_D_size { - def bbi : NeonI_ScalarShiftImm_size { - bits<3> Imm; - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - let Inst{18-16} = Imm; - } - def hhi : NeonI_ScalarShiftImm_size { - bits<4> Imm; - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - let Inst{19-16} = Imm; - } - def ssi : NeonI_ScalarShiftImm_size { - bits<5> Imm; - let Inst{22-21} = 0b01; 
// immh:immb = 01xxxxx - let Inst{20-16} = Imm; - } -} - -class NeonI_ScalarShiftRightImm_accum_D_size opcode, string asmop> - : NeonI_ScalarShiftImm, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - let Constraints = "$Src = $Rd"; -} - -class NeonI_ScalarShiftLeftImm_accum_D_size opcode, string asmop> - : NeonI_ScalarShiftImm, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - let Constraints = "$Src = $Rd"; -} - -class NeonI_ScalarShiftImm_narrow_size opcode, string asmop, - RegisterClass FPRCD, RegisterClass FPRCS, - Operand ImmTy> - : NeonI_ScalarShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_ScalarShiftImm_narrow_HSD_size opcode, - string asmop> { - def bhi : NeonI_ScalarShiftImm_narrow_size { - bits<3> Imm; - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - let Inst{18-16} = Imm; - } - def hsi : NeonI_ScalarShiftImm_narrow_size { - bits<4> Imm; - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - let Inst{19-16} = Imm; - } - def sdi : NeonI_ScalarShiftImm_narrow_size { - bits<5> Imm; - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - let Inst{20-16} = Imm; - } -} - -multiclass NeonI_ScalarShiftImm_cvt_SD_size opcode, string asmop> { - def ssi : NeonI_ScalarShiftImm_size { - bits<5> Imm; - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - let Inst{20-16} = Imm; - } - def ddi : NeonI_ScalarShiftImm_size { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - } -} - -multiclass Neon_ScalarShiftRImm_D_size_patterns { - def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), - (INSTD FPR64:$Rn, imm:$Imm)>; -} - -multiclass Neon_ScalarShiftLImm_D_size_patterns { - def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))), - (INSTD FPR64:$Rn, imm:$Imm)>; -} - -class Neon_ScalarShiftLImm_V1_D_size_patterns - : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), - (v1i64 (Neon_vdup (i32 shl_imm64:$Imm))))), - (INSTD FPR64:$Rn, imm:$Imm)>; - -class Neon_ScalarShiftRImm_V1_D_size_patterns - : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), - (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))), - (INSTD FPR64:$Rn, imm:$Imm)>; - -multiclass Neon_ScalarShiftLImm_BHSD_size_patterns - : Neon_ScalarShiftLImm_D_size_patterns { - def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))), - (INSTB FPR8:$Rn, imm:$Imm)>; - def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))), - (INSTH FPR16:$Rn, imm:$Imm)>; - def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))), - (INSTS FPR32:$Rn, imm:$Imm)>; -} - -class Neon_ScalarShiftLImm_accum_D_size_patterns - : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), - (i32 shl_imm64:$Imm))), - (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>; - -class Neon_ScalarShiftRImm_accum_D_size_patterns - : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), - (i32 shr_imm64:$Imm))), - (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>; - -multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns< - SDPatternOperator opnode, - Instruction INSTH, - Instruction INSTS, - Instruction INSTD> { - def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))), - (INSTH FPR16:$Rn, imm:$Imm)>; - def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))), - (INSTS FPR32:$Rn, imm:$Imm)>; - def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), - (INSTD FPR64:$Rn, imm:$Imm)>; -} - -multiclass 
Neon_ScalarShiftImm_scvtf_SD_size_patterns { - def ssi : Pat<(f32 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))), - (INSTS FPR32:$Rn, imm:$Imm)>; - def ddi : Pat<(f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), - (INSTD FPR64:$Rn, imm:$Imm)>; -} - -multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns { - def ssi : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (i32 shr_imm32:$Imm))), - (INSTS FPR32:$Rn, imm:$Imm)>; - def ddi : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (i32 shr_imm64:$Imm))), - (INSTD FPR64:$Rn, imm:$Imm)>; -} - -// Scalar Signed Shift Right (Immediate) -defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">; -defm : Neon_ScalarShiftRImm_D_size_patterns; -// Pattern to match llvm.arm.* intrinsic. -def : Neon_ScalarShiftRImm_V1_D_size_patterns; - -// Scalar Unsigned Shift Right (Immediate) -defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">; -defm : Neon_ScalarShiftRImm_D_size_patterns; -// Pattern to match llvm.arm.* intrinsic. -def : Neon_ScalarShiftRImm_V1_D_size_patterns; - -// Scalar Signed Rounding Shift Right (Immediate) -defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">; -defm : Neon_ScalarShiftRImm_D_size_patterns; - -// Scalar Unigned Rounding Shift Right (Immediate) -defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">; -defm : Neon_ScalarShiftRImm_D_size_patterns; - -// Scalar Signed Shift Right and Accumulate (Immediate) -def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">; -def : Neon_ScalarShiftRImm_accum_D_size_patterns - ; - -// Scalar Unsigned Shift Right and Accumulate (Immediate) -def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">; -def : Neon_ScalarShiftRImm_accum_D_size_patterns - ; - -// Scalar Signed Rounding Shift Right and Accumulate (Immediate) -def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">; -def : Neon_ScalarShiftRImm_accum_D_size_patterns - ; - -// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate) -def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">; -def : Neon_ScalarShiftRImm_accum_D_size_patterns - ; - -// Scalar Shift Left (Immediate) -defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">; -defm : Neon_ScalarShiftLImm_D_size_patterns; -// Pattern to match llvm.arm.* intrinsic. -def : Neon_ScalarShiftLImm_V1_D_size_patterns; - -// Signed Saturating Shift Left (Immediate) -defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">; -defm : Neon_ScalarShiftLImm_BHSD_size_patterns; -// Pattern to match llvm.arm.* intrinsic. -defm : Neon_ScalarShiftLImm_D_size_patterns; - -// Unsigned Saturating Shift Left (Immediate) -defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">; -defm : Neon_ScalarShiftLImm_BHSD_size_patterns; -// Pattern to match llvm.arm.* intrinsic. 
-defm : Neon_ScalarShiftLImm_D_size_patterns; - -// Signed Saturating Shift Left Unsigned (Immediate) -defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">; -defm : Neon_ScalarShiftLImm_BHSD_size_patterns; - -// Shift Right And Insert (Immediate) -def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">; -def : Neon_ScalarShiftRImm_accum_D_size_patterns - ; - -// Shift Left And Insert (Immediate) -def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">; -def : Neon_ScalarShiftLImm_accum_D_size_patterns - ; - -// Signed Saturating Shift Right Narrow (Immediate) -defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">; -defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; - -// Unsigned Saturating Shift Right Narrow (Immediate) -defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">; -defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; - -// Signed Saturating Rounded Shift Right Narrow (Immediate) -defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">; -defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; - -// Unsigned Saturating Rounded Shift Right Narrow (Immediate) -defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">; -defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; - -// Signed Saturating Shift Right Unsigned Narrow (Immediate) -defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">; -defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; - -// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) -defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">; -defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; - -// Scalar Signed Fixed-point Convert To Floating-Point (Immediate) -defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">; -defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns; - -// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate) -defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">; -defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns; - -// Scalar Floating-point Convert To Signed Fixed-point (Immediate) -defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">; -defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns; - -// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate) -defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">; -defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns; - -// Patterns For Convert Instructions Between v1f64 and v1i64 -class Neon_ScalarShiftImm_cvtf_v1f64_pattern - : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), - (INST FPR64:$Rn, imm:$Imm)>; - -class Neon_ScalarShiftImm_fcvt_v1f64_pattern - : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))), - (INST FPR64:$Rn, imm:$Imm)>; - -def : Neon_ScalarShiftImm_cvtf_v1f64_pattern; - -def : Neon_ScalarShiftImm_cvtf_v1f64_pattern; - -def : Neon_ScalarShiftImm_fcvt_v1f64_pattern; - -def : Neon_ScalarShiftImm_fcvt_v1f64_pattern; - -// Scalar Integer Add -let isCommutable = 1 in { -def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">; -} - -// Scalar Integer Sub -def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">; - -// Pattern for Scalar Integer Add and Sub with D register only -defm : Neon_Scalar3Same_D_size_patterns; -defm : Neon_Scalar3Same_D_size_patterns; - -// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub -defm : Neon_Scalar3Same_D_size_patterns; -defm : 
Neon_Scalar3Same_D_size_patterns; -defm : Neon_Scalar3Same_D_size_patterns; -defm : Neon_Scalar3Same_D_size_patterns; - -// Scalar Integer Saturating Add (Signed, Unsigned) -defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>; -defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>; - -// Scalar Integer Saturating Sub (Signed, Unsigned) -defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>; -defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>; - - -// Patterns to match llvm.aarch64.* intrinsic for -// Scalar Integer Saturating Add, Sub (Signed, Unsigned) -defm : Neon_Scalar3Same_BHSD_size_patterns; -defm : Neon_Scalar3Same_BHSD_size_patterns; -defm : Neon_Scalar3Same_BHSD_size_patterns; -defm : Neon_Scalar3Same_BHSD_size_patterns; - -// Scalar Integer Saturating Doubling Multiply Half High -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in -defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>; - -// Scalar Integer Saturating Rounding Doubling Multiply Half High -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>; -} - -// Patterns to match llvm.arm.* intrinsic for -// Scalar Integer Saturating Doubling Multiply Half High and -// Scalar Integer Saturating Rounding Doubling Multiply Half High -defm : Neon_Scalar3Same_HS_size_patterns; -defm : Neon_Scalar3Same_HS_size_patterns; - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in { -// Scalar Floating-point Multiply Extended -defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>; -} - -// Scalar Floating-point Reciprocal Step -defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>; -defm : Neon_Scalar3Same_SD_size_patterns; -def : Pat<(v1f64 (int_arm_neon_vrecps (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FRECPSddd FPR64:$Rn, FPR64:$Rm)>; - -// Scalar Floating-point Reciprocal Square Root Step -defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>; -defm : Neon_Scalar3Same_SD_size_patterns; -def : Pat<(v1f64 (int_arm_neon_vrsqrts (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FRSQRTSddd FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>; - -// Patterns to match llvm.aarch64.* intrinsic for -// Scalar Floating-point Multiply Extended, -multiclass Neon_Scalar3Same_MULX_SD_size_patterns { - def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; - def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))), - (INSTD FPR64:$Rn, FPR64:$Rm)>; -} - -defm : Neon_Scalar3Same_MULX_SD_size_patterns; -def : Pat<(v1f64 (int_aarch64_neon_vmulx (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMULXddd FPR64:$Rn, FPR64:$Rm)>; - -// Scalar Integer Shift Left (Signed, Unsigned) -def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">; -def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">; - -// Patterns to match llvm.arm.* intrinsic for -// Scalar Integer Shift Left (Signed, Unsigned) -defm : Neon_Scalar3Same_D_size_patterns; -defm : Neon_Scalar3Same_D_size_patterns; - -// Patterns to match llvm.aarch64.* intrinsic for -// Scalar Integer Shift Left (Signed, Unsigned) -defm : Neon_Scalar3Same_D_size_patterns; -defm : Neon_Scalar3Same_D_size_patterns; - -// Scalar Integer Saturating Shift Left (Signed, Unsigned) -defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>; -defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>; - -// 
Patterns to match llvm.aarch64.* intrinsic for -// Scalar Integer Saturating Shift Letf (Signed, Unsigned) -defm : Neon_Scalar3Same_BHSD_size_patterns; -defm : Neon_Scalar3Same_BHSD_size_patterns; - -// Patterns to match llvm.arm.* intrinsic for -// Scalar Integer Saturating Shift Letf (Signed, Unsigned) -defm : Neon_Scalar3Same_D_size_patterns; -defm : Neon_Scalar3Same_D_size_patterns; - -// Scalar Integer Rounding Shift Left (Signed, Unsigned) -def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">; -def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">; - -// Patterns to match llvm.aarch64.* intrinsic for -// Scalar Integer Rounding Shift Left (Signed, Unsigned) -defm : Neon_Scalar3Same_D_size_patterns; -defm : Neon_Scalar3Same_D_size_patterns; - -// Patterns to match llvm.arm.* intrinsic for -// Scalar Integer Rounding Shift Left (Signed, Unsigned) -defm : Neon_Scalar3Same_D_size_patterns; -defm : Neon_Scalar3Same_D_size_patterns; - -// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) -defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>; -defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>; - -// Patterns to match llvm.aarch64.* intrinsic for -// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) -defm : Neon_Scalar3Same_BHSD_size_patterns; -defm : Neon_Scalar3Same_BHSD_size_patterns; - -// Patterns to match llvm.arm.* intrinsic for -// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) -defm : Neon_Scalar3Same_D_size_patterns; -defm : Neon_Scalar3Same_D_size_patterns; - -let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in { -// Signed Saturating Doubling Multiply-Add Long -defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">; -} -defm : Neon_Scalar3Diff_ml_HS_size_patterns; - -// Signed Saturating Doubling Multiply-Subtract Long -let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in { -defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">; -} -defm : Neon_Scalar3Diff_ml_HS_size_patterns; - -// Signed Saturating Doubling Multiply Long -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in { -defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">; -} -defm : Neon_Scalar3Diff_HS_size_patterns; - -// Scalar Signed Integer Convert To Floating-point -defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">; -defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns; - -// Scalar Unsigned Integer Convert To Floating-point -defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">; -defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns; - -// Scalar Floating-point Converts -def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">; -def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns; - -defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; - -defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; - -defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; - -defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; - -defm FCVTAS : 
NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; - -defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; - -defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; - -defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; - -defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; - -defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; -def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; - -// Patterns For Convert Instructions Between v1f64 and v1i64 -class Neon_Scalar2SameMisc_cvtf_v1f64_pattern - : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>; - -class Neon_Scalar2SameMisc_fcvt_v1f64_pattern - : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>; - -def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern; -def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern; - -def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern; -def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern; - -// Scalar Floating-point Reciprocal Estimate -defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">; -defm : Neon_Scalar2SameMisc_SD_size_patterns; -def : Neon_Scalar2SameMisc_V1_D_size_patterns; - -// Scalar Floating-point Reciprocal Exponent -defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">; -defm : Neon_Scalar2SameMisc_SD_size_patterns; - -// Scalar Floating-point Reciprocal Square Root Estimate -defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">; -defm : Neon_Scalar2SameMisc_SD_size_patterns; -def : Neon_Scalar2SameMisc_V1_D_size_patterns; - -// Scalar Floating-point Round -class Neon_ScalarFloatRound_pattern - : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>; - -def : Neon_ScalarFloatRound_pattern; -def : Neon_ScalarFloatRound_pattern; -def : Neon_ScalarFloatRound_pattern; -def : Neon_ScalarFloatRound_pattern; -def : Neon_ScalarFloatRound_pattern; -def : Neon_ScalarFloatRound_pattern; -def : Neon_ScalarFloatRound_pattern; - -// Scalar Integer Compare - -// Scalar Compare Bitwise Equal -def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">; -defm : Neon_Scalar3Same_D_size_patterns; - -class Neon_Scalar3Same_cmp_D_size_v1_patterns - : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)), - (INSTD FPR64:$Rn, FPR64:$Rm)>; - -def : Neon_Scalar3Same_cmp_D_size_v1_patterns; - -// Scalar Compare Signed Greather Than Or Equal -def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">; -defm : Neon_Scalar3Same_D_size_patterns; -def : Neon_Scalar3Same_cmp_D_size_v1_patterns; - -// Scalar Compare Unsigned Higher Or Same -def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">; -defm : Neon_Scalar3Same_D_size_patterns; -def : Neon_Scalar3Same_cmp_D_size_v1_patterns; - -// Scalar Compare Unsigned Higher -def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">; -defm : Neon_Scalar3Same_D_size_patterns; -def : Neon_Scalar3Same_cmp_D_size_v1_patterns; - -// Scalar Compare Signed Greater 
Than -def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">; -defm : Neon_Scalar3Same_D_size_patterns; -def : Neon_Scalar3Same_cmp_D_size_v1_patterns; - -// Scalar Compare Bitwise Test Bits -def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">; -defm : Neon_Scalar3Same_D_size_patterns; -defm : Neon_Scalar3Same_D_size_patterns; - -// Scalar Compare Bitwise Equal To Zero -def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">; -def : Neon_Scalar2SameMisc_cmpz_D_size_patterns; -def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns; - -// Scalar Compare Signed Greather Than Or Equal To Zero -def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">; -def : Neon_Scalar2SameMisc_cmpz_D_size_patterns; -def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns; - -// Scalar Compare Signed Greater Than Zero -def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">; -def : Neon_Scalar2SameMisc_cmpz_D_size_patterns; -def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns; - -// Scalar Compare Signed Less Than Or Equal To Zero -def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">; -def : Neon_Scalar2SameMisc_cmpz_D_size_patterns; -def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns; - -// Scalar Compare Less Than Zero -def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">; -def : Neon_Scalar2SameMisc_cmpz_D_size_patterns; -def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns; - -// Scalar Floating-point Compare - -// Scalar Floating-point Compare Mask Equal -defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">; -defm : Neon_Scalar3Same_SD_size_patterns; -def : Neon_Scalar3Same_cmp_V1_D_size_patterns; - -// Scalar Floating-point Compare Mask Equal To Zero -defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns; - -// Scalar Floating-point Compare Mask Greater Than Or Equal -defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">; -defm : Neon_Scalar3Same_SD_size_patterns; -def : Neon_Scalar3Same_cmp_V1_D_size_patterns; - -// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero -defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns; - -// Scalar Floating-point Compare Mask Greather Than -defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">; -defm : Neon_Scalar3Same_SD_size_patterns; -def : Neon_Scalar3Same_cmp_V1_D_size_patterns; - -// Scalar Floating-point Compare Mask Greather Than Zero -defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns; - -// Scalar Floating-point Compare Mask Less Than Or Equal To Zero -defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns; - -// Scalar Floating-point Compare Mask Less Than Zero -defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns; - -// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal -defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">; -defm : Neon_Scalar3Same_SD_size_patterns; -def : Pat<(v1i64 (int_arm_neon_vacge (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FACGEddd FPR64:$Rn, FPR64:$Rm)>; - -// Scalar Floating-point Absolute Compare Mask Greater Than -defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">; -defm : Neon_Scalar3Same_SD_size_patterns; -def : 
Pat<(v1i64 (int_arm_neon_vacgt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FACGTddd FPR64:$Rn, FPR64:$Rm)>; - -// Scalar Floating-point Absolute Difference -defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">; -defm : Neon_Scalar3Same_SD_size_patterns; - -// Scalar Absolute Value -defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">; -defm : Neon_Scalar2SameMisc_D_size_patterns; - -// Scalar Signed Saturating Absolute Value -defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">; -defm : Neon_Scalar2SameMisc_BHSD_size_patterns; - -// Scalar Negate -defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">; -defm : Neon_Scalar2SameMisc_D_size_patterns; - -// Scalar Signed Saturating Negate -defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">; -defm : Neon_Scalar2SameMisc_BHSD_size_patterns; - -// Scalar Signed Saturating Accumulated of Unsigned Value -defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">; -defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns; - -// Scalar Unsigned Saturating Accumulated of Signed Value -defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">; -defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns; - -def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src), - (v1i64 FPR64:$Rn))), - (SUQADDdd FPR64:$Src, FPR64:$Rn)>; - -def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src), - (v1i64 FPR64:$Rn))), - (USQADDdd FPR64:$Src, FPR64:$Rn)>; - -def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))), - (ABSdd FPR64:$Rn)>; - -def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))), - (SQABSdd FPR64:$Rn)>; - -def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))), - (SQNEGdd FPR64:$Rn)>; - -def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))), - (v1i64 FPR64:$Rn))), - (NEGdd FPR64:$Rn)>; - -// Scalar Signed Saturating Extract Unsigned Narrow -defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">; -defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns; - -// Scalar Signed Saturating Extract Narrow -defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">; -defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns; - -// Scalar Unsigned Saturating Extract Narrow -defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">; -defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns; - -// Scalar Reduce Pairwise - -multiclass NeonI_ScalarPair_D_sizes opcode, - string asmop, bit Commutable = 0> { - let isCommutable = Commutable in { - def _D_2D : NeonI_ScalarPair, - Sched<[WriteFPALU, ReadFPALU]>; - } -} - -multiclass NeonI_ScalarPair_SD_sizes opcode, - string asmop, bit Commutable = 0> - : NeonI_ScalarPair_D_sizes { - let isCommutable = Commutable in { - def _S_2S : NeonI_ScalarPair, - Sched<[WriteFPALU, ReadFPALU]>; - } -} - -// Scalar Reduce Addition Pairwise (Integer) with -// Pattern to match llvm.arm.* intrinsic -defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>; - -// Pattern to match llvm.aarch64.* intrinsic for -// Scalar Reduce Addition Pairwise (Integer) -def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))), - (ADDPvv_D_2D VPR128:$Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))), - (ADDPvv_D_2D VPR128:$Rn)>; - -// Scalar Reduce Addition Pairwise (Floating Point) -defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>; - -// Scalar Reduce Maximum Pairwise (Floating Point) -defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>; 
- -// Scalar Reduce Minimum Pairwise (Floating Point) -defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>; - -// Scalar Reduce maxNum Pairwise (Floating Point) -defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>; - -// Scalar Reduce minNum Pairwise (Floating Point) -defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>; - -multiclass Neon_ScalarPair_SD_size_patterns { - def : Pat<(f32 (opnode (v2f32 VPR64:$Rn))), - (INSTS VPR64:$Rn)>; - def : Pat<(f64 (opnode (v2f64 VPR128:$Rn))), - (INSTD VPR128:$Rn)>; -} - -// Patterns to match llvm.aarch64.* intrinsic for -// Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point) -defm : Neon_ScalarPair_SD_size_patterns; - -defm : Neon_ScalarPair_SD_size_patterns; - -defm : Neon_ScalarPair_SD_size_patterns; - -defm : Neon_ScalarPair_SD_size_patterns; - -defm : Neon_ScalarPair_SD_size_patterns; - -def : Pat<(f32 (int_aarch64_neon_vpfadd (v4f32 VPR128:$Rn))), - (FADDPvv_S_2S (v2f32 - (EXTRACT_SUBREG - (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))), - sub_64)))>; - -// Scalar by element Arithmetic - -class NeonI_ScalarXIndexedElemArith opcode, - string rmlane, bit u, bit szhi, bit szlo, - RegisterClass ResFPR, RegisterClass OpFPR, - RegisterOperand OpVPR, Operand OpImm> - : NeonI_ScalarXIndexedElem, - Sched<[WriteFPMul, ReadFPMul, ReadFPMul]> { - bits<3> Imm; - bits<5> MRm; -} - -class NeonI_ScalarXIndexedElemArith_Constraint_Impl opcode, - string rmlane, - bit u, bit szhi, bit szlo, - RegisterClass ResFPR, - RegisterClass OpFPR, - RegisterOperand OpVPR, - Operand OpImm> - : NeonI_ScalarXIndexedElem, - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { - let Constraints = "$src = $Rd"; - bits<3> Imm; - bits<5> MRm; -} - -// Scalar Floating Point multiply (scalar, by element) -def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul", - 0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul", - 0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { - let Inst{11} = Imm{0}; // h - let Inst{21} = 0b0; // l - let Inst{20-16} = MRm; -} - -// Scalar Floating Point multiply extended (scalar, by element) -def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx", - 0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx", - 0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { - let Inst{11} = Imm{0}; // h - let Inst{21} = 0b0; // l - let Inst{20-16} = MRm; -} - -multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns< - SDPatternOperator opnode, - Instruction INST, - ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm, - ValueType OpNTy, ValueType ExTy, Operand OpNImm> { - - def : Pat<(ResTy (opnode (ResTy FPRC:$Rn), - (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))), - (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (opnode (ResTy FPRC:$Rn), - (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))), - (ResTy (INST (ResTy FPRC:$Rn), - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), - OpNImm:$Imm))>; - - // swapped operands - def : Pat<(ResTy (opnode - (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)), - (ResTy FPRC:$Rn))), - (ResTy (INST (ResTy FPRC:$Rn), (OpTy 
VPR128:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (opnode - (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)), - (ResTy FPRC:$Rn))), - (ResTy (INST (ResTy FPRC:$Rn), - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), - OpNImm:$Imm))>; -} - -// Patterns for Scalar Floating Point multiply (scalar, by element) -defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns; -defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns; - -// Patterns for Scalar Floating Point multiply extended (scalar, by element) -defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns; -defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns; - -// Scalar Floating Point fused multiply-add (scalar, by element) -def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla", - 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla", - 0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { - let Inst{11} = Imm{0}; // h - let Inst{21} = 0b0; // l - let Inst{20-16} = MRm; -} - -// Scalar Floating Point fused multiply-subtract (scalar, by element) -def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls", - 0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls", - 0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { - let Inst{11} = Imm{0}; // h - let Inst{21} = 0b0; // l - let Inst{20-16} = MRm; -} -// We are allowed to match the fma instruction regardless of compile options. -multiclass Neon_ScalarXIndexedElem_FMA_Patterns< - Instruction FMLAI, Instruction FMLSI, - ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm, - ValueType OpNTy, ValueType ExTy, Operand OpNImm> { - // fmla - def : Pat<(ResTy (fma (ResTy FPRC:$Rn), - (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)), - (ResTy FPRC:$Ra))), - (ResTy (FMLAI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (fma (ResTy FPRC:$Rn), - (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)), - (ResTy FPRC:$Ra))), - (ResTy (FMLAI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), - OpNImm:$Imm))>; - - // swapped fmla operands - def : Pat<(ResTy (fma - (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)), - (ResTy FPRC:$Rn), - (ResTy FPRC:$Ra))), - (ResTy (FMLAI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (fma - (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)), - (ResTy FPRC:$Rn), - (ResTy FPRC:$Ra))), - (ResTy (FMLAI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), - OpNImm:$Imm))>; - - // fmls - def : Pat<(ResTy (fma (ResTy FPRC:$Rn), - (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))), - (ResTy FPRC:$Ra))), - (ResTy (FMLSI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (fma (ResTy FPRC:$Rn), - (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))), - (ResTy FPRC:$Ra))), - (ResTy (FMLSI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), - OpNImm:$Imm))>; - - // swapped fmls operands - def : Pat<(ResTy (fma - (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))), - (ResTy 
FPRC:$Rn), - (ResTy FPRC:$Ra))), - (ResTy (FMLSI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (fma - (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))), - (ResTy FPRC:$Rn), - (ResTy FPRC:$Ra))), - (ResTy (FMLSI (ResTy FPRC:$Ra), - (ResTy FPRC:$Rn), - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), - OpNImm:$Imm))>; -} - -// Scalar Floating Point fused multiply-add and -// multiply-subtract (scalar, by element) -defm : Neon_ScalarXIndexedElem_FMA_Patterns; -defm : Neon_ScalarXIndexedElem_FMA_Patterns; -defm : Neon_ScalarXIndexedElem_FMA_Patterns; - -// Scalar Signed saturating doubling multiply long (scalar, by element) -def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull", - 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull", - 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> { - let Inst{11} = Imm{2}; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull", - 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull", - 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} - -multiclass Neon_ScalarXIndexedElem_MUL_Patterns< - SDPatternOperator opnode, - Instruction INST, - ValueType ResTy, RegisterClass FPRC, - ValueType OpVTy, ValueType OpTy, - ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> { - - def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn), - (OpVTy (scalar_to_vector - (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))), - (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn), - (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)))), - (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; - - //swapped operands - def : Pat<(ResTy (opnode - (OpVTy (scalar_to_vector - (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))), - (OpVTy FPRC:$Rn))), - (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (opnode - (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)), - (OpVTy FPRC:$Rn))), - (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; -} - - -// Patterns for Scalar Signed saturating doubling -// multiply long (scalar, by element) -defm : Neon_ScalarXIndexedElem_MUL_Patterns; -defm : Neon_ScalarXIndexedElem_MUL_Patterns; -defm : Neon_ScalarXIndexedElem_MUL_Patterns; -defm : Neon_ScalarXIndexedElem_MUL_Patterns; - -// Scalar Signed saturating doubling multiply-add long (scalar, by element) -def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", - 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", - 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> { - let Inst{11} = Imm{2}; // h - let Inst{21} = Imm{1}; // l - let Inst{20} 
= Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", - 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", - 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} - -// Scalar Signed saturating doubling -// multiply-subtract long (scalar, by element) -def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", - 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", - 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> { - let Inst{11} = Imm{2}; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", - 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", - 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} - -multiclass Neon_ScalarXIndexedElem_MLAL_Patterns< - SDPatternOperator opnode, - SDPatternOperator coreopnode, - Instruction INST, - ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC, - ValueType OpTy, - ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> { - - def : Pat<(ResTy (opnode - (ResTy ResFPRC:$Ra), - (ResTy (coreopnode (OpTy FPRC:$Rn), - (OpTy (scalar_to_vector - (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))), - (ResTy (INST (ResTy ResFPRC:$Ra), - (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (opnode - (ResTy ResFPRC:$Ra), - (ResTy (coreopnode (OpTy FPRC:$Rn), - (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)))))), - (ResTy (INST (ResTy ResFPRC:$Ra), - (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>; - - // swapped operands - def : Pat<(ResTy (opnode - (ResTy ResFPRC:$Ra), - (ResTy (coreopnode - (OpTy (scalar_to_vector - (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))), - (OpTy FPRC:$Rn))))), - (ResTy (INST (ResTy ResFPRC:$Ra), - (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>; - - def : Pat<(ResTy (opnode - (ResTy ResFPRC:$Ra), - (ResTy (coreopnode - (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)), - (OpTy FPRC:$Rn))))), - (ResTy (INST (ResTy ResFPRC:$Ra), - (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>; -} - -// Patterns for Scalar Signed saturating -// doubling multiply-add long (scalar, by element) -defm : Neon_ScalarXIndexedElem_MLAL_Patterns; -defm : Neon_ScalarXIndexedElem_MLAL_Patterns; -defm : Neon_ScalarXIndexedElem_MLAL_Patterns; -defm : Neon_ScalarXIndexedElem_MLAL_Patterns; - -// Patterns for Scalar Signed saturating -// doubling multiply-sub long (scalar, by element) -defm : Neon_ScalarXIndexedElem_MLAL_Patterns; -defm : Neon_ScalarXIndexedElem_MLAL_Patterns; -defm : Neon_ScalarXIndexedElem_MLAL_Patterns; -defm : Neon_ScalarXIndexedElem_MLAL_Patterns; - -// Scalar Signed 
saturating doubling multiply returning -// high half (scalar, by element) -def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh", - 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh", - 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> { - let Inst{11} = Imm{2}; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh", - 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh", - 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} - -// Patterns for Scalar Signed saturating doubling multiply returning -// high half (scalar, by element) -defm : Neon_ScalarXIndexedElem_MUL_Patterns; -defm : Neon_ScalarXIndexedElem_MUL_Patterns; -defm : Neon_ScalarXIndexedElem_MUL_Patterns; -defm : Neon_ScalarXIndexedElem_MUL_Patterns; - -// Scalar Signed saturating rounding doubling multiply -// returning high half (scalar, by element) -def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh", - 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh", - 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> { - let Inst{11} = Imm{2}; // h - let Inst{21} = Imm{1}; // l - let Inst{20} = Imm{0}; // m - let Inst{19-16} = MRm{3-0}; -} -def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh", - 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> { - let Inst{11} = 0b0; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} -def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh", - 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { - let Inst{11} = Imm{1}; // h - let Inst{21} = Imm{0}; // l - let Inst{20-16} = MRm; -} - -defm : Neon_ScalarXIndexedElem_MUL_Patterns; -defm : Neon_ScalarXIndexedElem_MUL_Patterns; -defm : Neon_ScalarXIndexedElem_MUL_Patterns; -defm : Neon_ScalarXIndexedElem_MUL_Patterns; - -// Scalar general arithmetic operation -class Neon_Scalar_GeneralMath2D_pattern - : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>; - -class Neon_Scalar_GeneralMath3D_pattern - : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (INST FPR64:$Rn, FPR64:$Rm)>; - -class Neon_Scalar_GeneralMath4D_pattern - : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), - (v1f64 FPR64:$Ra))), - (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; - -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; - -def : Neon_Scalar_GeneralMath2D_pattern; -def : Neon_Scalar_GeneralMath2D_pattern; - -def : Neon_Scalar_GeneralMath4D_pattern; -def : 
Neon_Scalar_GeneralMath4D_pattern; - -// Scalar Copy - DUP element to scalar -class NeonI_Scalar_DUP - : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm), - asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]", - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bits<4> Imm; -} - -def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} -def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} -def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} -def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> { - let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; -} - -def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 0)), - (f32 (EXTRACT_SUBREG (v4f32 VPR128:$Rn), sub_32))>; -def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 1)), - (f32 (DUPsv_S (v4f32 VPR128:$Rn), 1))>; -def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 2)), - (f32 (DUPsv_S (v4f32 VPR128:$Rn), 2))>; -def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 3)), - (f32 (DUPsv_S (v4f32 VPR128:$Rn), 3))>; - -def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 0)), - (f64 (EXTRACT_SUBREG (v2f64 VPR128:$Rn), sub_64))>; -def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 1)), - (f64 (DUPdv_D (v2f64 VPR128:$Rn), 1))>; - -def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 0)), - (f32 (EXTRACT_SUBREG (v2f32 VPR64:$Rn), sub_32))>; -def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 1)), - (f32 (DUPsv_S (v4f32 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - 1))>; - -def : Pat<(f64 (vector_extract (v1f64 VPR64:$Rn), 0)), - (f64 (EXTRACT_SUBREG (v1f64 VPR64:$Rn), sub_64))>; - -multiclass NeonI_Scalar_DUP_Ext_Vec_pattern { - - def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)), - (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>; - - def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)), - (ResTy (DUPI - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - OpNImm:$Imm))>; -} - -// Patterns for extract subvectors of v1ix data using scalar DUP instructions. -defm : NeonI_Scalar_DUP_Ext_Vec_pattern; -defm : NeonI_Scalar_DUP_Ext_Vec_pattern; -defm : NeonI_Scalar_DUP_Ext_Vec_pattern; - -multiclass NeonI_Scalar_DUP_Copy_pattern1 { - - def : Pat<(ResTy (vector_insert (ResTy undef), - (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)), - (neon_uimm0_bare:$Imm))), - (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; - - def : Pat<(ResTy (vector_insert (ResTy undef), - (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)), - (OpNImm:$Imm))), - (ResTy (DUPI - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - OpNImm:$Imm))>; -} - -multiclass NeonI_Scalar_DUP_Copy_pattern2 { - - def : Pat<(ResTy (scalar_to_vector - (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))), - (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; - - def : Pat<(ResTy (scalar_to_vector - (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))), - (ResTy (DUPI - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - OpNImm:$Imm))>; -} - -// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP -// instructions. 
-defm : NeonI_Scalar_DUP_Copy_pattern1; -defm : NeonI_Scalar_DUP_Copy_pattern1; -defm : NeonI_Scalar_DUP_Copy_pattern1; -defm : NeonI_Scalar_DUP_Copy_pattern1; -defm : NeonI_Scalar_DUP_Copy_pattern2; -defm : NeonI_Scalar_DUP_Copy_pattern2; -defm : NeonI_Scalar_DUP_Copy_pattern2; -defm : NeonI_Scalar_DUP_Copy_pattern2; - -multiclass NeonI_Scalar_DUP_alias { - def : NeonInstAlias; -} - -// Aliases for Scalar copy - DUP element (scalar) -// FIXME: This is actually the preferred syntax but TableGen can't deal with -// custom printing of aliases. -defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>; -defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>; -defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>; -defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>; - -multiclass NeonI_SDUP { - def : Pat<(ResTy (GetLow VPR128:$Rn)), - (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>; - def : Pat<(ResTy (GetHigh VPR128:$Rn)), - (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>; -} - -defm : NeonI_SDUP; -defm : NeonI_SDUP; -defm : NeonI_SDUP; -defm : NeonI_SDUP; -defm : NeonI_SDUP; -defm : NeonI_SDUP; - -// The following is for sext/zext from v1xx to v1xx -multiclass NeonI_ext { - // v1i32 -> v1i64 - def : Pat<(v1i64 (ExtOp (v1i32 FPR32:$Rn))), - (EXTRACT_SUBREG - (v2i64 (!cast(prefix # "_2S") - (v2i32 (SUBREG_TO_REG (i64 0), $Rn, sub_32)), 0)), - sub_64)>; - - // v1i16 -> v1i32 - def : Pat<(v1i32 (ExtOp (v1i16 FPR16:$Rn))), - (EXTRACT_SUBREG - (v4i32 (!cast(prefix # "_4H") - (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)), - sub_32)>; - - // v1i8 -> v1i16 - def : Pat<(v1i16 (ExtOp (v1i8 FPR8:$Rn))), - (EXTRACT_SUBREG - (v8i16 (!cast(prefix # "_8B") - (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)), - sub_16)>; -} - -defm NeonI_zext : NeonI_ext<"USHLLvvi", zext>; -defm NeonI_sext : NeonI_ext<"SSHLLvvi", sext>; - -// zext v1i8 -> v1i32 -def : Pat<(v1i32 (zext (v1i8 FPR8:$Rn))), - (v1i32 (EXTRACT_SUBREG - (v1i64 (SUBREG_TO_REG (i64 0), - (v1i8 (DUPbv_B - (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), - 0)), - sub_8)), - sub_32))>; - -// zext v1i8 -> v1i64 -def : Pat<(v1i64 (zext (v1i8 FPR8:$Rn))), - (v1i64 (SUBREG_TO_REG (i64 0), - (v1i8 (DUPbv_B - (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), - 0)), - sub_8))>; - -// zext v1i16 -> v1i64 -def : Pat<(v1i64 (zext (v1i16 FPR16:$Rn))), - (v1i64 (SUBREG_TO_REG (i64 0), - (v1i16 (DUPhv_H - (v8i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), - 0)), - sub_16))>; - -// sext v1i8 -> v1i32 -def : Pat<(v1i32 (sext (v1i8 FPR8:$Rn))), - (EXTRACT_SUBREG - (v4i32 (SSHLLvvi_4H - (v4i16 (SUBREG_TO_REG (i64 0), - (v1i16 (EXTRACT_SUBREG - (v8i16 (SSHLLvvi_8B - (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)), - sub_16)), - sub_16)), 0)), - sub_32)>; - -// sext v1i8 -> v1i64 -def : Pat<(v1i64 (sext (v1i8 FPR8:$Rn))), - (EXTRACT_SUBREG - (v2i64 (SSHLLvvi_2S - (v2i32 (SUBREG_TO_REG (i64 0), - (v1i32 (EXTRACT_SUBREG - (v4i32 (SSHLLvvi_4H - (v4i16 (SUBREG_TO_REG (i64 0), - (v1i16 (EXTRACT_SUBREG - (v8i16 (SSHLLvvi_8B - (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)), - sub_16)), - sub_16)), 0)), - sub_32)), - sub_32)), 0)), - sub_64)>; - - -// sext v1i16 -> v1i64 -def : Pat<(v1i64 (sext (v1i16 FPR16:$Rn))), - (EXTRACT_SUBREG - (v2i64 (SSHLLvvi_2S - (v2i32 (SUBREG_TO_REG (i64 0), - (v1i32 (EXTRACT_SUBREG - (v4i32 (SSHLLvvi_4H - (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)), - sub_32)), - sub_32)), 0)), - sub_64)>; - -//===----------------------------------------------------------------------===// 
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// 64-bit vector bitcasts...
-
-def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;
-
-def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (v1f64 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v1f64 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1f64 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1f64 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v1f64 VPR64:$src))), (v8i8 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v1f64 VPR64:$src))), (f64 VPR64:$src)>;
-
-def : Pat<(v1f64 (bitconvert (v1i64 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v2f32 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v2i32 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v4i16 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v8i8 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (f64 VPR64:$src))), (v1f64 VPR64:$src)>;
-
-// ..and 128-bit vector bitcasts...
-
-def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;
-
-def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
-
-// ...and scalar bitcasts...
-def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
-def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
-def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
-def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
-
-def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v1f64 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v2i32 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v2f32 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v4i16 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v8i8 FPR64:$src))), (FMOVxd $src)>;
-
-def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
-
-def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
-
-def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
-
-def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
-
-def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
-def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
-def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v1f64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v2i32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v2f32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v4i16 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v8i8 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-
-def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
-
-def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
-def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
-
-def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
-def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
-
-// Scalar Three Same
-
-def neon_uimm3 : Operand,
- ImmLeaf {
- let ParserMatchClass = uimm3_asmoperand;
- let PrintMethod = "printUImmHexOperand";
-}
-
-def neon_uimm4 : Operand,
- ImmLeaf {
- let ParserMatchClass = uimm4_asmoperand;
- let PrintMethod
= "printUImmHexOperand"; -} - -// Bitwise Extract -class NeonI_Extract op2, string asmop, - string OpS, RegisterOperand OpVPR, Operand OpImm> - : NeonI_BitExtract, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>{ - bits<4> Index; -} - -def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b", - VPR64, neon_uimm3> { - let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}}; -} - -def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b", - VPR128, neon_uimm4> { - let Inst{14-11} = Index; -} - -class NI_Extract - : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm), - (i64 OpImm:$Imm))), - (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>; - -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; - -// Table lookup -class NI_TBL op2, bits<2> len, bit op, - string asmop, string OpS, RegisterOperand OpVPR, - RegisterOperand VecList> - : NeonI_TBL, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -// The vectors in look up table are always 16b -multiclass NI_TBL_pat len, bit op, string asmop, string List> { - def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64, - !cast(List # "16B_operand")>; - - def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128, - !cast(List # "16B_operand")>; -} - -defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">; -defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">; -defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">; -defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">; - -// Table lookup extension -class NI_TBX op2, bits<2> len, bit op, - string asmop, string OpS, RegisterOperand OpVPR, - RegisterOperand VecList> - : NeonI_TBL, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -// The vectors in look up table are always 16b -multiclass NI_TBX_pat len, bit op, string asmop, string List> { - def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64, - !cast(List # "16B_operand")>; - - def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128, - !cast(List # "16B_operand")>; -} - -defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">; -defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">; -defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">; -defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">; - -class NeonI_INS_main - : NeonI_copy<0b1, 0b0, 0b0011, - (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm), - asmop # "\t$Rd." 
# Res # "[$Imm], $Rn", - [(set (ResTy VPR128:$Rd), - (ResTy (vector_insert - (ResTy VPR128:$src), - (OpTy OpGPR:$Rn), - (OpImm:$Imm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - bits<4> Imm; - let Constraints = "$src = $Rd"; -} - -//Insert element (vector, from main) -def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32, - neon_uimm4_bare> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} -def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32, - neon_uimm3_bare> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} -def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32, - neon_uimm2_bare> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} -def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64, - neon_uimm1_bare> { - let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; -} - -def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn", - (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>; -def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn", - (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>; -def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn", - (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>; -def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn", - (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>; - -class Neon_INS_main_pattern - : Pat<(ResTy (vector_insert - (ResTy VPR64:$src), - (OpTy OpGPR:$Rn), - (OpImm:$Imm))), - (ResTy (EXTRACT_SUBREG - (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), - OpGPR:$Rn, OpImm:$Imm)), sub_64))>; - -def INSbw_pattern : Neon_INS_main_pattern; -def INShw_pattern : Neon_INS_main_pattern; -def INSsw_pattern : Neon_INS_main_pattern; -def INSdx_pattern : Neon_INS_main_pattern; - -class NeonI_INS_element - : NeonI_insert<0b1, 0b1, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, - ResImm:$Immd, ResImm:$Immn), - asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]", - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - bits<4> Immd; - bits<4> Immn; -} - -//Insert element (vector, from element) -def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> { - let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1}; - let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}}; -} -def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> { - let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0}; - let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0}; - // bit 11 is unspecified, but should be set to zero. -} -def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> { - let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0}; - let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0}; - // bits 11-12 are unspecified, but should be set to zero. -} -def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> { - let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0}; - let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0}; - // bits 11-13 are unspecified, but should be set to zero. 
-} - -def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]", - (INSELb VPR128:$Rd, VPR128:$Rn, - neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>; -def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]", - (INSELh VPR128:$Rd, VPR128:$Rn, - neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>; -def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]", - (INSELs VPR128:$Rd, VPR128:$Rn, - neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>; -def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]", - (INSELd VPR128:$Rd, VPR128:$Rn, - neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>; - -multiclass Neon_INS_elt_pattern { -def : Pat<(ResTy (vector_insert - (ResTy VPR128:$src), - (MidTy (vector_extract - (ResTy VPR128:$Rn), - (StImm:$Immn))), - (StImm:$Immd))), - (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn), - StImm:$Immd, StImm:$Immn)>; - -def : Pat <(ResTy (vector_insert - (ResTy VPR128:$src), - (MidTy (vector_extract - (NaTy VPR64:$Rn), - (NaImm:$Immn))), - (StImm:$Immd))), - (INS (ResTy VPR128:$src), - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)), - StImm:$Immd, NaImm:$Immn)>; - -def : Pat <(NaTy (vector_insert - (NaTy VPR64:$src), - (MidTy (vector_extract - (ResTy VPR128:$Rn), - (StImm:$Immn))), - (NaImm:$Immd))), - (NaTy (EXTRACT_SUBREG - (ResTy (INS - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), - (ResTy VPR128:$Rn), - NaImm:$Immd, StImm:$Immn)), - sub_64))>; - -def : Pat <(NaTy (vector_insert - (NaTy VPR64:$src), - (MidTy (vector_extract - (NaTy VPR64:$Rn), - (NaImm:$Immn))), - (NaImm:$Immd))), - (NaTy (EXTRACT_SUBREG - (ResTy (INS - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)), - NaImm:$Immd, NaImm:$Immn)), - sub_64))>; -} - -defm : Neon_INS_elt_pattern; -defm : Neon_INS_elt_pattern; -defm : Neon_INS_elt_pattern; -defm : Neon_INS_elt_pattern; -defm : Neon_INS_elt_pattern; -defm : Neon_INS_elt_pattern; - -multiclass Neon_INS_elt_float_pattern { -def : Pat <(ResTy (vector_insert - (ResTy VPR128:$src), - (MidTy OpFPR:$Rn), - (ResImm:$Imm))), - (INS (ResTy VPR128:$src), - (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)), - ResImm:$Imm, - (i64 0))>; - -def : Pat <(NaTy (vector_insert - (NaTy VPR64:$src), - (MidTy OpFPR:$Rn), - (ResImm:$Imm))), - (NaTy (EXTRACT_SUBREG - (ResTy (INS - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), - (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)), - ResImm:$Imm, - (i64 0))), - sub_64))>; -} - -defm : Neon_INS_elt_float_pattern; -defm : Neon_INS_elt_float_pattern; - -class NeonI_SMOV - : NeonI_copy, - Sched<[WriteFPALU, ReadFPALU]> { - bits<4> Imm; -} - -//Signed integer move (main, from element) -def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} -def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} -def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare, - GPR64, i64> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} -def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare, - GPR64, i64> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} -def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare, - GPR64, i64> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} - -multiclass Neon_SMOVx_pattern { - def : Pat<(i64 (sext_inreg - (i64 (anyext - (i32 (vector_extract - (StTy VPR128:$Rn), 
(StImm:$Imm))))), - eleTy)), - (SMOVI VPR128:$Rn, StImm:$Imm)>; - - def : Pat<(i64 (sext - (i32 (vector_extract - (StTy VPR128:$Rn), (StImm:$Imm))))), - (SMOVI VPR128:$Rn, StImm:$Imm)>; - - def : Pat<(i64 (sext_inreg - (i64 (vector_extract - (NaTy VPR64:$Rn), (NaImm:$Imm))), - eleTy)), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; - - def : Pat<(i64 (sext_inreg - (i64 (anyext - (i32 (vector_extract - (NaTy VPR64:$Rn), (NaImm:$Imm))))), - eleTy)), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; - - def : Pat<(i64 (sext - (i32 (vector_extract - (NaTy VPR64:$Rn), (NaImm:$Imm))))), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; -} - -defm : Neon_SMOVx_pattern; -defm : Neon_SMOVx_pattern; -defm : Neon_SMOVx_pattern; - -class Neon_SMOVw_pattern - : Pat<(i32 (sext_inreg - (i32 (vector_extract - (NaTy VPR64:$Rn), (NaImm:$Imm))), - eleTy)), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; - -def : Neon_SMOVw_pattern; -def : Neon_SMOVw_pattern; - -class NeonI_UMOV - : NeonI_copy, - Sched<[WriteFPALU, ReadFPALU]> { - bits<4> Imm; -} - -//Unsigned integer move (main, from element) -def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} -def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} -def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} -def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare, - GPR64, i64> { - let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; -} - -def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]", - (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>; -def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]", - (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>; - -class Neon_UMOV_pattern - : Pat<(ResTy (vector_extract - (NaTy VPR64:$Rn), NaImm:$Imm)), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; - -def : Neon_UMOV_pattern; -def : Neon_UMOV_pattern; -def : Neon_UMOV_pattern; - -def : Pat<(i32 (and - (i32 (vector_extract - (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))), - 255)), - (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>; - -def : Pat<(i32 (and - (i32 (vector_extract - (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))), - 65535)), - (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>; - -def : Pat<(i64 (zext - (i32 (vector_extract - (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))), - (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>; - -def : Pat<(i32 (and - (i32 (vector_extract - (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))), - 255)), - (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), - neon_uimm3_bare:$Imm)>; - -def : Pat<(i32 (and - (i32 (vector_extract - (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))), - 65535)), - (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), - neon_uimm2_bare:$Imm)>; - -def : Pat<(i64 (zext - (i32 (vector_extract - (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))), - (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), - neon_uimm0_bare:$Imm)>; - -// Additional copy patterns for scalar types -def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))), - (UMOVwb (v16i8 - (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>; - -def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))), - (UMOVwh (v8i16 - (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>; - -def : Pat<(i32 
(vector_extract (v1i32 FPR32:$Rn), (i64 0))), - (FMOVws FPR32:$Rn)>; - -def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))), - (FMOVxd FPR64:$Rn)>; - -def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))), - (f64 FPR64:$Rn)>; - -def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)), - (v1i8 (EXTRACT_SUBREG (v16i8 - (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_8))>; - -def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)), - (v1i16 (EXTRACT_SUBREG (v8i16 - (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_16))>; - -def : Pat<(v1i32 (scalar_to_vector GPR32:$src)), - (FMOVsw $src)>; - -def : Pat<(v1i64 (scalar_to_vector GPR64:$src)), - (FMOVdx $src)>; - -def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), - (v8i8 (EXTRACT_SUBREG (v16i8 - (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_64))>; - -def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), - (v4i16 (EXTRACT_SUBREG (v8i16 - (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_64))>; - -def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)), - (v2i32 (EXTRACT_SUBREG (v16i8 - (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_64))>; - -def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), - (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))>; - -def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), - (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))>; - -def : Pat<(v4i32 (scalar_to_vector GPR32:$Rn)), - (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))>; - -def : Pat<(v2i64 (scalar_to_vector GPR64:$Rn)), - (INSdx (v2i64 (IMPLICIT_DEF)), $Rn, (i64 0))>; - -def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), - (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>; -def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), - (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>; - -def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))), - (v1f64 FPR64:$Rn)>; - -def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), - (f64 FPR64:$src), sub_64)>; - -class NeonI_DUP_Elt - : NeonI_copy, - Sched<[WriteFPALU, ReadFPALU]> { - bits<4> Imm; -} - -def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128, - neon_uimm4_bare> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} - -def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128, - neon_uimm3_bare> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} - -def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128, - neon_uimm2_bare> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} - -def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128, - neon_uimm1_bare> { - let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; -} - -def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64, - neon_uimm4_bare> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} - -def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64, - neon_uimm3_bare> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} - -def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64, - neon_uimm2_bare> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} - -multiclass NeonI_DUP_Elt_pattern { -def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)), - (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>; - -def : Pat<(ResTy (Neon_vduplane - (NaTy VPR64:$Rn), OpNImm:$Imm)), - (ResTy (DUPELT - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>; -} -defm : NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; -defm : 
NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; - -def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))), - (v2f32 (DUPELT2s - (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (i64 0)))>; -def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))), - (v4f32 (DUPELT4s - (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (i64 0)))>; -def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))), - (v2f64 (DUPELT2d - (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64), - (i64 0)))>; - -multiclass NeonI_DUP_pattern { -def : Pat<(ResTy (Neon_vduplane (OpTy OpRC:$Rn), OpNImm:$Imm)), - (ResTy (DUPELT - (SUBREG_TO_REG (i64 0), OpRC:$Rn, SubIndex), OpNImm:$Imm))>; -} - -defm : NeonI_DUP_pattern; -defm : NeonI_DUP_pattern; -defm : NeonI_DUP_pattern; -defm : NeonI_DUP_pattern; -defm : NeonI_DUP_pattern; - -class NeonI_DUP - : NeonI_copy, - Sched<[WriteFPALU, ReadFPALU]>; - -def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> { - let Inst{20-16} = 0b00001; - // bits 17-20 are unspecified, but should be set to zero. -} - -def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> { - let Inst{20-16} = 0b00010; - // bits 18-20 are unspecified, but should be set to zero. -} - -def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> { - let Inst{20-16} = 0b00100; - // bits 19-20 are unspecified, but should be set to zero. -} - -def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> { - let Inst{20-16} = 0b01000; - // bit 20 is unspecified, but should be set to zero. -} - -def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> { - let Inst{20-16} = 0b00001; - // bits 17-20 are unspecified, but should be set to zero. -} - -def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> { - let Inst{20-16} = 0b00010; - // bits 18-20 are unspecified, but should be set to zero. -} - -def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> { - let Inst{20-16} = 0b00100; - // bits 19-20 are unspecified, but should be set to zero. 
-} - -// patterns for CONCAT_VECTORS -multiclass Concat_Vector_Pattern { -def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)), - (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>; -def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))), - (INSELd - (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)), - (i64 1), - (i64 0))>; -def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))), - (DUPELT2d - (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (i64 0))> ; -} - -defm : Concat_Vector_Pattern; -defm : Concat_Vector_Pattern; -defm : Concat_Vector_Pattern; -defm : Concat_Vector_Pattern; -defm : Concat_Vector_Pattern; -defm : Concat_Vector_Pattern; - -def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), undef)), - (v2i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32))>; -def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (EXTRACT_SUBREG - (v4i32 (INSELs - (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)), - (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - (i64 1), - (i64 0))), - sub_64)>; -def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rn))), - (DUPELT2s (v4i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32)), 0)>; - -//patterns for EXTRACT_SUBVECTOR -def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))), - (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))), - (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))), - (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))), - (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))), - (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))), - (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; - -// The followings are for instruction class (3V Elem) - -// Variant 1 - -class NI_2VE size, bits<4> opcode, - string asmop, string ResS, string OpS, string EleOpS, - Operand OpImm, RegisterOperand ResVPR, - RegisterOperand OpVPR, RegisterOperand EleOpVPR> - : NeonI_2VElem, - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { - bits<3> Index; - bits<5> Re; - - let Constraints = "$src = $Rd"; -} - -multiclass NI_2VE_v1 opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
- def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h", - neon_uimm3_bare, VPR64, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">; -defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">; - -// Pattern for lane in 128-bit vector -class NI_2VE_laneq - : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VE_lane - : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST ResVPR:$src, OpVPR:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; - -multiclass NI_2VE_v1_pat -{ - def : NI_2VE_laneq(subop # "_2s4s"), neon_uimm2_bare, - op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>; - - def : NI_2VE_laneq(subop # "_4s4s"), neon_uimm2_bare, - op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>; - - def : NI_2VE_laneq(subop # "_4h8h"), neon_uimm3_bare, - op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>; - - def : NI_2VE_laneq(subop # "_8h8h"), neon_uimm3_bare, - op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_lane(subop # "_2s4s"), neon_uimm1_bare, - op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>; - - def : NI_2VE_lane(subop # "_4h8h"), neon_uimm2_bare, - op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>; -} - -defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>; -defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>; - -class NI_2VE_2op size, bits<4> opcode, - string asmop, string ResS, string OpS, string EleOpS, - Operand OpImm, RegisterOperand ResVPR, - RegisterOperand OpVPR, RegisterOperand EleOpVPR> - : NeonI_2VElem, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - bits<3> Index; - bits<5> Re; -} - -multiclass NI_2VE_v1_2op opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
- def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h", - neon_uimm3_bare, VPR64, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">; -defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">; -defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">; -} - -// Pattern for lane in 128-bit vector -class NI_2VE_mul_laneq - : Pat<(ResTy (op (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VE_mul_lane - : Pat<(ResTy (op (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST OpVPR:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; - -multiclass NI_2VE_mul_v1_pat { - def : NI_2VE_mul_laneq(subop # "_2s4s"), neon_uimm2_bare, - op, VPR64, VPR128, v2i32, v2i32, v4i32>; - - def : NI_2VE_mul_laneq(subop # "_4s4s"), neon_uimm2_bare, - op, VPR128, VPR128, v4i32, v4i32, v4i32>; - - def : NI_2VE_mul_laneq(subop # "_4h8h"), neon_uimm3_bare, - op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>; - - def : NI_2VE_mul_laneq(subop # "_8h8h"), neon_uimm3_bare, - op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_mul_lane(subop # "_2s4s"), neon_uimm1_bare, - op, VPR64, VPR64, v2i32, v2i32, v2i32>; - - def : NI_2VE_mul_lane(subop # "_4h8h"), neon_uimm2_bare, - op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>; -} - -defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>; -defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>; -defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>; - -// Variant 2 - -multiclass NI_2VE_v2_2op opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // _1d2d doesn't exist! 
- - def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d", - neon_uimm1_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{0}}; - let Inst{21} = 0b0; - let Inst{20-16} = Re; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">; -defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">; -} - -class NI_2VE_mul_lane_2d - : Pat<(ResTy (op (OpTy OpVPR:$Rn), - (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))), - (INST OpVPR:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>; - -multiclass NI_2VE_mul_v2_pat { - def : NI_2VE_mul_laneq(subop # "_2s4s"), neon_uimm2_bare, - op, VPR64, VPR128, v2f32, v2f32, v4f32>; - - def : NI_2VE_mul_laneq(subop # "_4s4s"), neon_uimm2_bare, - op, VPR128, VPR128, v4f32, v4f32, v4f32>; - - def : NI_2VE_mul_laneq(subop # "_2d2d"), neon_uimm1_bare, - op, VPR128, VPR128, v2f64, v2f64, v2f64>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_mul_lane(subop # "_2s4s"), neon_uimm1_bare, - op, VPR64, VPR64, v2f32, v2f32, v2f32>; - - def : NI_2VE_mul_lane_2d(subop # "_2d2d"), neon_uimm1_bare, - op, VPR128, VPR64, v2f64, v2f64, v1f64, - BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>; -} - -defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>; -defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>; - -def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))), - (v2f32 VPR64:$Rn))), - (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; - -def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))), - (v4f32 VPR128:$Rn))), - (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; - -def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))), - (v2f64 VPR128:$Rn))), - (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>; - -// The followings are patterns using fma -// -ffp-contract=fast generates fma - -multiclass NI_2VE_v2 opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // _1d2d doesn't exist! 
- - def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d", - neon_uimm1_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{0}}; - let Inst{21} = 0b0; - let Inst{20-16} = Re; - } -} - -defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">; -defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">; - -// Pattern for lane in 128-bit vector -class NI_2VEswap_laneq - : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))), - (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane 0 -class NI_2VEfma_lane0 - : Pat<(ResTy (op (ResTy ResVPR:$Rn), - (ResTy (Neon_vdup (f32 FPR32:$Re))), - (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, - (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; - -// Pattern for lane in 64-bit vector -class NI_2VEswap_lane - : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))), - (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, - (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VEswap_lane_2d2d - : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))), - (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, - (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>; - - -multiclass NI_2VE_fma_v2_pat { - def : NI_2VEswap_laneq(subop # "_2s4s"), - neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - def : NI_2VEfma_lane0(subop # "_2s4s"), - op, VPR64, v2f32>; - - def : NI_2VEswap_laneq(subop # "_4s4s"), - neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - def : NI_2VEfma_lane0(subop # "_4s4s"), - op, VPR128, v4f32>; - - def : NI_2VEswap_laneq(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VEswap_lane(subop # "_2s4s"), - neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, - BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>; -} - -defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>; - -// Pattern for lane 0 -class NI_2VEfms_lane0 - : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)), - (ResTy (Neon_vdup (f32 FPR32:$Re))), - (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, - (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; - -multiclass NI_2VE_fms_v2_pat -{ - def : NI_2VEswap_laneq(subop # "_2s4s"), - neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, - BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_laneq(subop # "_2s4s"), - neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEfms_lane0(subop # "_2s4s"), - op, VPR64, v2f32>; - - def : NI_2VEswap_laneq(subop # "_4s4s"), - neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, - BinOpFrag<(fneg (Neon_vduplane - node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_laneq(subop # "_4s4s"), - neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEfms_lane0(subop # "_4s4s"), - op, VPR128, v4f32>; - - def : NI_2VEswap_laneq(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, - BinOpFrag<(fneg (Neon_vduplane - node:$LHS, 
node:$RHS))>>; - - def : NI_2VEswap_laneq(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VEswap_lane(subop # "_2s4s"), - neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, - BinOpFrag<(fneg (Neon_vduplane - node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_lane(subop # "_2s4s"), - neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEswap_lane(subop # "_4s4s"), - neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32, - BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_lane(subop # "_4s4s"), - neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32, - BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, - BinOpFrag<(fneg (Neon_combine_2d - node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, - BinOpFrag<(Neon_combine_2d - (fneg node:$LHS), (fneg node:$RHS))>>; -} - -defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>; - -// Variant 3: Long type -// E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S -// SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S - -multiclass NI_2VE_v3 opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s", - neon_uimm2_bare, VPR128, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. - def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h", - neon_uimm3_bare, VPR128, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">; -defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">; -defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">; -defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">; -defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">; -defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">; - -multiclass NI_2VE_v3_2op opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s", - neon_uimm2_bare, VPR128, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
- def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h", - neon_uimm3_bare, VPR128, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">; -defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">; -defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">; -} - -def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))), - (FMOVdd $src)>; - -// Pattern for lane in 128-bit vector -class NI_2VEL2_laneq - : Pat<(ResTy (op (ResTy VPR128:$src), - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VEL2_lane - : Pat<(ResTy (op (ResTy VPR128:$src), - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$src, VPR128:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; - -class NI_2VEL2_lane0 - : Pat<(ResTy (op (ResTy VPR128:$src), - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))), - (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>; - -multiclass NI_2VEL_v3_pat { - def : NI_2VE_laneq(subop # "_4s4h"), neon_uimm3_bare, - op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>; - - def : NI_2VE_laneq(subop # "_2d2s"), neon_uimm2_bare, - op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>; - - def : NI_2VEL2_laneq(subop # "_4s8h"), neon_uimm3_bare, - op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_laneq(subop # "_2d4s"), neon_uimm2_bare, - op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>; - - def : NI_2VEL2_lane0(subop # "_4s8h"), - op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; - - def : NI_2VEL2_lane0(subop # "_2d4s"), - op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_lane(subop # "_4s4h"), neon_uimm2_bare, - op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>; - - def : NI_2VE_lane(subop # "_2d2s"), neon_uimm1_bare, - op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>; - - def : NI_2VEL2_lane(subop # "_4s8h"), neon_uimm2_bare, - op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_lane(subop # "_2d4s"), neon_uimm1_bare, - op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>; -} - -defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>; -defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>; -defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>; -defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>; - -// Pattern for lane in 128-bit vector -class NI_2VEL2_mul_laneq - : Pat<(ResTy (op - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VEL2_mul_lane - : Pat<(ResTy (op - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 
OpImm:$Index)>; - -// Pattern for fixed lane 0 -class NI_2VEL2_mul_lane0 - : Pat<(ResTy (op - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))), - (INST VPR128:$Rn, (DupInst $Re), 0)>; - -multiclass NI_2VEL_mul_v3_pat { - def : NI_2VE_mul_laneq(subop # "_4s4h"), neon_uimm3_bare, - op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>; - - def : NI_2VE_mul_laneq(subop # "_2d2s"), neon_uimm2_bare, - op, VPR64, VPR128, v2i64, v2i32, v4i32>; - - def : NI_2VEL2_mul_laneq(subop # "_4s8h"), neon_uimm3_bare, - op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_mul_laneq(subop # "_2d4s"), neon_uimm2_bare, - op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>; - - def : NI_2VEL2_mul_lane0(subop # "_4s8h"), - op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; - - def : NI_2VEL2_mul_lane0(subop # "_2d4s"), - op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_mul_lane(subop # "_4s4h"), neon_uimm2_bare, - op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>; - - def : NI_2VE_mul_lane(subop # "_2d2s"), neon_uimm1_bare, - op, VPR64, VPR64, v2i64, v2i32, v2i32>; - - def : NI_2VEL2_mul_lane(subop # "_4s8h"), neon_uimm2_bare, - op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_mul_lane(subop # "_2d4s"), neon_uimm1_bare, - op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>; -} - -defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>; -defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>; -defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>; - -multiclass NI_qdma { - def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (op node:$Ra, - (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>; - - def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (op node:$Ra, - (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>; -} - -defm Neon_qdmlal : NI_qdma; -defm Neon_qdmlsl : NI_qdma; - -multiclass NI_2VEL_v3_qdma_pat { - def : NI_2VE_laneq(subop # "_4s4h"), neon_uimm3_bare, - !cast(op # "_4s"), VPR128, VPR64, VPR128Lo, - v4i32, v4i16, v8i16>; - - def : NI_2VE_laneq(subop # "_2d2s"), neon_uimm2_bare, - !cast(op # "_2d"), VPR128, VPR64, VPR128, - v2i64, v2i32, v4i32>; - - def : NI_2VEL2_laneq(subop # "_4s8h"), neon_uimm3_bare, - !cast(op # "_4s"), VPR128Lo, - v4i32, v8i16, v8i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_laneq(subop # "_2d4s"), neon_uimm2_bare, - !cast(op # "_2d"), VPR128, - v2i64, v4i32, v4i32, v2i32, Neon_High4S>; - - def : NI_2VEL2_lane0(subop # "_4s8h"), - !cast(op # "_4s"), - v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; - - def : NI_2VEL2_lane0(subop # "_2d4s"), - !cast(op # "_2d"), - v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_lane(subop # "_4s4h"), neon_uimm2_bare, - !cast(op # "_4s"), VPR128, VPR64, VPR64Lo, - v4i32, v4i16, v4i16>; - - def : NI_2VE_lane(subop # "_2d2s"), neon_uimm1_bare, - !cast(op # "_2d"), VPR128, VPR64, VPR64, - v2i64, v2i32, v2i32>; - - def : NI_2VEL2_lane(subop # "_4s8h"), neon_uimm2_bare, - !cast(op # "_4s"), VPR64Lo, - v4i32, v8i16, v4i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_lane(subop # "_2d4s"), neon_uimm1_bare, - !cast(op # "_2d"), VPR64, - v2i64, v4i32, v2i32, v2i32, Neon_High4S>; -} - -defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">; -defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">; - -// End of implementation for instruction class (3V 
Elem) - -class NeonI_REV size, bit Q, bit U, - bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy, - SDPatternOperator Neon_Rev> - : NeonI_2VMisc, - Sched<[WriteFPALU, ReadFPALU]>; - -def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128, - v16i8, Neon_rev64>; -def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128, - v8i16, Neon_rev64>; -def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128, - v4i32, Neon_rev64>; -def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64, - v8i8, Neon_rev64>; -def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64, - v4i16, Neon_rev64>; -def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64, - v2i32, Neon_rev64>; - -def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>; -def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>; - -def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128, - v16i8, Neon_rev32>; -def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128, - v8i16, Neon_rev32>; -def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64, - v8i8, Neon_rev32>; -def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64, - v4i16, Neon_rev32>; - -def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128, - v16i8, Neon_rev16>; -def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64, - v8i8, Neon_rev16>; - -multiclass NeonI_PairwiseAdd opcode, - SDPatternOperator Neon_Padd> { - def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.8h, $Rn.16b", - [(set (v8i16 VPR128:$Rd), - (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.8b", - [(set (v4i16 VPR64:$Rd), - (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.8h", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.4h", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.4s", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.1d, $Rn.2s", - [(set (v1i64 VPR64:$Rd), - (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010, - int_arm_neon_vpaddls>; -defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010, - int_arm_neon_vpaddlu>; - -def : Pat<(v1i64 (int_aarch64_neon_saddlv (v2i32 VPR64:$Rn))), - (SADDLP2s1d $Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_uaddlv (v2i32 VPR64:$Rn))), - (UADDLP2s1d $Rn)>; - -multiclass NeonI_PairwiseAddAcc opcode, - SDPatternOperator Neon_Padd> { - let Constraints = "$src = $Rd" in { - def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), 
- asmop # "\t$Rd.8h, $Rn.16b", - [(set (v8i16 VPR128:$Rd), - (v8i16 (Neon_Padd - (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.8b", - [(set (v4i16 VPR64:$Rd), - (v4i16 (Neon_Padd - (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.8h", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_Padd - (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.4h", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_Padd - (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.4s", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_Padd - (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.1d, $Rn.2s", - [(set (v1i64 VPR64:$Rd), - (v1i64 (Neon_Padd - (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110, - int_arm_neon_vpadals>; -defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110, - int_arm_neon_vpadalu>; - -multiclass NeonI_2VMisc_BHSDsize_1Arg opcode> { - def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.8h, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.8b, $Rn.8b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.4h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>; -defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>; -defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>; -defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>; - -multiclass NeonI_2VMisc_BHSD_1Arg_Pattern { - def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))), - (v16i8 (!cast(Prefix # 16b) (v16i8 VPR128:$Rn)))>; - - def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))), - (v8i16 (!cast(Prefix # 8h) (v8i16 VPR128:$Rn)))>; - - def : 
Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))), - (v4i32 (!cast(Prefix # 4s) (v4i32 VPR128:$Rn)))>; - - def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))), - (v2i64 (!cast(Prefix # 2d) (v2i64 VPR128:$Rn)))>; - - def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))), - (v8i8 (!cast(Prefix # 8b) (v8i8 VPR64:$Rn)))>; - - def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))), - (v4i16 (!cast(Prefix # 4h) (v4i16 VPR64:$Rn)))>; - - def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))), - (v2i32 (!cast(Prefix # 2s) (v2i32 VPR64:$Rn)))>; -} - -defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>; -defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>; -defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>; - -def : Pat<(v16i8 (sub - (v16i8 Neon_AllZero), - (v16i8 VPR128:$Rn))), - (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>; -def : Pat<(v8i8 (sub - (v8i8 Neon_AllZero), - (v8i8 VPR64:$Rn))), - (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>; -def : Pat<(v8i16 (sub - (v8i16 (bitconvert (v16i8 Neon_AllZero))), - (v8i16 VPR128:$Rn))), - (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>; -def : Pat<(v4i16 (sub - (v4i16 (bitconvert (v8i8 Neon_AllZero))), - (v4i16 VPR64:$Rn))), - (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>; -def : Pat<(v4i32 (sub - (v4i32 (bitconvert (v16i8 Neon_AllZero))), - (v4i32 VPR128:$Rn))), - (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>; -def : Pat<(v2i32 (sub - (v2i32 (bitconvert (v8i8 Neon_AllZero))), - (v2i32 VPR64:$Rn))), - (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>; -def : Pat<(v2i64 (sub - (v2i64 (bitconvert (v16i8 Neon_AllZero))), - (v2i64 VPR128:$Rn))), - (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>; - -multiclass NeonI_2VMisc_BHSDsize_2Args opcode> { - let Constraints = "$src = $Rd" in { - def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.8h, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.8b, $Rn.8b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.4h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>; -defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>; - -multiclass NeonI_2VMisc_BHSD_2Args_Pattern { - def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))), - (v16i8 (!cast(Prefix # 16b) - (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>; - - def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))), - (v8i16 (!cast(Prefix # 8h) - (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>; - - def : Pat<(v4i32 (Neon_Op (v4i32 
VPR128:$src), (v4i32 VPR128:$Rn))), - (v4i32 (!cast(Prefix # 4s) - (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>; - - def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))), - (v2i64 (!cast(Prefix # 2d) - (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>; - - def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))), - (v8i8 (!cast(Prefix # 8b) - (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>; - - def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))), - (v4i16 (!cast(Prefix # 4h) - (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>; - - def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))), - (v2i32 (!cast(Prefix # 2s) - (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>; -} - -defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>; -defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>; - -multiclass NeonI_2VMisc_BHSsizes { - def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.16b, $Rn.16b", - [(set (v16i8 VPR128:$Rd), - (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.8h, $Rn.8h", - [(set (v8i16 VPR128:$Rd), - (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.8b, $Rn.8b", - [(set (v8i8 VPR64:$Rd), - (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.4h", - [(set (v4i16 VPR64:$Rd), - (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>; -defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>; - -multiclass NeonI_2VMisc_Bsize size, - bits<5> Opcode> { - def 16b : NeonI_2VMisc<0b1, U, size, Opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8b : NeonI_2VMisc<0b0, U, size, Opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.8b, $Rn.8b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>; -defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>; -defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>; - -def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b", - (NOT16b VPR128:$Rd, VPR128:$Rn), 0>; -def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b", - (NOT8b VPR64:$Rd, VPR64:$Rn), 0>; - -def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))), - (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>; -def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))), - (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>; - -def : Pat<(v16i8 (xor - (v16i8 VPR128:$Rn), - (v16i8 Neon_AllOne))), - (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>; -def : Pat<(v8i8 (xor - (v8i8 VPR64:$Rn), - (v8i8 Neon_AllOne))), - (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>; -def 
: Pat<(v8i16 (xor - (v8i16 VPR128:$Rn), - (v8i16 (bitconvert (v16i8 Neon_AllOne))))), - (NOT16b VPR128:$Rn)>; -def : Pat<(v4i16 (xor - (v4i16 VPR64:$Rn), - (v4i16 (bitconvert (v8i8 Neon_AllOne))))), - (NOT8b VPR64:$Rn)>; -def : Pat<(v4i32 (xor - (v4i32 VPR128:$Rn), - (v4i32 (bitconvert (v16i8 Neon_AllOne))))), - (NOT16b VPR128:$Rn)>; -def : Pat<(v2i32 (xor - (v2i32 VPR64:$Rn), - (v2i32 (bitconvert (v8i8 Neon_AllOne))))), - (NOT8b VPR64:$Rn)>; -def : Pat<(v2i64 (xor - (v2i64 VPR128:$Rn), - (v2i64 (bitconvert (v16i8 Neon_AllOne))))), - (NOT16b VPR128:$Rn)>; - -def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))), - (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>; -def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))), - (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>; - -multiclass NeonI_2VMisc_SDsizes opcode, - SDPatternOperator Neon_Op> { - def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [(set (v4f32 VPR128:$Rd), - (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.2d", - [(set (v2f64 VPR128:$Rd), - (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [(set (v2f32 VPR64:$Rd), - (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>; -defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>; - -multiclass NeonI_2VMisc_HSD_Narrow opcode> { - def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.8b, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4h, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - let Constraints = "$Rd = $src" in { - def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.16b, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.8h, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>; -defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>; -defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>; -defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>; - -multiclass NeonI_2VMisc_Narrow_Patterns { - def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))), - (v8i8 (!cast(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>; - - def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))), - (v4i16 (!cast(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>; - - def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))), - (v2i32 (!cast(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>; - - def : Pat<(v16i8 (concat_vectors - (v8i8 VPR64:$src), - (v8i8 
(Neon_Op (v8i16 VPR128:$Rn))))), - (!cast(Prefix # 8h16b) - (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), - VPR128:$Rn)>; - - def : Pat<(v8i16 (concat_vectors - (v4i16 VPR64:$src), - (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))), - (!cast(Prefix # 4s8h) - (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), - VPR128:$Rn)>; - - def : Pat<(v4i32 (concat_vectors - (v2i32 VPR64:$src), - (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))), - (!cast(Prefix # 2d4s) - (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), - VPR128:$Rn)>; -} - -defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>; -defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>; -defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>; -defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>; - -multiclass NeonI_2VMisc_SHIFT opcode> { - let DecoderMethod = "DecodeSHLLInstruction" in { - def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR128:$Rd), - (ins VPR64:$Rn, uimm_exact8:$Imm), - asmop # "\t$Rd.8h, $Rn.8b, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR128:$Rd), - (ins VPR64:$Rn, uimm_exact16:$Imm), - asmop # "\t$Rd.4s, $Rn.4h, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR128:$Rd), - (ins VPR64:$Rn, uimm_exact32:$Imm), - asmop # "\t$Rd.2d, $Rn.2s, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), - (ins VPR128:$Rn, uimm_exact8:$Imm), - asmop # "2\t$Rd.8h, $Rn.16b, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), - (ins VPR128:$Rn, uimm_exact16:$Imm), - asmop # "2\t$Rd.4s, $Rn.8h, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), - (ins VPR128:$Rn, uimm_exact32:$Imm), - asmop # "2\t$Rd.2d, $Rn.4s, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - } -} - -defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>; - -class NeonI_SHLL_Patterns - : Pat<(DesTy (shl - (DesTy (ExtOp (OpTy VPR64:$Rn))), - (DesTy (Neon_vdup - (i32 Neon_Imm:$Imm))))), - (!cast("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>; - -class NeonI_SHLL_High_Patterns - : Pat<(DesTy (shl - (DesTy (ExtOp - (OpTy (GetHigh VPR128:$Rn)))), - (DesTy (Neon_vdup - (i32 Neon_Imm:$Imm))))), - (!cast("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>; - -def : NeonI_SHLL_Patterns; -def : NeonI_SHLL_Patterns; -def : NeonI_SHLL_Patterns; -def : NeonI_SHLL_Patterns; -def : NeonI_SHLL_Patterns; -def : NeonI_SHLL_Patterns; -def : NeonI_SHLL_High_Patterns; -def : NeonI_SHLL_High_Patterns; -def : NeonI_SHLL_High_Patterns; -def : NeonI_SHLL_High_Patterns; -def : NeonI_SHLL_High_Patterns; -def : NeonI_SHLL_High_Patterns; - -multiclass NeonI_2VMisc_SD_Narrow opcode> { - def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4h, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - let Constraints = "$src = $Rd" in { - def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.8h, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - 
(outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>; - -multiclass NeonI_2VMisc_Narrow_Pattern { - - def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))), - (!cast(prefix # "4s4h") (v4f32 VPR128:$Rn))>; - - def : Pat<(v8i16 (concat_vectors - (v4i16 VPR64:$src), - (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))), - (!cast(prefix # "4s8h") - (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), - (v4f32 VPR128:$Rn))>; - - def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))), - (!cast(prefix # "2d2s") (v2f64 VPR128:$Rn))>; - - def : Pat<(v4f32 (concat_vectors - (v2f32 VPR64:$src), - (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))), - (!cast(prefix # "2d4s") - (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), - (v2f64 VPR128:$Rn))>; -} - -defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>; - -multiclass NeonI_2VMisc_D_Narrow opcode> { - def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - } - - def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))), - (!cast(prefix # "2d2s") VPR128:$Rn)>; - - def : Pat<(v4f32 (concat_vectors - (v2f32 VPR64:$src), - (v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))), - (!cast(prefix # "2d4s") - (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), - VPR128:$Rn)>; -} - -defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>; - -def Neon_High4Float : PatFrag<(ops node:$in), - (extract_subvector (v4f32 node:$in), (iPTR 2))>; - -multiclass NeonI_2VMisc_HS_Extend opcode> { - def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.4s, $Rn.4h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2d, $Rn.2s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "2\t$Rd.4s, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "2\t$Rd.2d, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>; - -multiclass NeonI_2VMisc_Extend_Pattern { - def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))), - (!cast(prefix # "4h4s") VPR64:$Rn)>; - - def : Pat<(v4f32 (int_arm_neon_vcvthf2fp - (v4i16 (Neon_High8H - (v8i16 VPR128:$Rn))))), - (!cast(prefix # "8h4s") VPR128:$Rn)>; - - def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))), - (!cast(prefix # "2s2d") VPR64:$Rn)>; - - def : Pat<(v2f64 (fextend - (v2f32 (Neon_High4Float - (v4f32 VPR128:$Rn))))), - (!cast(prefix # "4s2d") VPR128:$Rn)>; -} - -defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">; - -multiclass NeonI_2VMisc_SD_Conv opcode, - ValueType ResTy4s, ValueType OpTy4s, - ValueType ResTy2d, ValueType OpTy2d, - ValueType ResTy2s, ValueType OpTy2s, - SDPatternOperator Neon_Op> { - - def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode, - (outs VPR128:$Rd), (ins 
VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [(set (ResTy4s VPR128:$Rd), - (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.2d", - [(set (ResTy2d VPR128:$Rd), - (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [(set (ResTy2s VPR64:$Rd), - (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -multiclass NeonI_2VMisc_fp_to_int opcode, SDPatternOperator Neon_Op> { - defm _ : NeonI_2VMisc_SD_Conv; -} - -defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010, - int_arm_neon_vcvtns>; -defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010, - int_arm_neon_vcvtnu>; -defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010, - int_arm_neon_vcvtps>; -defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010, - int_arm_neon_vcvtpu>; -defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011, - int_arm_neon_vcvtms>; -defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011, - int_arm_neon_vcvtmu>; -defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>; -defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>; -defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100, - int_arm_neon_vcvtas>; -defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100, - int_arm_neon_vcvtau>; - -multiclass NeonI_2VMisc_int_to_fp opcode, SDPatternOperator Neon_Op> { - defm _ : NeonI_2VMisc_SD_Conv; -} - -defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>; -defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>; - -multiclass NeonI_2VMisc_fp_to_fp opcode, SDPatternOperator Neon_Op> { - defm _ : NeonI_2VMisc_SD_Conv; -} - -defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000, - int_aarch64_neon_frintn>; -defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>; -defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>; -defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>; -defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>; -defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>; -defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>; -defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101, - int_arm_neon_vrecpe>; -defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101, - int_arm_neon_vrsqrte>; -let SchedRW = [WriteFPSqrt, ReadFPSqrt] in { -defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>; -} - -multiclass NeonI_2VMisc_S_Conv opcode, SDPatternOperator Neon_Op> { - def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100, - int_arm_neon_vrecpe>; -defm URSQRTE : 
NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100, - int_arm_neon_vrsqrte>; - -// Crypto Class -class NeonI_Cryptoaes_2v size, bits<5> opcode, - string asmop, SDPatternOperator opnode> - : NeonI_Crypto_AES, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let Predicates = [HasNEON, HasCrypto]; -} - -def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>; -def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>; - -class NeonI_Cryptoaes size, bits<5> opcode, - string asmop, SDPatternOperator opnode> - : NeonI_Crypto_AES, - Sched<[WriteFPALU, ReadFPALU]>; - -def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>; -def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>; - -class NeonI_Cryptosha_vv size, bits<5> opcode, - string asmop, SDPatternOperator opnode> - : NeonI_Crypto_SHA, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let Predicates = [HasNEON, HasCrypto]; -} - -def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1", - int_arm_neon_sha1su1>; -def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0", - int_arm_neon_sha256su0>; - -class NeonI_Cryptosha_ss size, bits<5> opcode, - string asmop, SDPatternOperator opnode> - : NeonI_Crypto_SHA, - Sched<[WriteFPALU, ReadFPALU]> { - let Predicates = [HasNEON, HasCrypto]; - let hasSideEffects = 0; -} - -def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>; -def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)), - (COPY_TO_REGCLASS (SHA1H (COPY_TO_REGCLASS i32:$Rn, FPR32)), GPR32)>; - - -class NeonI_Cryptosha3_vvv size, bits<3> opcode, string asmop, - SDPatternOperator opnode> - : NeonI_Crypto_3VSHA, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let Predicates = [HasNEON, HasCrypto]; -} - -def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0", - int_arm_neon_sha1su0>; -def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1", - int_arm_neon_sha256su1>; - -class NeonI_Cryptosha3_qqv size, bits<3> opcode, string asmop, - SDPatternOperator opnode> - : NeonI_Crypto_3VSHA, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let Predicates = [HasNEON, HasCrypto]; -} - -def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h", - int_arm_neon_sha256h>; -def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2", - int_arm_neon_sha256h2>; - -class NeonI_Cryptosha3_qsv size, bits<3> opcode, string asmop> - : NeonI_Crypto_3VSHA, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let hasSideEffects = 0; - let Predicates = [HasNEON, HasCrypto]; -} - -def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c">; -def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p">; -def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m">; - -def : Pat<(int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk), - (SHA1C v4i32:$hash_abcd, - (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>; -def : Pat<(int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk), - (SHA1M v4i32:$hash_abcd, - (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>; -def : Pat<(int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk), - (SHA1P v4i32:$hash_abcd, - (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>; - -// Additional patterns to match shl to USHL. 
-def : Pat<(v8i8 (shl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), - (USHLvvv_8B $Rn, $Rm)>; -def : Pat<(v4i16 (shl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), - (USHLvvv_4H $Rn, $Rm)>; -def : Pat<(v2i32 (shl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), - (USHLvvv_2S $Rn, $Rm)>; -def : Pat<(v1i64 (shl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (USHLddd $Rn, $Rm)>; -def : Pat<(v16i8 (shl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), - (USHLvvv_16B $Rn, $Rm)>; -def : Pat<(v8i16 (shl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), - (USHLvvv_8H $Rn, $Rm)>; -def : Pat<(v4i32 (shl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), - (USHLvvv_4S $Rn, $Rm)>; -def : Pat<(v2i64 (shl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), - (USHLvvv_2D $Rn, $Rm)>; - -def : Pat<(v1i8 (shl (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)), - sub_8)>; -def : Pat<(v1i16 (shl (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)), - sub_16)>; -def : Pat<(v1i32 (shl (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - sub_32)>; - -// Additional patterns to match sra, srl. -// For a vector right shift by vector, the shift amounts of SSHL/USHL are -// negative. Negate the vector of shift amount first. -def : Pat<(v8i8 (srl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), - (USHLvvv_8B $Rn, (NEG8b $Rm))>; -def : Pat<(v4i16 (srl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), - (USHLvvv_4H $Rn, (NEG4h $Rm))>; -def : Pat<(v2i32 (srl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), - (USHLvvv_2S $Rn, (NEG2s $Rm))>; -def : Pat<(v1i64 (srl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (USHLddd $Rn, (NEGdd $Rm))>; -def : Pat<(v16i8 (srl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), - (USHLvvv_16B $Rn, (NEG16b $Rm))>; -def : Pat<(v8i16 (srl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), - (USHLvvv_8H $Rn, (NEG8h $Rm))>; -def : Pat<(v4i32 (srl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), - (USHLvvv_4S $Rn, (NEG4s $Rm))>; -def : Pat<(v2i64 (srl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), - (USHLvvv_2D $Rn, (NEG2d $Rm))>; - -def : Pat<(v1i8 (srl (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (NEG8b (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8))), - sub_8)>; -def : Pat<(v1i16 (srl (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (NEG4h (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16))), - sub_16)>; -def : Pat<(v1i32 (srl (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (NEG2s (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32))), - sub_32)>; - -def : Pat<(v8i8 (sra (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), - (SSHLvvv_8B $Rn, (NEG8b $Rm))>; -def : Pat<(v4i16 (sra (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), - (SSHLvvv_4H $Rn, (NEG4h $Rm))>; -def : Pat<(v2i32 (sra (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), - (SSHLvvv_2S $Rn, (NEG2s $Rm))>; -def : Pat<(v1i64 (sra (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (SSHLddd $Rn, (NEGdd $Rm))>; -def : Pat<(v16i8 (sra (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), - (SSHLvvv_16B $Rn, (NEG16b $Rm))>; -def : Pat<(v8i16 (sra (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), - (SSHLvvv_8H $Rn, (NEG8h $Rm))>; -def : Pat<(v4i32 (sra (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), - (SSHLvvv_4S $Rn, (NEG4s $Rm))>; -def 
: Pat<(v2i64 (sra (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), - (SSHLvvv_2D $Rn, (NEG2d $Rm))>; - -def : Pat<(v1i8 (sra (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), - (EXTRACT_SUBREG - (SSHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (NEG8b (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8))), - sub_8)>; -def : Pat<(v1i16 (sra (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (EXTRACT_SUBREG - (SSHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (NEG4h (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16))), - sub_16)>; -def : Pat<(v1i32 (sra (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (EXTRACT_SUBREG - (SSHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (NEG2s (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32))), - sub_32)>; - -// -// Patterns for handling half-precision values -// - -// Convert between f16 value and f32 value -def : Pat<(f32 (f16_to_f32 (i32 GPR32:$Rn))), - (FCVTsh (EXTRACT_SUBREG (FMOVsw $Rn), sub_16))>; -def : Pat<(i32 (f32_to_f16 (f32 FPR32:$Rn))), - (FMOVws (SUBREG_TO_REG (i64 0), (f16 (FCVThs $Rn)), sub_16))>; - -// Convert f16 value coming in as i16 value to f32 -def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))), - (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>; -def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))), - (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>; - -def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 ( - f32_to_f16 (f32 FPR32:$Rn))))))), - (f32 FPR32:$Rn)>; - -// Patterns for vector extract of half-precision FP value in i16 storage type -def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract - (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))), - (FCVTsh (f16 (DUPhv_H - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - neon_uimm2_bare:$Imm)))>; - -def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract - (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))), - (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>; - -// Patterns for vector insert of half-precision FP value 0 in i16 storage type -def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), - (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))), - (neon_uimm3_bare:$Imm))), - (v8i16 (INSELh (v8i16 VPR128:$Rn), - (v8i16 (SUBREG_TO_REG (i64 0), - (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)), - sub_16)), - neon_uimm3_bare:$Imm, 0))>; - -def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), - (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))), - (neon_uimm2_bare:$Imm))), - (v4i16 (EXTRACT_SUBREG - (v8i16 (INSELh - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (v8i16 (SUBREG_TO_REG (i64 0), - (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)), - sub_16)), - neon_uimm2_bare:$Imm, 0)), - sub_64))>; - -// Patterns for vector insert of half-precision FP value in i16 storage type -def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), - (i32 (assertsext (i32 (fp_to_sint - (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))), - (neon_uimm3_bare:$Imm))), - (v8i16 (INSELh (v8i16 VPR128:$Rn), - (v8i16 (SUBREG_TO_REG (i64 0), - (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)), - sub_16)), - neon_uimm3_bare:$Imm, 0))>; - -def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), - (i32 (assertsext (i32 (fp_to_sint - (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))), - (neon_uimm2_bare:$Imm))), - (v4i16 (EXTRACT_SUBREG - (v8i16 (INSELh - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (v8i16 (SUBREG_TO_REG (i64 0), - (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)), - sub_16)), - neon_uimm2_bare:$Imm, 
0)), - sub_64))>; - -def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), - (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)), - (neon_uimm3_bare:$Imm1))), - (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src), - neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>; - -// Patterns for vector copy of half-precision FP value in i16 storage type -def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), - (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32 - (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)), - 65535)))))))), - (neon_uimm3_bare:$Imm1))), - (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src), - neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>; - -def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), - (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32 - (vector_extract (v4i16 VPR64:$src), neon_uimm3_bare:$Imm2)), - 65535)))))))), - (neon_uimm3_bare:$Imm1))), - (v4i16 (EXTRACT_SUBREG - (v8i16 (INSELh - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), - neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2)), - sub_64))>; - - diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp deleted file mode 100644 index 103aeb00d873..000000000000 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ /dev/null @@ -1,157 +0,0 @@ -//===-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst -==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains code to lower AArch64 MachineInstrs to their corresponding -// MCInst records. 
-// -//===----------------------------------------------------------------------===// - -#include "AArch64AsmPrinter.h" -#include "AArch64TargetMachine.h" -#include "MCTargetDesc/AArch64MCExpr.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/IR/Mangler.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" - -using namespace llvm; - -MCOperand -AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO, - const MCSymbol *Sym) const { - const MCExpr *Expr = nullptr; - - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, OutContext); - - switch (MO.getTargetFlags()) { - case AArch64II::MO_GOT: - Expr = AArch64MCExpr::CreateGOT(Expr, OutContext); - break; - case AArch64II::MO_GOT_LO12: - Expr = AArch64MCExpr::CreateGOTLo12(Expr, OutContext); - break; - case AArch64II::MO_LO12: - Expr = AArch64MCExpr::CreateLo12(Expr, OutContext); - break; - case AArch64II::MO_DTPREL_G1: - Expr = AArch64MCExpr::CreateDTPREL_G1(Expr, OutContext); - break; - case AArch64II::MO_DTPREL_G0_NC: - Expr = AArch64MCExpr::CreateDTPREL_G0_NC(Expr, OutContext); - break; - case AArch64II::MO_GOTTPREL: - Expr = AArch64MCExpr::CreateGOTTPREL(Expr, OutContext); - break; - case AArch64II::MO_GOTTPREL_LO12: - Expr = AArch64MCExpr::CreateGOTTPRELLo12(Expr, OutContext); - break; - case AArch64II::MO_TLSDESC: - Expr = AArch64MCExpr::CreateTLSDesc(Expr, OutContext); - break; - case AArch64II::MO_TLSDESC_LO12: - Expr = AArch64MCExpr::CreateTLSDescLo12(Expr, OutContext); - break; - case AArch64II::MO_TPREL_G1: - Expr = AArch64MCExpr::CreateTPREL_G1(Expr, OutContext); - break; - case AArch64II::MO_TPREL_G0_NC: - Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext); - break; - case AArch64II::MO_ABS_G3: - Expr = AArch64MCExpr::CreateABS_G3(Expr, OutContext); - break; - case AArch64II::MO_ABS_G2_NC: - Expr = AArch64MCExpr::CreateABS_G2_NC(Expr, OutContext); - break; - case AArch64II::MO_ABS_G1_NC: - Expr = AArch64MCExpr::CreateABS_G1_NC(Expr, OutContext); - break; - case AArch64II::MO_ABS_G0_NC: - Expr = AArch64MCExpr::CreateABS_G0_NC(Expr, OutContext); - break; - case AArch64II::MO_NO_FLAG: - // Expr is already correct - break; - default: - llvm_unreachable("Unexpected MachineOperand flag"); - } - - if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(MO.getOffset(), - OutContext), - OutContext); - - return MCOperand::CreateExpr(Expr); -} - -bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO, - MCOperand &MCOp) const { - switch (MO.getType()) { - default: llvm_unreachable("unknown operand type"); - case MachineOperand::MO_Register: - if (MO.isImplicit()) - return false; - assert(!MO.getSubReg() && "Subregs should be eliminated!"); - MCOp = MCOperand::CreateReg(MO.getReg()); - break; - case MachineOperand::MO_Immediate: - MCOp = MCOperand::CreateImm(MO.getImm()); - break; - case MachineOperand::MO_FPImmediate: { - assert(MO.getFPImm()->isZero() && "Only fp imm 0.0 is supported"); - MCOp = MCOperand::CreateFPImm(0.0); - break; - } - case MachineOperand::MO_BlockAddress: - MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress())); - break; - case MachineOperand::MO_ExternalSymbol: - MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName())); - break; - case MachineOperand::MO_GlobalAddress: - MCOp = lowerSymbolOperand(MO, 
getSymbol(MO.getGlobal())); - break; - case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - MO.getMBB()->getSymbol(), OutContext)); - break; - case MachineOperand::MO_JumpTableIndex: - MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex())); - break; - case MachineOperand::MO_ConstantPoolIndex: - MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex())); - break; - case MachineOperand::MO_RegisterMask: - // Ignore call clobbers - return false; - - } - - return true; -} - -void llvm::LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, - MCInst &OutMI, - AArch64AsmPrinter &AP) { - OutMI.setOpcode(MI->getOpcode()); - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - - MCOperand MCOp; - if (AP.lowerOperand(MO, MCOp)) - OutMI.addOperand(MCOp); - } -} diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp deleted file mode 100644 index f45d8f784f42..000000000000 --- a/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp +++ /dev/null @@ -1,18 +0,0 @@ -//===-- AArch64MachineFuctionInfo.cpp - AArch64 machine function info -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file just contains the anchor for the AArch64MachineFunctionInfo to -// force vtable emission. -// -//===----------------------------------------------------------------------===// -#include "AArch64MachineFunctionInfo.h" - -using namespace llvm; - -void AArch64MachineFunctionInfo::anchor() { } diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h deleted file mode 100644 index 33da54f97fda..000000000000 --- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ /dev/null @@ -1,149 +0,0 @@ -//=- AArch64MachineFuctionInfo.h - AArch64 machine function info -*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares AArch64-specific per-machine-function information. -// -//===----------------------------------------------------------------------===// - -#ifndef AARCH64MACHINEFUNCTIONINFO_H -#define AARCH64MACHINEFUNCTIONINFO_H - -#include "llvm/CodeGen/MachineFunction.h" - -namespace llvm { - -/// This class is derived from MachineFunctionInfo and contains private AArch64 -/// target-specific information for each MachineFunction. -class AArch64MachineFunctionInfo : public MachineFunctionInfo { - virtual void anchor(); - - /// Number of bytes of arguments this function has on the stack. If the callee - /// is expected to restore the argument stack this should be a multiple of 16, - /// all usable during a tail call. - /// - /// The alternative would forbid tail call optimisation in some cases: if we - /// want to transfer control from a function with 8-bytes of stack-argument - /// space to a function with 16-bytes then misalignment of this value would - /// make a stack adjustment necessary, which could not be undone by the - /// callee. - unsigned BytesInStackArgArea; - - /// The number of bytes to restore to deallocate space for incoming - /// arguments. 
Canonically 0 in the C calling convention, but non-zero when - /// callee is expected to pop the args. - unsigned ArgumentStackToRestore; - - /// If the stack needs to be adjusted on frame entry in two stages, this - /// records the size of the first adjustment just prior to storing - /// callee-saved registers. The callee-saved slots are addressed assuming - /// SP == - InitialStackAdjust. - unsigned InitialStackAdjust; - - /// Number of local-dynamic TLS accesses. - unsigned NumLocalDynamics; - - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The Frame index of the area where LowerFormalArguments puts the - /// general-purpose registers that might contain variadic parameters. - int VariadicGPRIdx; - - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The size of the frame object used to store the general-purpose registers - /// which might contain variadic arguments. This is the offset from - /// VariadicGPRIdx to what's stored in __gr_top. - unsigned VariadicGPRSize; - - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The Frame index of the area where LowerFormalArguments puts the - /// floating-point registers that might contain variadic parameters. - int VariadicFPRIdx; - - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The size of the frame object used to store the floating-point registers - /// which might contain variadic arguments. This is the offset from - /// VariadicFPRIdx to what's stored in __vr_top. - unsigned VariadicFPRSize; - - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The Frame index of an object pointing just past the last known stacked - /// argument on entry to a variadic function. This goes into the __stack field - /// of the va_list type. - int VariadicStackIdx; - - /// The offset of the frame pointer from the stack pointer on function - /// entry. This is expected to be negative. 
- int FramePointerOffset; - -public: - AArch64MachineFunctionInfo() - : BytesInStackArgArea(0), - ArgumentStackToRestore(0), - InitialStackAdjust(0), - NumLocalDynamics(0), - VariadicGPRIdx(0), - VariadicGPRSize(0), - VariadicFPRIdx(0), - VariadicFPRSize(0), - VariadicStackIdx(0), - FramePointerOffset(0) {} - - explicit AArch64MachineFunctionInfo(MachineFunction &MF) - : BytesInStackArgArea(0), - ArgumentStackToRestore(0), - InitialStackAdjust(0), - NumLocalDynamics(0), - VariadicGPRIdx(0), - VariadicGPRSize(0), - VariadicFPRIdx(0), - VariadicFPRSize(0), - VariadicStackIdx(0), - FramePointerOffset(0) {} - - unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; } - void setBytesInStackArgArea (unsigned bytes) { BytesInStackArgArea = bytes;} - - unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; } - void setArgumentStackToRestore(unsigned bytes) { - ArgumentStackToRestore = bytes; - } - - unsigned getInitialStackAdjust() const { return InitialStackAdjust; } - void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; } - - unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } - void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } - - int getVariadicGPRIdx() const { return VariadicGPRIdx; } - void setVariadicGPRIdx(int Idx) { VariadicGPRIdx = Idx; } - - unsigned getVariadicGPRSize() const { return VariadicGPRSize; } - void setVariadicGPRSize(unsigned Size) { VariadicGPRSize = Size; } - - int getVariadicFPRIdx() const { return VariadicFPRIdx; } - void setVariadicFPRIdx(int Idx) { VariadicFPRIdx = Idx; } - - unsigned getVariadicFPRSize() const { return VariadicFPRSize; } - void setVariadicFPRSize(unsigned Size) { VariadicFPRSize = Size; } - - int getVariadicStackIdx() const { return VariadicStackIdx; } - void setVariadicStackIdx(int Idx) { VariadicStackIdx = Idx; } - - int getFramePointerOffset() const { return FramePointerOffset; } - void setFramePointerOffset(int Idx) { FramePointerOffset = Idx; } - -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp deleted file mode 100644 index 5382effd7bb9..000000000000 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ /dev/null @@ -1,186 +0,0 @@ -//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the AArch64 implementation of the TargetRegisterInfo -// class. 
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "AArch64RegisterInfo.h"
-#include "AArch64FrameLowering.h"
-#include "AArch64MachineFunctionInfo.h"
-#include "AArch64TargetMachine.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-
-using namespace llvm;
-
-#define GET_REGINFO_TARGET_DESC
-#include "AArch64GenRegisterInfo.inc"
-
-AArch64RegisterInfo::AArch64RegisterInfo()
-  : AArch64GenRegisterInfo(AArch64::X30) {
-}
-
-const MCPhysReg *
-AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  return CSR_PCS_SaveList;
-}
-
-const uint32_t*
-AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID) const {
-  return CSR_PCS_RegMask;
-}
-
-const uint32_t *AArch64RegisterInfo::getTLSDescCallPreservedMask() const {
-  return TLSDesc_RegMask;
-}
-
-const TargetRegisterClass *
-AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
-  if (RC == &AArch64::FlagClassRegClass)
-    return &AArch64::GPR64RegClass;
-
-  return RC;
-}
-
-
-
-BitVector
-AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
-  BitVector Reserved(getNumRegs());
-  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
-  Reserved.set(AArch64::XSP);
-  Reserved.set(AArch64::WSP);
-
-  Reserved.set(AArch64::XZR);
-  Reserved.set(AArch64::WZR);
-
-  if (TFI->hasFP(MF)) {
-    Reserved.set(AArch64::X29);
-    Reserved.set(AArch64::W29);
-  }
-
-  return Reserved;
-}
-
-static bool hasFrameOffset(int opcode) {
-  return opcode != AArch64::LD1x2_8B && opcode != AArch64::LD1x3_8B &&
-         opcode != AArch64::LD1x4_8B && opcode != AArch64::ST1x2_8B &&
-         opcode != AArch64::ST1x3_8B && opcode != AArch64::ST1x4_8B &&
-         opcode != AArch64::LD1x2_16B && opcode != AArch64::LD1x3_16B &&
-         opcode != AArch64::LD1x4_16B && opcode != AArch64::ST1x2_16B &&
-         opcode != AArch64::ST1x3_16B && opcode != AArch64::ST1x4_16B;
-}
-
-void
-AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
-                                         int SPAdj,
-                                         unsigned FIOperandNum,
-                                         RegScavenger *RS) const {
-  assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet");
-  MachineInstr &MI = *MBBI;
-  MachineBasicBlock &MBB = *MI.getParent();
-  MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  const AArch64FrameLowering *TFI =
-    static_cast<const AArch64FrameLowering *>(MF.getTarget().getFrameLowering());
-
-  // In order to work out the base and offset for addressing, the FrameLowering
-  // code needs to know (sometimes) whether the instruction is storing/loading a
-  // callee-saved register, or whether it's a more generic
-  // operation. Fortunately the frame indices are used *only* for that purpose
-  // and are contiguous, so we can check here.
-  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-  int MinCSFI = 0;
-  int MaxCSFI = -1;
-
-  if (CSI.size()) {
-    MinCSFI = CSI[0].getFrameIdx();
-    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
-  }
-
-  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
-  bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI;
-
-  unsigned FrameReg;
-  int64_t Offset;
-  Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj,
-                                           IsCalleeSaveOp);
-  // A vector load/store instruction doesn't have an offset operand.
- bool HasOffsetOp = hasFrameOffset(MI.getOpcode()); - if (HasOffsetOp) - Offset += MI.getOperand(FIOperandNum + 1).getImm(); - - // DBG_VALUE instructions have no real restrictions so they can be handled - // easily. - if (MI.isDebugValue()) { - MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/ false); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); - return; - } - - const AArch64InstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - int MinOffset, MaxOffset, OffsetScale; - if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s || !HasOffsetOp) { - MinOffset = 0; - MaxOffset = 0xfff; - OffsetScale = 1; - } else { - // Load/store of a stack object - TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset); - } - - // There are two situations we don't use frame + offset directly in the - // instruction: - // (1) The offset can't really be scaled - // (2) Can't encode offset as it doesn't have an offset operand - if ((Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) || - (!HasOffsetOp && Offset != 0)) { - unsigned BaseReg = - MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); - emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII, - BaseReg, FrameReg, BaseReg, Offset); - FrameReg = BaseReg; - Offset = 0; - } - - // Negative offsets are expected if we address from FP, but for - // now this checks nothing has gone horribly wrong. - assert(Offset >= 0 && "Unexpected negative offset from SP"); - - MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, true); - if (HasOffsetOp) - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale); -} - -unsigned -AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (TFI->hasFP(MF)) - return AArch64::X29; - else - return AArch64::XSP; -} - -bool -AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const AArch64FrameLowering *AFI - = static_cast(TFI); - return AFI->useFPForAddressing(MF); -} diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h deleted file mode 100644 index 5b501f9cc160..000000000000 --- a/lib/Target/AArch64/AArch64RegisterInfo.h +++ /dev/null @@ -1,79 +0,0 @@ -//==- AArch64RegisterInfo.h - AArch64 Register Information Impl -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the AArch64 implementation of the MCRegisterInfo class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_AARCH64REGISTERINFO_H -#define LLVM_TARGET_AARCH64REGISTERINFO_H - -#include "llvm/Target/TargetRegisterInfo.h" - -#define GET_REGINFO_HEADER -#include "AArch64GenRegisterInfo.inc" - -namespace llvm { - -class AArch64InstrInfo; -class AArch64Subtarget; - -struct AArch64RegisterInfo : public AArch64GenRegisterInfo { - AArch64RegisterInfo(); - - const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF =nullptr) const override; - const uint32_t *getCallPreservedMask(CallingConv::ID) const override; - - unsigned getCSRFirstUseCost() const override { - // The cost will be compared against BlockFrequency where entry has the - // value of 1 << 14. A value of 5 will choose to spill or split really - // cold path instead of using a callee-saved register. - return 5; - } - - const uint32_t *getTLSDescCallPreservedMask() const; - - BitVector getReservedRegs(const MachineFunction &MF) const override; - unsigned getFrameRegister(const MachineFunction &MF) const override; - - void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - unsigned FIOperandNum, - RegScavenger *Rs = nullptr) const override; - - /// getCrossCopyRegClass - Returns a legal register class to copy a register - /// in the specified class to or from. Returns original class if it is - /// possible to copy between a two registers of the specified class. - const TargetRegisterClass * - getCrossCopyRegClass(const TargetRegisterClass *RC) const override; - - /// getLargestLegalSuperClass - Returns the largest super class of RC that is - /// legal to use in the current sub-target and has the same spill size. - const TargetRegisterClass* - getLargestLegalSuperClass(const TargetRegisterClass *RC) const override { - if (RC == &AArch64::tcGPR64RegClass) - return &AArch64::GPR64RegClass; - - return RC; - } - - bool requiresRegisterScavenging(const MachineFunction &MF) const override { - return true; - } - - bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { - return true; - } - - bool useFPForScavengingIndex(const MachineFunction &MF) const override; -}; - -} // end namespace llvm - -#endif // LLVM_TARGET_AARCH64REGISTERINFO_H diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td deleted file mode 100644 index 9de7abdf5ff0..000000000000 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ /dev/null @@ -1,290 +0,0 @@ -//===- AArch64RegisterInfo.td - ARM Register defs ----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains declarations that describe the AArch64 register file -// -//===----------------------------------------------------------------------===// - -let Namespace = "AArch64" in { -def sub_128 : SubRegIndex<128>; -def sub_64 : SubRegIndex<64>; -def sub_32 : SubRegIndex<32>; -def sub_16 : SubRegIndex<16>; -def sub_8 : SubRegIndex<8>; - -// Note: Code depends on these having consecutive numbers. 
-def qqsub : SubRegIndex<256, 256>; - -def qsub_0 : SubRegIndex<128>; -def qsub_1 : SubRegIndex<128, 128>; -def qsub_2 : ComposedSubRegIndex; -def qsub_3 : ComposedSubRegIndex; - -def dsub_0 : SubRegIndex<64>; -def dsub_1 : SubRegIndex<64, 64>; -def dsub_2 : ComposedSubRegIndex; -def dsub_3 : ComposedSubRegIndex; -} - -// Registers are identified with 5-bit ID numbers. -class AArch64Reg enc, string n> : Register { - let HWEncoding = enc; - let Namespace = "AArch64"; -} - -class AArch64RegWithSubs enc, string n, list subregs = [], - list inds = []> - : AArch64Reg { - let SubRegs = subregs; - let SubRegIndices = inds; -} - -//===----------------------------------------------------------------------===// -// Integer registers: w0-w30, wzr, wsp, x0-x30, xzr, sp -//===----------------------------------------------------------------------===// - -foreach Index = 0-30 in { - def W#Index : AArch64Reg< Index, "w"#Index>, DwarfRegNum<[Index]>; -} - -def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>; -def WZR : AArch64Reg<31, "wzr">; - -// Could be combined with previous loop, but this way leaves w and x registers -// consecutive as LLVM register numbers, which makes for easier debugging. -foreach Index = 0-30 in { - def X#Index : AArch64RegWithSubs("W"#Index)], [sub_32]>, - DwarfRegNum<[Index]>; -} - -def XSP : AArch64RegWithSubs<31, "sp", [WSP], [sub_32]>, DwarfRegNum<[31]>; -def XZR : AArch64RegWithSubs<31, "xzr", [WZR], [sub_32]>; - -// Most instructions treat register 31 as zero for reads and a black-hole for -// writes. - -// Note that the order of registers is important for the Disassembler here: -// tablegen uses it to form MCRegisterClass::getRegister, which we assume can -// take an encoding value. -def GPR32 : RegisterClass<"AArch64", [i32], 32, - (add (sequence "W%u", 0, 30), WZR)> { -} - -def GPR64 : RegisterClass<"AArch64", [i64], 64, - (add (sequence "X%u", 0, 30), XZR)> { -} - -def GPR32nowzr : RegisterClass<"AArch64", [i32], 32, - (sequence "W%u", 0, 30)> { -} - -def GPR64noxzr : RegisterClass<"AArch64", [i64], 64, - (sequence "X%u", 0, 30)> { -} - -// For tail calls, we can't use callee-saved registers or the structure-return -// register, as they are supposed to be live across function calls and may be -// clobbered by the epilogue. -def tcGPR64 : RegisterClass<"AArch64", [i64], 64, - (add (sequence "X%u", 0, 7), - (sequence "X%u", 9, 18))> { -} - - -// Certain addressing-useful instructions accept sp directly. Again the order of -// registers is important to the Disassembler. -def GPR32wsp : RegisterClass<"AArch64", [i32], 32, - (add (sequence "W%u", 0, 30), WSP)> { -} - -def GPR64xsp : RegisterClass<"AArch64", [i64], 64, - (add (sequence "X%u", 0, 30), XSP)> { -} - -// Some aliases *only* apply to SP (e.g. MOV uses different encoding for SP and -// non-SP variants). 
We can't use a bare register in those patterns because -// TableGen doesn't like it, so we need a class containing just stack registers -def Rxsp : RegisterClass<"AArch64", [i64], 64, - (add XSP)> { -} - -def Rwsp : RegisterClass<"AArch64", [i32], 32, - (add WSP)> { -} - -//===----------------------------------------------------------------------===// -// Scalar registers in the vector unit: -// b0-b31, h0-h31, s0-s31, d0-d31, q0-q31 -//===----------------------------------------------------------------------===// - -foreach Index = 0-31 in { - def B # Index : AArch64Reg< Index, "b" # Index>, - DwarfRegNum<[!add(Index, 64)]>; - - def H # Index : AArch64RegWithSubs("B" # Index)], [sub_8]>, - DwarfRegNum<[!add(Index, 64)]>; - - def S # Index : AArch64RegWithSubs("H" # Index)], [sub_16]>, - DwarfRegNum<[!add(Index, 64)]>; - - def D # Index : AArch64RegWithSubs("S" # Index)], [sub_32]>, - DwarfRegNum<[!add(Index, 64)]>; - - def Q # Index : AArch64RegWithSubs("D" # Index)], [sub_64]>, - DwarfRegNum<[!add(Index, 64)]>; -} - - -def FPR8 : RegisterClass<"AArch64", [v1i8], 8, - (sequence "B%u", 0, 31)> { -} - -def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 16, - (sequence "H%u", 0, 31)> { -} - -def FPR32 : RegisterClass<"AArch64", [f32, v1i32], 32, - (sequence "S%u", 0, 31)> { -} - -def FPR64 : RegisterClass<"AArch64", - [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64], - 64, (sequence "D%u", 0, 31)>; - -def FPR128 : RegisterClass<"AArch64", - [f128, v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], - 128, (sequence "Q%u", 0, 31)>; - -def FPR64Lo : RegisterClass<"AArch64", - [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64], - 64, (sequence "D%u", 0, 15)>; - -def FPR128Lo : RegisterClass<"AArch64", - [f128, v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], - 128, (sequence "Q%u", 0, 15)>; - -//===----------------------------------------------------------------------===// -// Vector registers: -//===----------------------------------------------------------------------===// - -def VPR64AsmOperand : AsmOperandClass { - let Name = "VPR"; - let PredicateMethod = "isReg"; - let RenderMethod = "addRegOperands"; -} - -def VPR64 : RegisterOperand; - -def VPR128 : RegisterOperand; - -def VPR64Lo : RegisterOperand; - -def VPR128Lo : RegisterOperand; - -// Flags register -def NZCV : Register<"nzcv"> { - let Namespace = "AArch64"; -} - -def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> { - let CopyCost = -1; - let isAllocatable = 0; -} - -//===----------------------------------------------------------------------===// -// Consecutive vector registers -//===----------------------------------------------------------------------===// -// 2 Consecutive 64-bit registers: D0_D1, D1_D2, ..., D31_D0 -def Tuples2D : RegisterTuples<[dsub_0, dsub_1], - [(rotl FPR64, 0), (rotl FPR64, 1)]>; - -// 3 Consecutive 64-bit registers: D0_D1_D2, ..., D31_D0_D1 -def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2)]>; - -// 4 Consecutive 64-bit registers: D0_D1_D2_D3, ..., D31_D0_D1_D2 -def Tuples4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2), (rotl FPR64, 3)]>; - -// 2 Consecutive 128-bit registers: Q0_Q1, Q1_Q2, ..., Q30_Q31 -def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], - [(rotl FPR128, 0), (rotl FPR128, 1)]>; - -// 3 Consecutive 128-bit registers: Q0_Q1_Q2, ..., Q31_Q0_Q1 -def Tuples3Q : RegisterTuples<[qsub_0, qsub_1, qsub_2], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2)]>; - -// 4 Consecutive 
128-bit registers: Q0_Q1_Q2_Q3, ..., Q31_Q0_Q1_Q2 -def Tuples4Q : RegisterTuples<[qsub_0, qsub_1, qsub_2, qsub_3], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2), (rotl FPR128, 3)]>; - -// The followings are super register classes to model 2/3/4 consecutive -// 64-bit/128-bit registers. - -def DPair : RegisterClass<"AArch64", [v2i64], 64, (add Tuples2D)>; - -def DTriple : RegisterClass<"AArch64", [untyped], 64, (add Tuples3D)> { - let Size = 192; // 3 x 64 bits, we have no predefined type of that size. -} - -def DQuad : RegisterClass<"AArch64", [v4i64], 64, (add Tuples4D)>; - -def QPair : RegisterClass<"AArch64", [v4i64], 128, (add Tuples2Q)>; - -def QTriple : RegisterClass<"AArch64", [untyped], 128, (add Tuples3Q)> { - let Size = 384; // 3 x 128 bits, we have no predefined type of that size. -} - -def QQuad : RegisterClass<"AArch64", [v8i64], 128, (add Tuples4Q)>; - - -// The followings are vector list operands -multiclass VectorList_operands { - def _asmoperand : AsmOperandClass { - let Name = PREFIX # LAYOUT # Count; - let RenderMethod = "addVectorListOperands"; - let PredicateMethod = - "isVectorList"; - let ParserMethod = "ParseVectorList"; - } - - def _operand : RegisterOperand"> { - let ParserMatchClass = - !cast(PREFIX # LAYOUT # "_asmoperand"); - } -} - -multiclass VectorList_BHSD { - defm 8B : VectorList_operands; - defm 4H : VectorList_operands; - defm 2S : VectorList_operands; - defm 1D : VectorList_operands; - defm 16B : VectorList_operands; - defm 8H : VectorList_operands; - defm 4S : VectorList_operands; - defm 2D : VectorList_operands; -} - -// Vector list operand with 1/2/3/4 registers: VOne8B_operand,..., VQuad2D_operand -defm VOne : VectorList_BHSD<"VOne", 1, FPR64, FPR128>; -defm VPair : VectorList_BHSD<"VPair", 2, DPair, QPair>; -defm VTriple : VectorList_BHSD<"VTriple", 3, DTriple, QTriple>; -defm VQuad : VectorList_BHSD<"VQuad", 4, DQuad, QQuad>; diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td deleted file mode 100644 index 6ec47dbaa589..000000000000 --- a/lib/Target/AArch64/AArch64Schedule.td +++ /dev/null @@ -1,80 +0,0 @@ -//===- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Generic processor itineraries for legacy compatibility. 
- -def GenericItineraries : ProcessorItineraries<[], [], []>; - - -//===----------------------------------------------------------------------===// -// Base SchedReadWrite types - -// Basic ALU -def WriteALU : SchedWrite; // Generic: may contain shift and/or ALU operation -def WriteALUs : SchedWrite; // Shift only with no ALU operation -def ReadALU : SchedRead; // Operand not needed for shifting -def ReadALUs : SchedRead; // Operand needed for shifting - -// Multiply with optional accumulate -def WriteMAC : SchedWrite; -def ReadMAC : SchedRead; - -// Compares -def WriteCMP : SchedWrite; -def ReadCMP : SchedRead; - -// Division -def WriteDiv : SchedWrite; -def ReadDiv : SchedRead; - -// Loads -def WriteLd : SchedWrite; -def WritePreLd : SchedWrite; -def WriteVecLd : SchedWrite; -def ReadLd : SchedRead; -def ReadPreLd : SchedRead; -def ReadVecLd : SchedRead; - -// Stores -def WriteSt : SchedWrite; -def WriteVecSt : SchedWrite; -def ReadSt : SchedRead; -def ReadVecSt : SchedRead; - -// Branches -def WriteBr : SchedWrite; -def WriteBrL : SchedWrite; -def ReadBr : SchedRead; - -// Floating Point ALU -def WriteFPALU : SchedWrite; -def ReadFPALU : SchedRead; - -// Floating Point MAC, Mul, Div, Sqrt -// Most processors will simply send all of these down a dedicated pipe, but -// they're explicitly separated here for flexibility of modeling later. May -// consider consolidating them into a single WriteFPXXXX type in the future. -def WriteFPMAC : SchedWrite; -def WriteFPMul : SchedWrite; -def WriteFPDiv : SchedWrite; -def WriteFPSqrt : SchedWrite; -def ReadFPMAC : SchedRead; -def ReadFPMul : SchedRead; -def ReadFPDiv : SchedRead; -def ReadFPSqrt : SchedRead; - -// Noop -def WriteNoop : SchedWrite; - - -//===----------------------------------------------------------------------===// -// Subtarget specific Machine Models. - -include "AArch64ScheduleA53.td" diff --git a/lib/Target/AArch64/AArch64ScheduleA53.td b/lib/Target/AArch64/AArch64ScheduleA53.td deleted file mode 100644 index 20a14e79228a..000000000000 --- a/lib/Target/AArch64/AArch64ScheduleA53.td +++ /dev/null @@ -1,144 +0,0 @@ -//=- AArch64ScheduleA53.td - ARM Cortex-A53 Scheduling Definitions -*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the itinerary class data for the ARM Cortex A53 processors. -// -//===----------------------------------------------------------------------===// - -// ===---------------------------------------------------------------------===// -// The following definitions describe the simpler per-operand machine model. -// This works with MachineScheduler. See MCSchedModel.h for details. - -// Cortex-A53 machine model for scheduling and other instruction cost heuristics. -def CortexA53Model : SchedMachineModel { - let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. - let MinLatency = 1 ; // OperandCycles are interpreted as MinLatency. - let LoadLatency = 2; // Optimistic load latency assuming bypass. - // This is overriden by OperandCycles if the - // Itineraries are queried instead. 
- let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation - // Specification - Instruction Timings" - // v 1.0 Spreadsheet -} - - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available. - -// Modeling each pipeline as a ProcResource using the default BufferSize = -1. -// Cortex-A53 is in-order and therefore should be using BufferSize = 0. The -// current configuration performs better with the basic latencies provided so -// far. Will revisit BufferSize once the latency information is more accurate. - -let SchedModel = CortexA53Model in { - -def A53UnitALU : ProcResource<2>; // Int ALU -def A53UnitMAC : ProcResource<1>; // Int MAC -def A53UnitDiv : ProcResource<1>; // Int Division -def A53UnitLdSt : ProcResource<1>; // Load/Store -def A53UnitB : ProcResource<1>; // Branch -def A53UnitFPALU : ProcResource<1>; // FP ALU -def A53UnitFPMDS : ProcResource<1>; // FP Mult/Div/Sqrt - - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedWrite types which both map the ProcResources and -// set the latency. - -// Issue - Every instruction must consume an A53WriteIssue. Optionally, -// instructions that cannot be dual-issued will also include the -// A53WriteIssue2nd in their SchedRW list. That second WriteRes will -// ensure that a second issue slot is consumed. -def A53WriteIssue : SchedWriteRes<[]>; -def A53WriteIssue2nd : SchedWriteRes<[]> { let Latency = 0; } - -// ALU - These are reduced to 1 despite a true latency of 4 in order to easily -// model forwarding logic. Once forwarding is properly modelled, then -// they'll be corrected. -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -// MAC -def : WriteRes { let Latency = 4; } - -// Div -def : WriteRes { let Latency = 4; } - -// Load - Note: Vector loads take 1-5 cycles to issue. For the WriteVecLd below, -// choosing the median of 3 which makes the latency 6. May model this more -// carefully in the future. -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 6; } - -// Store - Note: Vector stores take 1-3 cycles to issue. For the ReadVecSt below, -// choosing the median of 2 which makes the latency 5. May model this more -// carefully in the future. -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 5; } - -// Branch -def : WriteRes; -def : WriteRes; - -// FP ALU -def : WriteRes {let Latency = 6; } - -// FP MAC, Mul, Div, Sqrt -// Using Double Precision numbers for now as a worst case. Additionally, not -// modeling the exact hazard but instead treating the whole pipe as a hazard. -// As an example VMUL, VMLA, and others are actually pipelined. VDIV and VSQRT -// have a total latency of 33 and 32 respectively but only a hazard of 29 and -// 28 (double-prescion example). -def : WriteRes { let Latency = 10; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 33; - let ResourceCycles = [29]; } -def : WriteRes { let Latency = 32; - let ResourceCycles = [28]; } - - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedRead types. - -// No forwarding defined for ReadALU yet. -def : ReadAdvance; - -// No forwarding defined for ReadCMP yet. -def : ReadAdvance; - -// No forwarding defined for ReadBr yet. -def : ReadAdvance; - -// No forwarding defined for ReadMAC yet. 
-def : ReadAdvance; - -// No forwarding defined for ReadDiv yet. -def : ReadAdvance; - -// No forwarding defined for ReadLd, ReadPreLd, ReadVecLd yet. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -// No forwarding defined for ReadSt and ReadVecSt yet. -def : ReadAdvance; -def : ReadAdvance; - -// No forwarding defined for ReadFPALU yet. -def : ReadAdvance; - -// No forwarding defined for ReadFPMAC/Mul/Div/Sqrt yet. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -} diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp deleted file mode 100644 index 17010d41ed42..000000000000 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ /dev/null @@ -1,26 +0,0 @@ -//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the AArch64SelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#include "AArch64TargetMachine.h" -#include "llvm/CodeGen/SelectionDAG.h" -using namespace llvm; - -#define DEBUG_TYPE "arm-selectiondag-info" - -AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const AArch64TargetMachine &TM) - : TargetSelectionDAGInfo(TM), - Subtarget(&TM.getSubtarget()) { -} - -AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() { -} diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h deleted file mode 100644 index d412ed2be180..000000000000 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ /dev/null @@ -1,32 +0,0 @@ -//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the AArch64 subclass for TargetSelectionDAGInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64SELECTIONDAGINFO_H -#define LLVM_AARCH64SELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class AArch64TargetMachine; - -class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo { - const AArch64Subtarget *Subtarget; -public: - explicit AArch64SelectionDAGInfo(const AArch64TargetMachine &TM); - ~AArch64SelectionDAGInfo(); -}; - -} - -#endif diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp deleted file mode 100644 index f88c899cc9c1..000000000000 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ /dev/null @@ -1,99 +0,0 @@ -//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the AArch64 specific subclass of TargetSubtargetInfo. 
-// -//===----------------------------------------------------------------------===// - -#include "AArch64Subtarget.h" -#include "AArch64RegisterInfo.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetSubtargetInfo.h" - -using namespace llvm; - -#define DEBUG_TYPE "aarch64-subtarget" - -#define GET_SUBTARGETINFO_TARGET_DESC -#define GET_SUBTARGETINFO_CTOR -#include "AArch64GenSubtargetInfo.inc" - -enum AlignMode { - DefaultAlign, - StrictAlign, - NoStrictAlign -}; - -static cl::opt -Align(cl::desc("Load/store alignment support"), - cl::Hidden, cl::init(DefaultAlign), - cl::values( - clEnumValN(DefaultAlign, "aarch64-default-align", - "Generate unaligned accesses only on hardware/OS " - "combinations that are known to support them"), - clEnumValN(StrictAlign, "aarch64-strict-align", - "Disallow all unaligned memory accesses"), - clEnumValN(NoStrictAlign, "aarch64-no-strict-align", - "Allow unaligned memory accesses"), - clEnumValEnd)); - -// Pin the vtable to this file. -void AArch64Subtarget::anchor() {} - -AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS, - bool LittleEndian) - : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), - HasFPARMv8(false), HasNEON(false), HasCrypto(false), TargetTriple(TT), - CPUString(CPU), IsLittleEndian(LittleEndian) { - - initializeSubtargetFeatures(CPU, FS); -} - -void AArch64Subtarget::initializeSubtargetFeatures(StringRef CPU, - StringRef FS) { - AllowsUnalignedMem = false; - - if (CPU.empty()) - CPUString = "generic"; - - std::string FullFS = FS; - if (CPUString == "generic") { - // Enable FP by default. - if (FullFS.empty()) - FullFS = "+fp-armv8"; - else - FullFS = "+fp-armv8," + FullFS; - } - - ParseSubtargetFeatures(CPU, FullFS); - - switch (Align) { - case DefaultAlign: - // Linux targets support unaligned accesses on AARCH64 - AllowsUnalignedMem = isTargetLinux(); - break; - case StrictAlign: - AllowsUnalignedMem = false; - break; - case NoStrictAlign: - AllowsUnalignedMem = true; - break; - } -} - -bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV, - Reloc::Model RelocM) const { - if (RelocM == Reloc::Static) - return false; - - return !GV->hasLocalLinkage() && !GV->hasHiddenVisibility(); -} diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h deleted file mode 100644 index dd2b4d211f2d..000000000000 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ /dev/null @@ -1,89 +0,0 @@ -//==-- AArch64Subtarget.h - Define Subtarget for the AArch64 ---*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the AArch64 specific subclass of TargetSubtargetInfo. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_AARCH64_SUBTARGET_H -#define LLVM_TARGET_AARCH64_SUBTARGET_H - -#include "llvm/ADT/Triple.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include - -#define GET_SUBTARGETINFO_HEADER -#include "AArch64GenSubtargetInfo.inc" - -namespace llvm { -class StringRef; -class GlobalValue; - -class AArch64Subtarget : public AArch64GenSubtargetInfo { - virtual void anchor(); -protected: - enum ARMProcFamilyEnum {Others, CortexA53, CortexA57}; - - /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. - ARMProcFamilyEnum ARMProcFamily; - - bool HasFPARMv8; - bool HasNEON; - bool HasCrypto; - - /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory - /// accesses for some types. For details, see - /// AArch64TargetLowering::allowsUnalignedMemoryAccesses(). - bool AllowsUnalignedMem; - - /// TargetTriple - What processor and OS we're targeting. - Triple TargetTriple; - - /// CPUString - String name of used CPU. - std::string CPUString; - - /// IsLittleEndian - The target is Little Endian - bool IsLittleEndian; - -private: - void initializeSubtargetFeatures(StringRef CPU, StringRef FS); - -public: - /// This constructor initializes the data members to match that - /// of the specified triple. - /// - AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS, - bool LittleEndian); - - bool enableMachineScheduler() const override { - return true; - } - - /// ParseSubtargetFeatures - Parses features string setting specified - /// subtarget options. Definition of function is auto generated by tblgen. - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - - bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; - - bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } - bool isTargetLinux() const { return TargetTriple.isOSLinux(); } - - bool hasFPARMv8() const { return HasFPARMv8; } - bool hasNEON() const { return HasNEON; } - bool hasCrypto() const { return HasCrypto; } - - bool allowsUnalignedMem() const { return AllowsUnalignedMem; } - - bool isLittle() const { return IsLittleEndian; } - - const std::string & getCPUString() const { return CPUString; } -}; -} // End llvm namespace - -#endif // LLVM_TARGET_AARCH64_SUBTARGET_H diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp deleted file mode 100644 index 6bd6f5912d79..000000000000 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ /dev/null @@ -1,121 +0,0 @@ -//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the implementation of the AArch64TargetMachine -// methods. Principally just setting up the passes needed to generate correct -// code on this architecture. 
-//
-//===----------------------------------------------------------------------===//
-
-#include "AArch64.h"
-#include "AArch64TargetMachine.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Transforms/Scalar.h"
-
-using namespace llvm;
-
-extern "C" void LLVMInitializeAArch64Target() {
-  RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
-  RegisterTargetMachine<AArch64beTargetMachine> Y(TheAArch64beTarget);
-}
-
-AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
-                                           StringRef CPU, StringRef FS,
-                                           const TargetOptions &Options,
-                                           Reloc::Model RM, CodeModel::Model CM,
-                                           CodeGenOpt::Level OL,
-                                           bool LittleEndian)
-  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
-    Subtarget(TT, CPU, FS, LittleEndian),
-    InstrInfo(Subtarget),
-    DL(LittleEndian ?
-       "e-m:e-i64:64-i128:128-n32:64-S128" :
-       "E-m:e-i64:64-i128:128-n32:64-S128"),
-    TLInfo(*this),
-    TSInfo(*this),
-    FrameLowering(Subtarget) {
-  initAsmInfo();
-}
-
-void AArch64leTargetMachine::anchor() { }
-
-AArch64leTargetMachine::
-AArch64leTargetMachine(const Target &T, StringRef TT,
-                       StringRef CPU, StringRef FS, const TargetOptions &Options,
-                       Reloc::Model RM, CodeModel::Model CM,
-                       CodeGenOpt::Level OL)
-  : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-
-void AArch64beTargetMachine::anchor() { }
-
-AArch64beTargetMachine::
-AArch64beTargetMachine(const Target &T, StringRef TT,
-                       StringRef CPU, StringRef FS, const TargetOptions &Options,
-                       Reloc::Model RM, CodeModel::Model CM,
-                       CodeGenOpt::Level OL)
-  : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
-
-void AArch64TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
-  // Add first the target-independent BasicTTI pass, then our AArch64 pass. This
-  // allows the AArch64 pass to delegate to the target independent layer when
-  // appropriate.
-  PM.add(createBasicTargetTransformInfoPass(this));
-  PM.add(createAArch64TargetTransformInfoPass(this));
-}
-
-namespace {
-/// AArch64 Code Generator Pass Configuration Options.
-class AArch64PassConfig : public TargetPassConfig {
-public:
-  AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
-    : TargetPassConfig(TM, PM) {}
-
-  AArch64TargetMachine &getAArch64TargetMachine() const {
-    return getTM<AArch64TargetMachine>();
-  }
-
-  const AArch64Subtarget &getAArch64Subtarget() const {
-    return *getAArch64TargetMachine().getSubtargetImpl();
-  }
-
-  bool addPreISel() override;
-  bool addInstSelector() override;
-  bool addPreEmitPass() override;
-};
-} // namespace
-
-bool AArch64PassConfig::addPreISel() {
-  if (TM->getOptLevel() != CodeGenOpt::None)
-    addPass(createGlobalMergePass(TM));
-
-  return false;
-}
-
-TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
-  return new AArch64PassConfig(this, PM);
-}
-
-bool AArch64PassConfig::addPreEmitPass() {
-  addPass(&UnpackMachineBundlesID);
-  addPass(createAArch64BranchFixupPass());
-  return true;
-}
-
-bool AArch64PassConfig::addInstSelector() {
-  addPass(createAArch64ISelDAG(getAArch64TargetMachine(), getOptLevel()));
-
-  // For ELF, cleanup any local-dynamic TLS accesses.
- if (getAArch64Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None) - addPass(createAArch64CleanupLocalDynamicTLSPass()); - - return false; -} diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h deleted file mode 100644 index 3800635e0fac..000000000000 --- a/lib/Target/AArch64/AArch64TargetMachine.h +++ /dev/null @@ -1,94 +0,0 @@ -//=== AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the AArch64 specific subclass of TargetMachine. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64TARGETMACHINE_H -#define LLVM_AARCH64TARGETMACHINE_H - -#include "AArch64FrameLowering.h" -#include "AArch64ISelLowering.h" -#include "AArch64InstrInfo.h" -#include "AArch64SelectionDAGInfo.h" -#include "AArch64Subtarget.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - -class AArch64TargetMachine : public LLVMTargetMachine { - AArch64Subtarget Subtarget; - AArch64InstrInfo InstrInfo; - const DataLayout DL; - AArch64TargetLowering TLInfo; - AArch64SelectionDAGInfo TSInfo; - AArch64FrameLowering FrameLowering; - -public: - AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool LittleEndian); - - const AArch64InstrInfo *getInstrInfo() const override { - return &InstrInfo; - } - - const AArch64FrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - - const AArch64TargetLowering *getTargetLowering() const override { - return &TLInfo; - } - - const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } - - const AArch64Subtarget *getSubtargetImpl() const override { return &Subtarget; } - - const DataLayout *getDataLayout() const override { return &DL; } - - const TargetRegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - - void addAnalysisPasses(PassManagerBase &PM) override; -}; - -// AArch64leTargetMachine - AArch64 little endian target machine. -// -class AArch64leTargetMachine : public AArch64TargetMachine { - virtual void anchor(); -public: - AArch64leTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -// AArch64beTargetMachine - AArch64 big endian target machine. 
-// -class AArch64beTargetMachine : public AArch64TargetMachine { - virtual void anchor(); -public: - AArch64beTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp deleted file mode 100644 index 663d61944a8f..000000000000 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ /dev/null @@ -1,24 +0,0 @@ -//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file deals with any AArch64 specific requirements on object files. -// -//===----------------------------------------------------------------------===// - - -#include "AArch64TargetObjectFile.h" - -using namespace llvm; - -void -AArch64ElfTargetObjectFile::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - TargetLoweringObjectFileELF::Initialize(Ctx, TM); - InitializeELF(TM.Options.UseInitArray); -} diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h deleted file mode 100644 index 6e57103a426a..000000000000 --- a/lib/Target/AArch64/AArch64TargetObjectFile.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file deals with any AArch64 specific requirements on object files. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H -#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H - -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - - /// AArch64ElfTargetObjectFile - This implementation is used for ELF - /// AArch64 targets. - class AArch64ElfTargetObjectFile : public TargetLoweringObjectFileELF { - void Initialize(MCContext &Ctx, const TargetMachine &TM) override; - }; - -} // end namespace llvm - -#endif diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp deleted file mode 100644 index 0228d123bc6f..000000000000 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ /dev/null @@ -1,109 +0,0 @@ -//===- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements a TargetTransformInfo analysis pass specific to the -/// AArch64 target machine. It uses the target's detailed information to provide -/// more precise answers to certain TTI queries, while letting the target -/// independent and default TTI implementations handle the rest. 
-/// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64TargetMachine.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/CostTable.h" -#include "llvm/Target/TargetLowering.h" -using namespace llvm; - -#define DEBUG_TYPE "aarch64tti" - -// Declare the pass initialization routine locally as target-specific passes -// don't have a target-wide initialization entry point, and so we rely on the -// pass constructor initialization. -namespace llvm { -void initializeAArch64TTIPass(PassRegistry &); -} - -namespace { - -class AArch64TTI final : public ImmutablePass, public TargetTransformInfo { - const AArch64Subtarget *ST; - const AArch64TargetLowering *TLI; - -public: - AArch64TTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) { - llvm_unreachable("This pass cannot be directly constructed"); - } - - AArch64TTI(const AArch64TargetMachine *TM) - : ImmutablePass(ID), ST(TM->getSubtargetImpl()), - TLI(TM->getTargetLowering()) { - initializeAArch64TTIPass(*PassRegistry::getPassRegistry()); - } - - virtual void initializePass() override { - pushTTIStack(this); - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const override { - TargetTransformInfo::getAnalysisUsage(AU); - } - - /// Pass identification. - static char ID; - - /// Provide necessary pointer adjustments for the two base classes. - virtual void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &TargetTransformInfo::ID) - return (TargetTransformInfo*)this; - return this; - } - - /// \name Scalar TTI Implementations - /// @{ - - /// @} - - - /// \name Vector TTI Implementations - /// @{ - - unsigned getNumberOfRegisters(bool Vector) const override { - if (Vector) { - if (ST->hasNEON()) - return 32; - return 0; - } - return 32; - } - - unsigned getRegisterBitWidth(bool Vector) const override { - if (Vector) { - if (ST->hasNEON()) - return 128; - return 0; - } - return 64; - } - - unsigned getMaximumUnrollFactor() const override { return 2; } - /// @} -}; - -} // end anonymous namespace - -INITIALIZE_AG_PASS(AArch64TTI, TargetTransformInfo, "aarch64tti", - "AArch64 Target Transform Info", true, true, false) -char AArch64TTI::ID = 0; - -ImmutablePass * -llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) { - return new AArch64TTI(TM); -} diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp deleted file mode 100644 index 9fe3497c6a1c..000000000000 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ /dev/null @@ -1,2677 +0,0 @@ -//==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the (GNU-style) assembly parser for the AArch64 -// architecture. 
-// -//===----------------------------------------------------------------------===// - - -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "MCTargetDesc/AArch64MCExpr.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetAsmParser.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - -class AArch64Operand; - -class AArch64AsmParser : public MCTargetAsmParser { - MCSubtargetInfo &STI; - MCAsmParser &Parser; - -#define GET_ASSEMBLER_HEADER -#include "AArch64GenAsmMatcher.inc" - -public: - enum AArch64MatchResultTy { - Match_FirstAArch64 = FIRST_TARGET_MATCH_RESULT_TY, -#define GET_OPERAND_DIAGNOSTIC_TYPES -#include "AArch64GenAsmMatcher.inc" - }; - - AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII, - const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { - MCAsmParserExtension::Initialize(_Parser); - - // Initialize the set of available features. - setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); - } - - // These are the public interface of the MCTargetAsmParser - bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, - SmallVectorImpl &Operands) override; - - bool ParseDirective(AsmToken DirectiveID) override; - bool ParseDirectiveTLSDescCall(SMLoc L); - bool ParseDirectiveWord(unsigned Size, SMLoc L); - - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl &Operands, - MCStreamer&Out, unsigned &ErrorInfo, - bool MatchingInlineAsm) override; - - // The rest of the sub-parsers have more freedom over interface: they return - // an OperandMatchResultTy because it's less ambiguous than true/false or - // -1/0/1 even if it is more verbose - OperandMatchResultTy - ParseOperand(SmallVectorImpl &Operands, - StringRef Mnemonic); - - OperandMatchResultTy ParseImmediate(const MCExpr *&ExprVal); - - OperandMatchResultTy ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind); - - OperandMatchResultTy - ParseNEONLane(SmallVectorImpl &Operands, - uint32_t NumLanes); - - OperandMatchResultTy - ParseRegister(SmallVectorImpl &Operands, - uint32_t &NumLanes); - - OperandMatchResultTy - ParseImmWithLSLOperand(SmallVectorImpl &Operands); - - OperandMatchResultTy - ParseCondCodeOperand(SmallVectorImpl &Operands); - - OperandMatchResultTy - ParseCRxOperand(SmallVectorImpl &Operands); - - OperandMatchResultTy - ParseFPImmOperand(SmallVectorImpl &Operands); - - OperandMatchResultTy - ParseFPImm0AndImm0Operand( SmallVectorImpl &Operands); - - template OperandMatchResultTy - ParseNamedImmOperand(SmallVectorImpl &Operands) { - return ParseNamedImmOperand(SomeNamedImmMapper(), Operands); - } - - OperandMatchResultTy - ParseNamedImmOperand(const NamedImmMapper &Mapper, - SmallVectorImpl &Operands); - - OperandMatchResultTy - ParseLSXAddressOperand(SmallVectorImpl &Operands); - - OperandMatchResultTy - 
ParseShiftExtend(SmallVectorImpl &Operands); - - OperandMatchResultTy - ParseSysRegOperand(SmallVectorImpl &Operands); - - bool TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc, StringRef &Layout, - SMLoc &LayoutLoc); - - OperandMatchResultTy ParseVectorList(SmallVectorImpl &); - - bool validateInstruction(MCInst &Inst, - const SmallVectorImpl &Operands); - - /// Scan the next token (which had better be an identifier) and determine - /// whether it represents a general-purpose or vector register. It returns - /// true if an identifier was found and populates its reference arguments. It - /// does not consume the token. - bool - IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, StringRef &LayoutSpec, - SMLoc &LayoutLoc) const; - -}; - -} - -namespace { - -/// Instances of this class represent a parsed AArch64 machine instruction. -class AArch64Operand : public MCParsedAsmOperand { -private: - enum KindTy { - k_ImmWithLSL, // #uimm {, LSL #amt } - k_CondCode, // eq/ne/... - k_FPImmediate, // Limited-precision floating-point imm - k_Immediate, // Including expressions referencing symbols - k_Register, - k_ShiftExtend, - k_VectorList, // A sequential list of 1 to 4 registers. - k_SysReg, // The register operand of MRS and MSR instructions - k_Token, // The mnemonic; other raw tokens the auto-generated - k_WrappedRegister // Load/store exclusive permit a wrapped register. - } Kind; - - SMLoc StartLoc, EndLoc; - - struct ImmWithLSLOp { - const MCExpr *Val; - unsigned ShiftAmount; - bool ImplicitAmount; - }; - - struct CondCodeOp { - A64CC::CondCodes Code; - }; - - struct FPImmOp { - double Val; - }; - - struct ImmOp { - const MCExpr *Val; - }; - - struct RegOp { - unsigned RegNum; - }; - - struct ShiftExtendOp { - A64SE::ShiftExtSpecifiers ShiftType; - unsigned Amount; - bool ImplicitAmount; - }; - - // A vector register list is a sequential list of 1 to 4 registers. 
- struct VectorListOp { - unsigned RegNum; - unsigned Count; - A64Layout::VectorLayout Layout; - }; - - struct SysRegOp { - const char *Data; - unsigned Length; - }; - - struct TokOp { - const char *Data; - unsigned Length; - }; - - union { - struct ImmWithLSLOp ImmWithLSL; - struct CondCodeOp CondCode; - struct FPImmOp FPImm; - struct ImmOp Imm; - struct RegOp Reg; - struct ShiftExtendOp ShiftExtend; - struct VectorListOp VectorList; - struct SysRegOp SysReg; - struct TokOp Tok; - }; - - AArch64Operand(KindTy K, SMLoc S, SMLoc E) - : MCParsedAsmOperand(), Kind(K), StartLoc(S), EndLoc(E) {} - -public: - AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand() { - } - - SMLoc getStartLoc() const override { return StartLoc; } - SMLoc getEndLoc() const override { return EndLoc; } - void print(raw_ostream&) const override; - void dump() const override; - - StringRef getToken() const { - assert(Kind == k_Token && "Invalid access!"); - return StringRef(Tok.Data, Tok.Length); - } - - unsigned getReg() const override { - assert((Kind == k_Register || Kind == k_WrappedRegister) - && "Invalid access!"); - return Reg.RegNum; - } - - const MCExpr *getImm() const { - assert(Kind == k_Immediate && "Invalid access!"); - return Imm.Val; - } - - A64CC::CondCodes getCondCode() const { - assert(Kind == k_CondCode && "Invalid access!"); - return CondCode.Code; - } - - static bool isNonConstantExpr(const MCExpr *E, - AArch64MCExpr::VariantKind &Variant) { - if (const AArch64MCExpr *A64E = dyn_cast(E)) { - Variant = A64E->getKind(); - return true; - } else if (!isa(E)) { - Variant = AArch64MCExpr::VK_AARCH64_None; - return true; - } - - return false; - } - - bool isCondCode() const { return Kind == k_CondCode; } - bool isToken() const override { return Kind == k_Token; } - bool isReg() const override { return Kind == k_Register; } - bool isImm() const override { return Kind == k_Immediate; } - bool isMem() const override { return false; } - bool isFPImm() const { return Kind == k_FPImmediate; } - bool isShiftOrExtend() const { return Kind == k_ShiftExtend; } - bool isSysReg() const { return Kind == k_SysReg; } - bool isImmWithLSL() const { return Kind == k_ImmWithLSL; } - bool isWrappedReg() const { return Kind == k_WrappedRegister; } - - bool isAddSubImmLSL0() const { - if (!isImmWithLSL()) return false; - if (ImmWithLSL.ShiftAmount != 0) return false; - - AArch64MCExpr::VariantKind Variant; - if (isNonConstantExpr(ImmWithLSL.Val, Variant)) { - return Variant == AArch64MCExpr::VK_AARCH64_LO12 - || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12 - || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC - || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12 - || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC - || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC_LO12; - } - - // Otherwise it should be a real immediate in range: - const MCConstantExpr *CE = cast(ImmWithLSL.Val); - return CE->getValue() >= 0 && CE->getValue() <= 0xfff; - } - - bool isAddSubImmLSL12() const { - if (!isImmWithLSL()) return false; - if (ImmWithLSL.ShiftAmount != 12) return false; - - AArch64MCExpr::VariantKind Variant; - if (isNonConstantExpr(ImmWithLSL.Val, Variant)) { - return Variant == AArch64MCExpr::VK_AARCH64_DTPREL_HI12 - || Variant == AArch64MCExpr::VK_AARCH64_TPREL_HI12; - } - - // Otherwise it should be a real immediate in range: - const MCConstantExpr *CE = cast(ImmWithLSL.Val); - return CE->getValue() >= 0 && CE->getValue() <= 0xfff; - } - - template bool isAddrRegExtend() const { - if (!isShiftOrExtend()) return 
false; - - A64SE::ShiftExtSpecifiers Ext = ShiftExtend.ShiftType; - if (RmSize == 32 && !(Ext == A64SE::UXTW || Ext == A64SE::SXTW)) - return false; - - if (RmSize == 64 && !(Ext == A64SE::LSL || Ext == A64SE::SXTX)) - return false; - - return ShiftExtend.Amount == Log2_32(MemSize) || ShiftExtend.Amount == 0; - } - - bool isAdrpLabel() const { - if (!isImm()) return false; - - AArch64MCExpr::VariantKind Variant; - if (isNonConstantExpr(getImm(), Variant)) { - return Variant == AArch64MCExpr::VK_AARCH64_None - || Variant == AArch64MCExpr::VK_AARCH64_GOT - || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL - || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC; - } - - return isLabel<21, 4096>(); - } - - template bool isBitfieldWidth() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) return false; - - return CE->getValue() >= 1 && CE->getValue() <= RegWidth; - } - - template - bool isCVTFixedPos() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) return false; - - return CE->getValue() >= 1 && CE->getValue() <= RegWidth; - } - - bool isFMOVImm() const { - if (!isFPImm()) return false; - - APFloat RealVal(FPImm.Val); - uint32_t ImmVal; - return A64Imms::isFPImm(RealVal, ImmVal); - } - - bool isFPZero() const { - if (!isFPImm()) return false; - - APFloat RealVal(FPImm.Val); - return RealVal.isPosZero(); - } - - template - bool isLabel() const { - if (!isImm()) return false; - - if (dyn_cast(Imm.Val)) { - return true; - } else if (const MCConstantExpr *CE = dyn_cast(Imm.Val)) { - int64_t Val = CE->getValue(); - int64_t Min = - (scale * (1LL << (field_width - 1))); - int64_t Max = scale * ((1LL << (field_width - 1)) - 1); - return (Val % scale) == 0 && Val >= Min && Val <= Max; - } - - // N.b. this disallows explicit relocation specifications via an - // AArch64MCExpr. Users needing that behaviour - return false; - } - - bool isLane1() const { - if (!isImm()) return false; - - // Because it's come through custom assembly parsing, it must always be a - // constant expression. - return cast(getImm())->getValue() == 1; - } - - bool isLoadLitLabel() const { - if (!isImm()) return false; - - AArch64MCExpr::VariantKind Variant; - if (isNonConstantExpr(getImm(), Variant)) { - return Variant == AArch64MCExpr::VK_AARCH64_None - || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL; - } - - return isLabel<19, 4>(); - } - - template bool isLogicalImm() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(Imm.Val); - if (!CE) return false; - - uint32_t Bits; - return A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits); - } - - template bool isLogicalImmMOV() const { - if (!isLogicalImm()) return false; - - const MCConstantExpr *CE = cast(Imm.Val); - - // The move alias for ORR is only valid if the immediate cannot be - // represented with a move (immediate) instruction; they take priority. - int UImm16, Shift; - return !A64Imms::isMOVZImm(RegWidth, CE->getValue(), UImm16, Shift) - && !A64Imms::isMOVNImm(RegWidth, CE->getValue(), UImm16, Shift); - } - - template - bool isOffsetUImm12() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - - // Assume they know what they're doing for now if they've given us a - // non-constant expression. In principle we could check for ridiculous - // things that can't possibly work or relocations that would almost - // certainly break resulting code. 
- if (!CE) - return true; - - int64_t Val = CE->getValue(); - - // Must be a multiple of the access size in bytes. - if ((Val & (MemSize - 1)) != 0) return false; - - // Must be 12-bit unsigned - return Val >= 0 && Val <= 0xfff * MemSize; - } - - template - bool isShift() const { - if (!isShiftOrExtend()) return false; - - if (ShiftExtend.ShiftType != SHKind) - return false; - - return is64Bit ? ShiftExtend.Amount <= 63 : ShiftExtend.Amount <= 31; - } - - bool isMOVN32Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_SABS_G0, - AArch64MCExpr::VK_AARCH64_SABS_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G0, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G0, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(32, PermittedModifiers, NumModifiers); - } - - bool isMOVN64Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_SABS_G0, - AArch64MCExpr::VK_AARCH64_SABS_G1, - AArch64MCExpr::VK_AARCH64_SABS_G2, - AArch64MCExpr::VK_AARCH64_DTPREL_G2, - AArch64MCExpr::VK_AARCH64_DTPREL_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G0, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G2, - AArch64MCExpr::VK_AARCH64_TPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G0, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(64, PermittedModifiers, NumModifiers); - } - - - bool isMOVZ32Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_ABS_G0, - AArch64MCExpr::VK_AARCH64_ABS_G1, - AArch64MCExpr::VK_AARCH64_SABS_G0, - AArch64MCExpr::VK_AARCH64_SABS_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G0, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G0, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(32, PermittedModifiers, NumModifiers); - } - - bool isMOVZ64Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_ABS_G0, - AArch64MCExpr::VK_AARCH64_ABS_G1, - AArch64MCExpr::VK_AARCH64_ABS_G2, - AArch64MCExpr::VK_AARCH64_ABS_G3, - AArch64MCExpr::VK_AARCH64_SABS_G0, - AArch64MCExpr::VK_AARCH64_SABS_G1, - AArch64MCExpr::VK_AARCH64_SABS_G2, - AArch64MCExpr::VK_AARCH64_DTPREL_G2, - AArch64MCExpr::VK_AARCH64_DTPREL_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G0, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G2, - AArch64MCExpr::VK_AARCH64_TPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G0, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(64, PermittedModifiers, NumModifiers); - } - - bool isMOVK32Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_ABS_G0_NC, - AArch64MCExpr::VK_AARCH64_ABS_G1_NC, - AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC, - AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC, - AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, - AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(32, PermittedModifiers, NumModifiers); - } - - bool isMOVK64Imm() const { - static const 
AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_ABS_G0_NC, - AArch64MCExpr::VK_AARCH64_ABS_G1_NC, - AArch64MCExpr::VK_AARCH64_ABS_G2_NC, - AArch64MCExpr::VK_AARCH64_ABS_G3, - AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC, - AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC, - AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, - AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(64, PermittedModifiers, NumModifiers); - } - - bool isMoveWideImm(unsigned RegWidth, - const AArch64MCExpr::VariantKind *PermittedModifiers, - unsigned NumModifiers) const { - if (!isImmWithLSL()) return false; - - if (ImmWithLSL.ShiftAmount % 16 != 0) return false; - if (ImmWithLSL.ShiftAmount >= RegWidth) return false; - - AArch64MCExpr::VariantKind Modifier; - if (isNonConstantExpr(ImmWithLSL.Val, Modifier)) { - // E.g. "#:abs_g0:sym, lsl #16" makes no sense. - if (!ImmWithLSL.ImplicitAmount) return false; - - for (unsigned i = 0; i < NumModifiers; ++i) - if (PermittedModifiers[i] == Modifier) return true; - - return false; - } - - const MCConstantExpr *CE = dyn_cast(ImmWithLSL.Val); - return CE && CE->getValue() >= 0 && CE->getValue() <= 0xffff; - } - - template - bool isMoveWideMovAlias() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) return false; - - int UImm16, Shift; - uint64_t Value = CE->getValue(); - - // If this is a 32-bit instruction then all bits above 32 should be the - // same: either of these is fine because signed/unsigned values should be - // permitted. - if (RegWidth == 32) { - if ((Value >> 32) != 0 && (Value >> 32) != 0xffffffff) - return false; - - Value &= 0xffffffffULL; - } - - return isValidImm(RegWidth, Value, UImm16, Shift); - } - - bool isMSRWithReg() const { - if (!isSysReg()) return false; - - bool IsKnownRegister; - StringRef Name(SysReg.Data, SysReg.Length); - A64SysReg::MSRMapper().fromString(Name, IsKnownRegister); - - return IsKnownRegister; - } - - bool isMSRPState() const { - if (!isSysReg()) return false; - - bool IsKnownRegister; - StringRef Name(SysReg.Data, SysReg.Length); - A64PState::PStateMapper().fromString(Name, IsKnownRegister); - - return IsKnownRegister; - } - - bool isMRS() const { - if (!isSysReg()) return false; - - // First check against specific MSR-only (write-only) registers - bool IsKnownRegister; - StringRef Name(SysReg.Data, SysReg.Length); - A64SysReg::MRSMapper().fromString(Name, IsKnownRegister); - - return IsKnownRegister; - } - - bool isPRFM() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - - if (!CE) - return false; - - return CE->getValue() >= 0 && CE->getValue() <= 31; - } - - template bool isRegExtend() const { - if (!isShiftOrExtend()) return false; - - if (ShiftExtend.ShiftType != SHKind) - return false; - - return ShiftExtend.Amount <= 4; - } - - bool isRegExtendLSL() const { - if (!isShiftOrExtend()) return false; - - if (ShiftExtend.ShiftType != A64SE::LSL) - return false; - - return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4; - } - - // if 0 < value <= w, return true - bool isShrFixedWidth(int w) const { - if (!isImm()) - return false; - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) - return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value <= w; - } - - bool isShrImm8() const { return isShrFixedWidth(8); } - - bool isShrImm16() const { return isShrFixedWidth(16); } - 
- bool isShrImm32() const { return isShrFixedWidth(32); } - - bool isShrImm64() const { return isShrFixedWidth(64); } - - // if 0 <= value < w, return true - bool isShlFixedWidth(int w) const { - if (!isImm()) - return false; - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) - return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < w; - } - - bool isShlImm8() const { return isShlFixedWidth(8); } - - bool isShlImm16() const { return isShlFixedWidth(16); } - - bool isShlImm32() const { return isShlFixedWidth(32); } - - bool isShlImm64() const { return isShlFixedWidth(64); } - - bool isNeonMovImmShiftLSL() const { - if (!isShiftOrExtend()) - return false; - - if (ShiftExtend.ShiftType != A64SE::LSL) - return false; - - // Valid shift amount is 0, 8, 16 and 24. - return ShiftExtend.Amount % 8 == 0 && ShiftExtend.Amount <= 24; - } - - bool isNeonMovImmShiftLSLH() const { - if (!isShiftOrExtend()) - return false; - - if (ShiftExtend.ShiftType != A64SE::LSL) - return false; - - // Valid shift amount is 0 and 8. - return ShiftExtend.Amount == 0 || ShiftExtend.Amount == 8; - } - - bool isNeonMovImmShiftMSL() const { - if (!isShiftOrExtend()) - return false; - - if (ShiftExtend.ShiftType != A64SE::MSL) - return false; - - // Valid shift amount is 8 and 16. - return ShiftExtend.Amount == 8 || ShiftExtend.Amount == 16; - } - - template - bool isVectorList() const { - return Kind == k_VectorList && VectorList.Layout == Layout && - VectorList.Count == Count; - } - - template bool isSImm7Scaled() const { - if (!isImm()) - return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) return false; - - int64_t Val = CE->getValue(); - if (Val % MemSize != 0) return false; - - Val /= MemSize; - - return Val >= -64 && Val < 64; - } - - template - bool isSImm() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) return false; - - return CE->getValue() >= -(1LL << (BitWidth - 1)) - && CE->getValue() < (1LL << (BitWidth - 1)); - } - - template - bool isUImm() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) return false; - - return CE->getValue() >= 0 && CE->getValue() < (1LL << bitWidth); - } - - bool isUImm() const { - if (!isImm()) return false; - - return isa(getImm()); - } - - bool isNeonUImm64Mask() const { - if (!isImm()) - return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) - return false; - - uint64_t Value = CE->getValue(); - - // i64 value with each byte being either 0x00 or 0xff. 
- for (unsigned i = 0; i < 8; ++i, Value >>= 8) - if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff) - return false; - return true; - } - - // if value == N, return true - template - bool isExactImm() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) return false; - - return CE->getValue() == N; - } - - bool isFPZeroIZero() const { - return isFPZero(); - } - - static AArch64Operand *CreateImmWithLSL(const MCExpr *Val, - unsigned ShiftAmount, - bool ImplicitAmount, - SMLoc S,SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E); - Op->ImmWithLSL.Val = Val; - Op->ImmWithLSL.ShiftAmount = ShiftAmount; - Op->ImmWithLSL.ImplicitAmount = ImplicitAmount; - return Op; - } - - static AArch64Operand *CreateCondCode(A64CC::CondCodes Code, - SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_CondCode, S, E); - Op->CondCode.Code = Code; - return Op; - } - - static AArch64Operand *CreateFPImm(double Val, - SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_FPImmediate, S, E); - Op->FPImm.Val = Val; - return Op; - } - - static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_Immediate, S, E); - Op->Imm.Val = Val; - return Op; - } - - static AArch64Operand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_Register, S, E); - Op->Reg.RegNum = RegNum; - return Op; - } - - static AArch64Operand *CreateWrappedReg(unsigned RegNum, SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_WrappedRegister, S, E); - Op->Reg.RegNum = RegNum; - return Op; - } - - static AArch64Operand *CreateShiftExtend(A64SE::ShiftExtSpecifiers ShiftTyp, - unsigned Amount, - bool ImplicitAmount, - SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, S, E); - Op->ShiftExtend.ShiftType = ShiftTyp; - Op->ShiftExtend.Amount = Amount; - Op->ShiftExtend.ImplicitAmount = ImplicitAmount; - return Op; - } - - static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S) { - AArch64Operand *Op = new AArch64Operand(k_SysReg, S, S); - Op->Tok.Data = Str.data(); - Op->Tok.Length = Str.size(); - return Op; - } - - static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count, - A64Layout::VectorLayout Layout, - SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_VectorList, S, E); - Op->VectorList.RegNum = RegNum; - Op->VectorList.Count = Count; - Op->VectorList.Layout = Layout; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static AArch64Operand *CreateToken(StringRef Str, SMLoc S) { - AArch64Operand *Op = new AArch64Operand(k_Token, S, S); - Op->Tok.Data = Str.data(); - Op->Tok.Length = Str.size(); - return Op; - } - - - void addExpr(MCInst &Inst, const MCExpr *Expr) const { - // Add as immediates when possible. 
- if (const MCConstantExpr *CE = dyn_cast(Expr)) - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); - else - Inst.addOperand(MCOperand::CreateExpr(Expr)); - } - - template - void addBFILSBOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = cast(getImm()); - unsigned EncodedVal = (RegWidth - CE->getValue()) % RegWidth; - Inst.addOperand(MCOperand::CreateImm(EncodedVal)); - } - - void addBFIWidthOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = cast(getImm()); - Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); - } - - void addBFXWidthOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - uint64_t LSB = Inst.getOperand(Inst.getNumOperands()-1).getImm(); - const MCConstantExpr *CE = cast(getImm()); - - Inst.addOperand(MCOperand::CreateImm(LSB + CE->getValue() - 1)); - } - - void addCondCodeOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getCondCode())); - } - - void addCVTFixedPosOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - const MCConstantExpr *CE = cast(getImm()); - Inst.addOperand(MCOperand::CreateImm(64 - CE->getValue())); - } - - void addFMOVImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - APFloat RealVal(FPImm.Val); - uint32_t ImmVal; - A64Imms::isFPImm(RealVal, ImmVal); - - Inst.addOperand(MCOperand::CreateImm(ImmVal)); - } - - void addFPZeroOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands"); - Inst.addOperand(MCOperand::CreateImm(0)); - } - - void addFPZeroIZeroOperands(MCInst &Inst, unsigned N) const { - addFPZeroOperands(Inst, N); - } - - void addInvCondCodeOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - unsigned Encoded = A64InvertCondCode(getCondCode()); - Inst.addOperand(MCOperand::CreateImm(Encoded)); - } - - void addRegOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getReg())); - } - - void addImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - template - void addSImm7ScaledOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - const MCConstantExpr *CE = cast(getImm()); - uint64_t Val = CE->getValue() / MemSize; - Inst.addOperand(MCOperand::CreateImm(Val & 0x7f)); - } - - template - void addSImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - const MCConstantExpr *CE = cast(getImm()); - uint64_t Val = CE->getValue(); - Inst.addOperand(MCOperand::CreateImm(Val & ((1ULL << BitWidth) - 1))); - } - - void addImmWithLSLOperands(MCInst &Inst, unsigned N) const { - assert (N == 1 && "Invalid number of operands!"); - - addExpr(Inst, ImmWithLSL.Val); - } - - template - void addLabelOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - const MCConstantExpr *CE = dyn_cast(Imm.Val); - - if (!CE) { - addExpr(Inst, Imm.Val); - return; - } - - int64_t Val = CE->getValue(); - assert(Val % scale == 0 && "Unaligned immediate in instruction"); - Val /= scale; - - Inst.addOperand(MCOperand::CreateImm(Val & ((1LL << field_width) - 
1))); - } - - template - void addOffsetUImm12Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - if (const MCConstantExpr *CE = dyn_cast(getImm())) { - Inst.addOperand(MCOperand::CreateImm(CE->getValue() / MemSize)); - } else { - Inst.addOperand(MCOperand::CreateExpr(getImm())); - } - } - - template - void addLogicalImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands"); - const MCConstantExpr *CE = cast(Imm.Val); - - uint32_t Bits; - A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits); - - Inst.addOperand(MCOperand::CreateImm(Bits)); - } - - void addMRSOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - bool Valid; - StringRef Name(SysReg.Data, SysReg.Length); - uint32_t Bits = A64SysReg::MRSMapper().fromString(Name, Valid); - - Inst.addOperand(MCOperand::CreateImm(Bits)); - } - - void addMSRWithRegOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - bool Valid; - StringRef Name(SysReg.Data, SysReg.Length); - uint32_t Bits = A64SysReg::MSRMapper().fromString(Name, Valid); - - Inst.addOperand(MCOperand::CreateImm(Bits)); - } - - void addMSRPStateOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - bool Valid; - StringRef Name(SysReg.Data, SysReg.Length); - uint32_t Bits = A64PState::PStateMapper().fromString(Name, Valid); - - Inst.addOperand(MCOperand::CreateImm(Bits)); - } - - void addMoveWideImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && "Invalid number of operands!"); - - addExpr(Inst, ImmWithLSL.Val); - - AArch64MCExpr::VariantKind Variant; - if (!isNonConstantExpr(ImmWithLSL.Val, Variant)) { - Inst.addOperand(MCOperand::CreateImm(ImmWithLSL.ShiftAmount / 16)); - return; - } - - // We know it's relocated - switch (Variant) { - case AArch64MCExpr::VK_AARCH64_ABS_G0: - case AArch64MCExpr::VK_AARCH64_ABS_G0_NC: - case AArch64MCExpr::VK_AARCH64_SABS_G0: - case AArch64MCExpr::VK_AARCH64_DTPREL_G0: - case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC: - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC: - case AArch64MCExpr::VK_AARCH64_TPREL_G0: - case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC: - Inst.addOperand(MCOperand::CreateImm(0)); - break; - case AArch64MCExpr::VK_AARCH64_ABS_G1: - case AArch64MCExpr::VK_AARCH64_ABS_G1_NC: - case AArch64MCExpr::VK_AARCH64_SABS_G1: - case AArch64MCExpr::VK_AARCH64_DTPREL_G1: - case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC: - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: - case AArch64MCExpr::VK_AARCH64_TPREL_G1: - case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC: - Inst.addOperand(MCOperand::CreateImm(1)); - break; - case AArch64MCExpr::VK_AARCH64_ABS_G2: - case AArch64MCExpr::VK_AARCH64_ABS_G2_NC: - case AArch64MCExpr::VK_AARCH64_SABS_G2: - case AArch64MCExpr::VK_AARCH64_DTPREL_G2: - case AArch64MCExpr::VK_AARCH64_TPREL_G2: - Inst.addOperand(MCOperand::CreateImm(2)); - break; - case AArch64MCExpr::VK_AARCH64_ABS_G3: - Inst.addOperand(MCOperand::CreateImm(3)); - break; - default: llvm_unreachable("Inappropriate move wide relocation"); - } - } - - template - void addMoveWideMovAliasOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && "Invalid number of operands!"); - int UImm16, Shift; - - const MCConstantExpr *CE = cast(getImm()); - uint64_t Value = CE->getValue(); - - if (RegWidth == 32) { - Value &= 0xffffffffULL; - } - - bool Valid = isValidImm(RegWidth, Value, UImm16, Shift); - (void)Valid; - assert(Valid && "Invalid 
immediates should have been weeded out by now");
-
-    Inst.addOperand(MCOperand::CreateImm(UImm16));
-    Inst.addOperand(MCOperand::CreateImm(Shift));
-  }
-
-  void addPRFMOperands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
-
-    const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
-    assert(CE->getValue() >= 0 && CE->getValue() <= 31
-           && "PRFM operand should be 5-bits");
-
-    Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
-  }
-
-  // For Add-sub (extended register) operands.
-  void addRegExtendOperands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
-
-    Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
-  }
-
-  // For Vector Immediates shifted imm operands.
-  void addNeonMovImmShiftLSLOperands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
-
-    if (ShiftExtend.Amount % 8 != 0 || ShiftExtend.Amount > 24)
-      llvm_unreachable("Invalid shift amount for vector immediate inst.");
-
-    // Encode LSL shift amount 0, 8, 16, 24 as 0, 1, 2, 3.
-    int64_t Imm = ShiftExtend.Amount / 8;
-    Inst.addOperand(MCOperand::CreateImm(Imm));
-  }
-
-  void addNeonMovImmShiftLSLHOperands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
-
-    if (ShiftExtend.Amount != 0 && ShiftExtend.Amount != 8)
-      llvm_unreachable("Invalid shift amount for vector immediate inst.");
-
-    // Encode LSLH shift amount 0, 8 as 0, 1.
-    int64_t Imm = ShiftExtend.Amount / 8;
-    Inst.addOperand(MCOperand::CreateImm(Imm));
-  }
-
-  void addNeonMovImmShiftMSLOperands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
-
-    if (ShiftExtend.Amount != 8 && ShiftExtend.Amount != 16)
-      llvm_unreachable("Invalid shift amount for vector immediate inst.");
-
-    // Encode MSL shift amount 8, 16 as 0, 1.
-    int64_t Imm = ShiftExtend.Amount / 8 - 1;
-    Inst.addOperand(MCOperand::CreateImm(Imm));
-  }
-
-  // For the extend in load-store (register offset) instructions.
- template - void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const { - addAddrRegExtendOperands(Inst, N, MemSize); - } - - void addAddrRegExtendOperands(MCInst &Inst, unsigned N, - unsigned MemSize) const { - assert(N == 1 && "Invalid number of operands!"); - - // First bit of Option is set in instruction classes, the high two bits are - // as follows: - unsigned OptionHi = 0; - switch (ShiftExtend.ShiftType) { - case A64SE::UXTW: - case A64SE::LSL: - OptionHi = 1; - break; - case A64SE::SXTW: - case A64SE::SXTX: - OptionHi = 3; - break; - default: - llvm_unreachable("Invalid extend type for register offset"); - } - - unsigned S = 0; - if (MemSize == 1 && !ShiftExtend.ImplicitAmount) - S = 1; - else if (MemSize != 1 && ShiftExtend.Amount != 0) - S = 1; - - Inst.addOperand(MCOperand::CreateImm((OptionHi << 1) | S)); - } - void addShiftOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); - } - - void addNeonUImm64MaskOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - - // A bit from each byte in the constant forms the encoded immediate - const MCConstantExpr *CE = dyn_cast(getImm()); - uint64_t Value = CE->getValue(); - - unsigned Imm = 0; - for (unsigned i = 0; i < 8; ++i, Value >>= 8) { - Imm |= (Value & 1) << i; - } - Inst.addOperand(MCOperand::CreateImm(Imm)); - } - - void addVectorListOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); - } -}; - -} // end anonymous namespace. - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseOperand(SmallVectorImpl &Operands, - StringRef Mnemonic) { - - // See if the operand has a custom parser - OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); - - // It could either succeed, fail or just not care. - if (ResTy != MatchOperand_NoMatch) - return ResTy; - - switch (getLexer().getKind()) { - default: - Error(Parser.getTok().getLoc(), "unexpected token in operand"); - return MatchOperand_ParseFail; - case AsmToken::Identifier: { - // It might be in the LSL/UXTB family ... - OperandMatchResultTy GotShift = ParseShiftExtend(Operands); - - // We can only continue if no tokens were eaten. - if (GotShift != MatchOperand_NoMatch) - return GotShift; - - // ... or it might be a register ... - uint32_t NumLanes = 0; - OperandMatchResultTy GotReg = ParseRegister(Operands, NumLanes); - assert(GotReg != MatchOperand_ParseFail - && "register parsing shouldn't partially succeed"); - - if (GotReg == MatchOperand_Success) { - if (Parser.getTok().is(AsmToken::LBrac)) - return ParseNEONLane(Operands, NumLanes); - else - return MatchOperand_Success; - } - // ... or it might be a symbolish thing - } - // Fall through - case AsmToken::LParen: // E.g. (strcmp-4) - case AsmToken::Integer: // 1f, 2b labels - case AsmToken::String: // quoted labels - case AsmToken::Dot: // . 
is Current location - case AsmToken::Dollar: // $ is PC - case AsmToken::Colon: { - SMLoc StartLoc = Parser.getTok().getLoc(); - SMLoc EndLoc; - const MCExpr *ImmVal = nullptr; - - if (ParseImmediate(ImmVal) != MatchOperand_Success) - return MatchOperand_ParseFail; - - EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc)); - return MatchOperand_Success; - } - case AsmToken::Hash: { // Immediates - SMLoc StartLoc = Parser.getTok().getLoc(); - SMLoc EndLoc; - const MCExpr *ImmVal = nullptr; - Parser.Lex(); - - if (ParseImmediate(ImmVal) != MatchOperand_Success) - return MatchOperand_ParseFail; - - EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc)); - return MatchOperand_Success; - } - case AsmToken::LBrac: { - SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateToken("[", Loc)); - Parser.Lex(); // Eat '[' - - // There's no comma after a '[', so we can parse the next operand - // immediately. - return ParseOperand(Operands, Mnemonic); - } - // The following will likely be useful later, but not in very early cases - case AsmToken::LCurly: // SIMD vector list is not parsed here - llvm_unreachable("Don't know how to deal with '{' in operand"); - return MatchOperand_ParseFail; - } -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) { - if (getLexer().is(AsmToken::Colon)) { - AArch64MCExpr::VariantKind RefKind; - - OperandMatchResultTy ResTy = ParseRelocPrefix(RefKind); - if (ResTy != MatchOperand_Success) - return ResTy; - - const MCExpr *SubExprVal; - if (getParser().parseExpression(SubExprVal)) - return MatchOperand_ParseFail; - - ExprVal = AArch64MCExpr::Create(RefKind, SubExprVal, getContext()); - return MatchOperand_Success; - } - - // No weird AArch64MCExpr prefix - return getParser().parseExpression(ExprVal) - ? MatchOperand_ParseFail : MatchOperand_Success; -} - -// A lane attached to a NEON register. "[N]", which should yield three tokens: -// '[', N, ']'. A hash is not allowed to precede the immediate here. 
-AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseNEONLane(SmallVectorImpl &Operands, - uint32_t NumLanes) { - SMLoc Loc = Parser.getTok().getLoc(); - - assert(Parser.getTok().is(AsmToken::LBrac) && "inappropriate operand"); - Operands.push_back(AArch64Operand::CreateToken("[", Loc)); - Parser.Lex(); // Eat '[' - - if (Parser.getTok().isNot(AsmToken::Integer)) { - Error(Parser.getTok().getLoc(), "expected lane number"); - return MatchOperand_ParseFail; - } - - if (Parser.getTok().getIntVal() >= NumLanes) { - Error(Parser.getTok().getLoc(), "lane number incompatible with layout"); - return MatchOperand_ParseFail; - } - - const MCExpr *Lane = MCConstantExpr::Create(Parser.getTok().getIntVal(), - getContext()); - SMLoc S = Parser.getTok().getLoc(); - Parser.Lex(); // Eat actual lane - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateImm(Lane, S, E)); - - - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(Parser.getTok().getLoc(), "expected ']' after lane"); - return MatchOperand_ParseFail; - } - - Operands.push_back(AArch64Operand::CreateToken("]", Loc)); - Parser.Lex(); // Eat ']' - - return MatchOperand_Success; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind) { - assert(getLexer().is(AsmToken::Colon) && "expected a ':'"); - Parser.Lex(); - - if (getLexer().isNot(AsmToken::Identifier)) { - Error(Parser.getTok().getLoc(), - "expected relocation specifier in operand after ':'"); - return MatchOperand_ParseFail; - } - - std::string LowerCase = Parser.getTok().getIdentifier().lower(); - RefKind = StringSwitch(LowerCase) - .Case("got", AArch64MCExpr::VK_AARCH64_GOT) - .Case("got_lo12", AArch64MCExpr::VK_AARCH64_GOT_LO12) - .Case("lo12", AArch64MCExpr::VK_AARCH64_LO12) - .Case("abs_g0", AArch64MCExpr::VK_AARCH64_ABS_G0) - .Case("abs_g0_nc", AArch64MCExpr::VK_AARCH64_ABS_G0_NC) - .Case("abs_g1", AArch64MCExpr::VK_AARCH64_ABS_G1) - .Case("abs_g1_nc", AArch64MCExpr::VK_AARCH64_ABS_G1_NC) - .Case("abs_g2", AArch64MCExpr::VK_AARCH64_ABS_G2) - .Case("abs_g2_nc", AArch64MCExpr::VK_AARCH64_ABS_G2_NC) - .Case("abs_g3", AArch64MCExpr::VK_AARCH64_ABS_G3) - .Case("abs_g0_s", AArch64MCExpr::VK_AARCH64_SABS_G0) - .Case("abs_g1_s", AArch64MCExpr::VK_AARCH64_SABS_G1) - .Case("abs_g2_s", AArch64MCExpr::VK_AARCH64_SABS_G2) - .Case("dtprel_g2", AArch64MCExpr::VK_AARCH64_DTPREL_G2) - .Case("dtprel_g1", AArch64MCExpr::VK_AARCH64_DTPREL_G1) - .Case("dtprel_g1_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC) - .Case("dtprel_g0", AArch64MCExpr::VK_AARCH64_DTPREL_G0) - .Case("dtprel_g0_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC) - .Case("dtprel_hi12", AArch64MCExpr::VK_AARCH64_DTPREL_HI12) - .Case("dtprel_lo12", AArch64MCExpr::VK_AARCH64_DTPREL_LO12) - .Case("dtprel_lo12_nc", AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC) - .Case("gottprel_g1", AArch64MCExpr::VK_AARCH64_GOTTPREL_G1) - .Case("gottprel_g0_nc", AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC) - .Case("gottprel", AArch64MCExpr::VK_AARCH64_GOTTPREL) - .Case("gottprel_lo12", AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12) - .Case("tprel_g2", AArch64MCExpr::VK_AARCH64_TPREL_G2) - .Case("tprel_g1", AArch64MCExpr::VK_AARCH64_TPREL_G1) - .Case("tprel_g1_nc", AArch64MCExpr::VK_AARCH64_TPREL_G1_NC) - .Case("tprel_g0", AArch64MCExpr::VK_AARCH64_TPREL_G0) - .Case("tprel_g0_nc", AArch64MCExpr::VK_AARCH64_TPREL_G0_NC) - .Case("tprel_hi12", AArch64MCExpr::VK_AARCH64_TPREL_HI12) - .Case("tprel_lo12", AArch64MCExpr::VK_AARCH64_TPREL_LO12) - .Case("tprel_lo12_nc", 
AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC) - .Case("tlsdesc", AArch64MCExpr::VK_AARCH64_TLSDESC) - .Case("tlsdesc_lo12", AArch64MCExpr::VK_AARCH64_TLSDESC_LO12) - .Default(AArch64MCExpr::VK_AARCH64_None); - - if (RefKind == AArch64MCExpr::VK_AARCH64_None) { - Error(Parser.getTok().getLoc(), - "expected relocation specifier in operand after ':'"); - return MatchOperand_ParseFail; - } - Parser.Lex(); // Eat identifier - - if (getLexer().isNot(AsmToken::Colon)) { - Error(Parser.getTok().getLoc(), - "expected ':' after relocation specifier"); - return MatchOperand_ParseFail; - } - Parser.Lex(); - return MatchOperand_Success; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseImmWithLSLOperand( - SmallVectorImpl &Operands) { - - SMLoc S = Parser.getTok().getLoc(); - - if (Parser.getTok().is(AsmToken::Hash)) - Parser.Lex(); // Eat '#' - else if (Parser.getTok().isNot(AsmToken::Integer)) - // Operand should start from # or should be integer, emit error otherwise. - return MatchOperand_NoMatch; - - const MCExpr *Imm; - if (ParseImmediate(Imm) != MatchOperand_Success) - return MatchOperand_ParseFail; - else if (Parser.getTok().isNot(AsmToken::Comma)) { - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, 0, true, S, E)); - return MatchOperand_Success; - } - - // Eat ',' - Parser.Lex(); - - // The optional operand must be "lsl #N" where N is non-negative. - if (Parser.getTok().is(AsmToken::Identifier) - && Parser.getTok().getIdentifier().equals_lower("lsl")) { - Parser.Lex(); - - if (Parser.getTok().is(AsmToken::Hash)) { - Parser.Lex(); - - if (Parser.getTok().isNot(AsmToken::Integer)) { - Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate"); - return MatchOperand_ParseFail; - } - } - } - - int64_t ShiftAmount = Parser.getTok().getIntVal(); - - if (ShiftAmount < 0) { - Error(Parser.getTok().getLoc(), "positive shift amount required"); - return MatchOperand_ParseFail; - } - Parser.Lex(); // Eat the number - - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, ShiftAmount, - false, S, E)); - return MatchOperand_Success; -} - - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseCondCodeOperand( - SmallVectorImpl &Operands) { - if (Parser.getTok().isNot(AsmToken::Identifier)) - return MatchOperand_NoMatch; - - StringRef Tok = Parser.getTok().getIdentifier(); - A64CC::CondCodes CondCode = A64StringToCondCode(Tok); - - if (CondCode == A64CC::Invalid) - return MatchOperand_NoMatch; - - SMLoc S = Parser.getTok().getLoc(); - Parser.Lex(); // Eat condition code - SMLoc E = Parser.getTok().getLoc(); - - Operands.push_back(AArch64Operand::CreateCondCode(CondCode, S, E)); - return MatchOperand_Success; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseCRxOperand( - SmallVectorImpl &Operands) { - SMLoc S = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::Identifier)) { - Error(S, "Expected cN operand where 0 <= N <= 15"); - return MatchOperand_ParseFail; - } - - StringRef Tok = Parser.getTok().getIdentifier(); - if (Tok[0] != 'c' && Tok[0] != 'C') { - Error(S, "Expected cN operand where 0 <= N <= 15"); - return MatchOperand_ParseFail; - } - - uint32_t CRNum; - bool BadNum = Tok.drop_front().getAsInteger(10, CRNum); - if (BadNum || CRNum > 15) { - Error(S, "Expected cN operand where 0 <= N <= 15"); - return MatchOperand_ParseFail; - } - - const MCExpr *CRImm = MCConstantExpr::Create(CRNum, getContext()); - - Parser.Lex(); - SMLoc E = 
Parser.getTok().getLoc(); - - Operands.push_back(AArch64Operand::CreateImm(CRImm, S, E)); - return MatchOperand_Success; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseFPImmOperand( - SmallVectorImpl &Operands) { - - SMLoc S = Parser.getTok().getLoc(); - - bool Hash = false; - if (Parser.getTok().is(AsmToken::Hash)) { - Parser.Lex(); // Eat '#' - Hash = true; - } - - bool Negative = false; - if (Parser.getTok().is(AsmToken::Minus)) { - Negative = true; - Parser.Lex(); // Eat '-' - } else if (Parser.getTok().is(AsmToken::Plus)) { - Parser.Lex(); // Eat '+' - } - - if (Parser.getTok().isNot(AsmToken::Real)) { - if (!Hash) - return MatchOperand_NoMatch; - Error(S, "Expected floating-point immediate"); - return MatchOperand_ParseFail; - } - - APFloat RealVal(APFloat::IEEEdouble, Parser.getTok().getString()); - if (Negative) RealVal.changeSign(); - double DblVal = RealVal.convertToDouble(); - - Parser.Lex(); // Eat real number - SMLoc E = Parser.getTok().getLoc(); - - Operands.push_back(AArch64Operand::CreateFPImm(DblVal, S, E)); - return MatchOperand_Success; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseFPImm0AndImm0Operand( - SmallVectorImpl &Operands) { - - SMLoc S = Parser.getTok().getLoc(); - - bool Hash = false; - if (Parser.getTok().is(AsmToken::Hash)) { - Parser.Lex(); // Eat '#' - Hash = true; - } - - APFloat RealVal(0.0); - if (Parser.getTok().is(AsmToken::Real)) { - if(Parser.getTok().getString() != "0.0") { - Error(S, "only #0.0 is acceptable as immediate"); - return MatchOperand_ParseFail; - } - } - else if (Parser.getTok().is(AsmToken::Integer)) { - if(Parser.getTok().getIntVal() != 0) { - Error(S, "only #0.0 is acceptable as immediate"); - return MatchOperand_ParseFail; - } - } - else { - if (!Hash) - return MatchOperand_NoMatch; - Error(S, "only #0.0 is acceptable as immediate"); - return MatchOperand_ParseFail; - } - - Parser.Lex(); // Eat real number - SMLoc E = Parser.getTok().getLoc(); - - Operands.push_back(AArch64Operand::CreateFPImm(0.0, S, E)); - return MatchOperand_Success; -} - -// Automatically generated -static unsigned MatchRegisterName(StringRef Name); - -bool -AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, - StringRef &Layout, - SMLoc &LayoutLoc) const { - const AsmToken &Tok = Parser.getTok(); - - if (Tok.isNot(AsmToken::Identifier)) - return false; - - std::string LowerReg = Tok.getString().lower(); - size_t DotPos = LowerReg.find('.'); - - bool IsVec128 = false; - SMLoc S = Tok.getLoc(); - RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos); - - if (DotPos == std::string::npos) { - Layout = StringRef(); - } else { - // Everything afterwards needs to be a literal token, expected to be - // '.2d','.b' etc for vector registers. - - // This StringSwitch validates the input and (perhaps more importantly) - // gives us a permanent string to use in the token (a pointer into LowerReg - // would go out of scope when we return). - LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1); - StringRef LayoutText = StringRef(LowerReg).substr(DotPos); - - // See if it's a 128-bit layout first. 
- Layout = StringSwitch(LayoutText) - .Case(".q", ".q").Case(".1q", ".1q") - .Case(".d", ".d").Case(".2d", ".2d") - .Case(".s", ".s").Case(".4s", ".4s") - .Case(".h", ".h").Case(".8h", ".8h") - .Case(".b", ".b").Case(".16b", ".16b") - .Default(""); - - if (Layout.size() != 0) - IsVec128 = true; - else { - Layout = StringSwitch(LayoutText) - .Case(".1d", ".1d") - .Case(".2s", ".2s") - .Case(".4h", ".4h") - .Case(".8b", ".8b") - .Default(""); - } - - if (Layout.size() == 0) { - // If we've still not pinned it down the register is malformed. - return false; - } - } - - RegNum = MatchRegisterName(LowerReg.substr(0, DotPos)); - if (RegNum == AArch64::NoRegister) { - RegNum = StringSwitch(LowerReg.substr(0, DotPos)) - .Case("ip0", AArch64::X16) - .Case("ip1", AArch64::X17) - .Case("fp", AArch64::X29) - .Case("lr", AArch64::X30) - .Case("v0", IsVec128 ? AArch64::Q0 : AArch64::D0) - .Case("v1", IsVec128 ? AArch64::Q1 : AArch64::D1) - .Case("v2", IsVec128 ? AArch64::Q2 : AArch64::D2) - .Case("v3", IsVec128 ? AArch64::Q3 : AArch64::D3) - .Case("v4", IsVec128 ? AArch64::Q4 : AArch64::D4) - .Case("v5", IsVec128 ? AArch64::Q5 : AArch64::D5) - .Case("v6", IsVec128 ? AArch64::Q6 : AArch64::D6) - .Case("v7", IsVec128 ? AArch64::Q7 : AArch64::D7) - .Case("v8", IsVec128 ? AArch64::Q8 : AArch64::D8) - .Case("v9", IsVec128 ? AArch64::Q9 : AArch64::D9) - .Case("v10", IsVec128 ? AArch64::Q10 : AArch64::D10) - .Case("v11", IsVec128 ? AArch64::Q11 : AArch64::D11) - .Case("v12", IsVec128 ? AArch64::Q12 : AArch64::D12) - .Case("v13", IsVec128 ? AArch64::Q13 : AArch64::D13) - .Case("v14", IsVec128 ? AArch64::Q14 : AArch64::D14) - .Case("v15", IsVec128 ? AArch64::Q15 : AArch64::D15) - .Case("v16", IsVec128 ? AArch64::Q16 : AArch64::D16) - .Case("v17", IsVec128 ? AArch64::Q17 : AArch64::D17) - .Case("v18", IsVec128 ? AArch64::Q18 : AArch64::D18) - .Case("v19", IsVec128 ? AArch64::Q19 : AArch64::D19) - .Case("v20", IsVec128 ? AArch64::Q20 : AArch64::D20) - .Case("v21", IsVec128 ? AArch64::Q21 : AArch64::D21) - .Case("v22", IsVec128 ? AArch64::Q22 : AArch64::D22) - .Case("v23", IsVec128 ? AArch64::Q23 : AArch64::D23) - .Case("v24", IsVec128 ? AArch64::Q24 : AArch64::D24) - .Case("v25", IsVec128 ? AArch64::Q25 : AArch64::D25) - .Case("v26", IsVec128 ? AArch64::Q26 : AArch64::D26) - .Case("v27", IsVec128 ? AArch64::Q27 : AArch64::D27) - .Case("v28", IsVec128 ? AArch64::Q28 : AArch64::D28) - .Case("v29", IsVec128 ? AArch64::Q29 : AArch64::D29) - .Case("v30", IsVec128 ? AArch64::Q30 : AArch64::D30) - .Case("v31", IsVec128 ? AArch64::Q31 : AArch64::D31) - .Default(AArch64::NoRegister); - } - if (RegNum == AArch64::NoRegister) - return false; - - return true; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseRegister(SmallVectorImpl &Operands, - uint32_t &NumLanes) { - unsigned RegNum; - StringRef Layout; - SMLoc RegEndLoc, LayoutLoc; - SMLoc S = Parser.getTok().getLoc(); - - if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)) - return MatchOperand_NoMatch; - - Operands.push_back(AArch64Operand::CreateReg(RegNum, S, RegEndLoc)); - - if (Layout.size() != 0) { - unsigned long long TmpLanes = 0; - llvm::getAsUnsignedInteger(Layout.substr(1), 10, TmpLanes); - if (TmpLanes != 0) { - NumLanes = TmpLanes; - } else { - // If the number of lanes isn't specified explicitly, a valid instruction - // will have an element specifier and be capable of acting on the entire - // vector register. 
- switch (Layout.back()) { - default: llvm_unreachable("Invalid layout specifier"); - case 'b': NumLanes = 16; break; - case 'h': NumLanes = 8; break; - case 's': NumLanes = 4; break; - case 'd': NumLanes = 2; break; - case 'q': NumLanes = 1; break; - } - } - - Operands.push_back(AArch64Operand::CreateToken(Layout, LayoutLoc)); - } - - Parser.Lex(); - return MatchOperand_Success; -} - -bool -AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, - SMLoc &EndLoc) { - // This callback is used for things like DWARF frame directives in - // assembly. They don't care about things like NEON layouts or lanes, they - // just want to be able to produce the DWARF register number. - StringRef LayoutSpec; - SMLoc RegEndLoc, LayoutLoc; - StartLoc = Parser.getTok().getLoc(); - - if (!IdentifyRegister(RegNo, RegEndLoc, LayoutSpec, LayoutLoc)) - return true; - - Parser.Lex(); - EndLoc = Parser.getTok().getLoc(); - - return false; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseNamedImmOperand(const NamedImmMapper &Mapper, - SmallVectorImpl &Operands) { - // Since these operands occur in very limited circumstances, without - // alternatives, we actually signal an error if there is no match. If relaxing - // this, beware of unintended consequences: an immediate will be accepted - // during matching, no matter how it gets into the AArch64Operand. - const AsmToken &Tok = Parser.getTok(); - SMLoc S = Tok.getLoc(); - - if (Tok.is(AsmToken::Identifier)) { - bool ValidName; - uint32_t Code = Mapper.fromString(Tok.getString().lower(), ValidName); - - if (!ValidName) { - Error(S, "operand specifier not recognised"); - return MatchOperand_ParseFail; - } - - Parser.Lex(); // We're done with the identifier. Eat it - - SMLoc E = Parser.getTok().getLoc(); - const MCExpr *Imm = MCConstantExpr::Create(Code, getContext()); - Operands.push_back(AArch64Operand::CreateImm(Imm, S, E)); - return MatchOperand_Success; - } else if (Tok.is(AsmToken::Hash)) { - Parser.Lex(); - - const MCExpr *ImmVal; - if (ParseImmediate(ImmVal) != MatchOperand_Success) - return MatchOperand_ParseFail; - - const MCConstantExpr *CE = dyn_cast(ImmVal); - if (!CE || CE->getValue() < 0 || !Mapper.validImm(CE->getValue())) { - Error(S, "Invalid immediate for instruction"); - return MatchOperand_ParseFail; - } - - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E)); - return MatchOperand_Success; - } - - Error(S, "unexpected operand for instruction"); - return MatchOperand_ParseFail; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseSysRegOperand( - SmallVectorImpl &Operands) { - const AsmToken &Tok = Parser.getTok(); - - // Any MSR/MRS operand will be an identifier, and we want to store it as some - // kind of string: SPSel is valid for two different forms of MSR with two - // different encodings. There's no collision at the moment, but the potential - // is there. 
- if (!Tok.is(AsmToken::Identifier)) { - return MatchOperand_NoMatch; - } - - SMLoc S = Tok.getLoc(); - Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), S)); - Parser.Lex(); // Eat identifier - - return MatchOperand_Success; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseLSXAddressOperand( - SmallVectorImpl &Operands) { - SMLoc S = Parser.getTok().getLoc(); - - unsigned RegNum; - SMLoc RegEndLoc, LayoutLoc; - StringRef Layout; - if(!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc) - || !AArch64MCRegisterClasses[AArch64::GPR64xspRegClassID].contains(RegNum) - || Layout.size() != 0) { - // Check Layout.size because we don't want to let "x3.4s" or similar - // through. - return MatchOperand_NoMatch; - } - Parser.Lex(); // Eat register - - if (Parser.getTok().is(AsmToken::RBrac)) { - // We're done - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E)); - return MatchOperand_Success; - } - - // Otherwise, only ", #0" is valid - - if (Parser.getTok().isNot(AsmToken::Comma)) { - Error(Parser.getTok().getLoc(), "expected ',' or ']' after register"); - return MatchOperand_ParseFail; - } - Parser.Lex(); // Eat ',' - - if (Parser.getTok().isNot(AsmToken::Hash)) { - Error(Parser.getTok().getLoc(), "expected '#0'"); - return MatchOperand_ParseFail; - } - Parser.Lex(); // Eat '#' - - if (Parser.getTok().isNot(AsmToken::Integer) - || Parser.getTok().getIntVal() != 0 ) { - Error(Parser.getTok().getLoc(), "expected '#0'"); - return MatchOperand_ParseFail; - } - Parser.Lex(); // Eat '0' - - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E)); - return MatchOperand_Success; -} - -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseShiftExtend( - SmallVectorImpl &Operands) { - StringRef IDVal = Parser.getTok().getIdentifier(); - std::string LowerID = IDVal.lower(); - - A64SE::ShiftExtSpecifiers Spec = - StringSwitch(LowerID) - .Case("lsl", A64SE::LSL) - .Case("msl", A64SE::MSL) - .Case("lsr", A64SE::LSR) - .Case("asr", A64SE::ASR) - .Case("ror", A64SE::ROR) - .Case("uxtb", A64SE::UXTB) - .Case("uxth", A64SE::UXTH) - .Case("uxtw", A64SE::UXTW) - .Case("uxtx", A64SE::UXTX) - .Case("sxtb", A64SE::SXTB) - .Case("sxth", A64SE::SXTH) - .Case("sxtw", A64SE::SXTW) - .Case("sxtx", A64SE::SXTX) - .Default(A64SE::Invalid); - - if (Spec == A64SE::Invalid) - return MatchOperand_NoMatch; - - // Eat the shift - SMLoc S, E; - S = Parser.getTok().getLoc(); - Parser.Lex(); - - if (Spec != A64SE::LSL && Spec != A64SE::LSR && Spec != A64SE::ASR && - Spec != A64SE::ROR && Spec != A64SE::MSL) { - // The shift amount can be omitted for the extending versions, but not real - // shifts: - // add x0, x0, x0, uxtb - // is valid, and equivalent to - // add x0, x0, x0, uxtb #0 - - if (Parser.getTok().is(AsmToken::Comma) || - Parser.getTok().is(AsmToken::EndOfStatement) || - Parser.getTok().is(AsmToken::RBrac)) { - Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true, - S, E)); - return MatchOperand_Success; - } - } - - // Eat # at beginning of immediate - if (!Parser.getTok().is(AsmToken::Hash)) { - Error(Parser.getTok().getLoc(), - "expected #imm after shift specifier"); - return MatchOperand_ParseFail; - } - Parser.Lex(); - - // Make sure we do actually have a number - if (!Parser.getTok().is(AsmToken::Integer)) { - Error(Parser.getTok().getLoc(), - "expected integer shift amount"); - return MatchOperand_ParseFail; - } - unsigned Amount = 
Parser.getTok().getIntVal(); - Parser.Lex(); - E = Parser.getTok().getLoc(); - - Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false, - S, E)); - - return MatchOperand_Success; -} - -/// Try to parse a vector register token, If it is a vector register, -/// the token is eaten and return true. Otherwise return false. -bool AArch64AsmParser::TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc, - StringRef &Layout, SMLoc &LayoutLoc) { - bool IsVector = true; - - if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)) - IsVector = false; - else if (!AArch64MCRegisterClasses[AArch64::FPR64RegClassID] - .contains(RegNum) && - !AArch64MCRegisterClasses[AArch64::FPR128RegClassID] - .contains(RegNum)) - IsVector = false; - else if (Layout.size() == 0) - IsVector = false; - - if (!IsVector) - Error(Parser.getTok().getLoc(), "expected vector type register"); - - Parser.Lex(); // Eat this token. - return IsVector; -} - - -// A vector list contains 1-4 consecutive registers. -// Now there are two kinds of vector list when number of vector > 1: -// (1) {Vn.layout, Vn+1.layout, ... , Vm.layout} -// (2) {Vn.layout - Vm.layout} -// If the layout is like .b/.h/.s/.d, also parse the lane. -AArch64AsmParser::OperandMatchResultTy AArch64AsmParser::ParseVectorList( - SmallVectorImpl &Operands) { - if (Parser.getTok().isNot(AsmToken::LCurly)) { - Error(Parser.getTok().getLoc(), "'{' expected"); - return MatchOperand_ParseFail; - } - SMLoc SLoc = Parser.getTok().getLoc(); - Parser.Lex(); // Eat '{' token. - - unsigned Reg, Count = 1; - StringRef LayoutStr; - SMLoc RegEndLoc, LayoutLoc; - if (!TryParseVector(Reg, RegEndLoc, LayoutStr, LayoutLoc)) - return MatchOperand_ParseFail; - - if (Parser.getTok().is(AsmToken::Minus)) { - Parser.Lex(); // Eat the minus. - - unsigned Reg2; - StringRef LayoutStr2; - SMLoc RegEndLoc2, LayoutLoc2; - SMLoc RegLoc2 = Parser.getTok().getLoc(); - - if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2)) - return MatchOperand_ParseFail; - unsigned Space = (Reg < Reg2) ? (Reg2 - Reg) : (Reg2 + 32 - Reg); - - if (LayoutStr != LayoutStr2) { - Error(LayoutLoc2, "expected the same vector layout"); - return MatchOperand_ParseFail; - } - if (Space == 0 || Space > 3) { - Error(RegLoc2, "invalid number of vectors"); - return MatchOperand_ParseFail; - } - - Count += Space; - } else { - unsigned LastReg = Reg; - while (Parser.getTok().is(AsmToken::Comma)) { - Parser.Lex(); // Eat the comma. - unsigned Reg2; - StringRef LayoutStr2; - SMLoc RegEndLoc2, LayoutLoc2; - SMLoc RegLoc2 = Parser.getTok().getLoc(); - - if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2)) - return MatchOperand_ParseFail; - unsigned Space = (LastReg < Reg2) ? (Reg2 - LastReg) - : (Reg2 + 32 - LastReg); - Count++; - - // The space between two vectors should be 1. And they should have the same layout. - // Total count shouldn't be great than 4 - if (Space != 1) { - Error(RegLoc2, "invalid space between two vectors"); - return MatchOperand_ParseFail; - } - if (LayoutStr != LayoutStr2) { - Error(LayoutLoc2, "expected the same vector layout"); - return MatchOperand_ParseFail; - } - if (Count > 4) { - Error(RegLoc2, "invalid number of vectors"); - return MatchOperand_ParseFail; - } - - LastReg = Reg2; - } - } - - if (Parser.getTok().isNot(AsmToken::RCurly)) { - Error(Parser.getTok().getLoc(), "'}' expected"); - return MatchOperand_ParseFail; - } - SMLoc ELoc = Parser.getTok().getLoc(); - Parser.Lex(); // Eat '}' token. 
- - A64Layout::VectorLayout Layout = A64StringToVectorLayout(LayoutStr); - if (Count > 1) { // If count > 1, create vector list using super register. - bool IsVec64 = (Layout < A64Layout::VL_16B); - static unsigned SupRegIDs[3][2] = { - { AArch64::QPairRegClassID, AArch64::DPairRegClassID }, - { AArch64::QTripleRegClassID, AArch64::DTripleRegClassID }, - { AArch64::QQuadRegClassID, AArch64::DQuadRegClassID } - }; - unsigned SupRegID = SupRegIDs[Count - 2][static_cast(IsVec64)]; - unsigned Sub0 = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0; - const MCRegisterInfo *MRI = getContext().getRegisterInfo(); - Reg = MRI->getMatchingSuperReg(Reg, Sub0, - &AArch64MCRegisterClasses[SupRegID]); - } - Operands.push_back( - AArch64Operand::CreateVectorList(Reg, Count, Layout, SLoc, ELoc)); - - if (Parser.getTok().is(AsmToken::LBrac)) { - uint32_t NumLanes = 0; - switch(Layout) { - case A64Layout::VL_B : NumLanes = 16; break; - case A64Layout::VL_H : NumLanes = 8; break; - case A64Layout::VL_S : NumLanes = 4; break; - case A64Layout::VL_D : NumLanes = 2; break; - default: - SMLoc Loc = getLexer().getLoc(); - Error(Loc, "expected comma before next operand"); - return MatchOperand_ParseFail; - } - return ParseNEONLane(Operands, NumLanes); - } else { - return MatchOperand_Success; - } -} - -// FIXME: We would really like to be able to tablegen'erate this. -bool AArch64AsmParser:: -validateInstruction(MCInst &Inst, - const SmallVectorImpl &Operands) { - switch (Inst.getOpcode()) { - case AArch64::BFIwwii: - case AArch64::BFIxxii: - case AArch64::SBFIZwwii: - case AArch64::SBFIZxxii: - case AArch64::UBFIZwwii: - case AArch64::UBFIZxxii: { - unsigned ImmOps = Inst.getNumOperands() - 2; - int64_t ImmR = Inst.getOperand(ImmOps).getImm(); - int64_t ImmS = Inst.getOperand(ImmOps+1).getImm(); - - if (ImmR != 0 && ImmS >= ImmR) { - return Error(Operands[4]->getStartLoc(), - "requested insert overflows register"); - } - return false; - } - case AArch64::BFXILwwii: - case AArch64::BFXILxxii: - case AArch64::SBFXwwii: - case AArch64::SBFXxxii: - case AArch64::UBFXwwii: - case AArch64::UBFXxxii: { - unsigned ImmOps = Inst.getNumOperands() - 2; - int64_t ImmR = Inst.getOperand(ImmOps).getImm(); - int64_t ImmS = Inst.getOperand(ImmOps+1).getImm(); - int64_t RegWidth = 0; - switch (Inst.getOpcode()) { - case AArch64::SBFXxxii: case AArch64::UBFXxxii: case AArch64::BFXILxxii: - RegWidth = 64; - break; - case AArch64::SBFXwwii: case AArch64::UBFXwwii: case AArch64::BFXILwwii: - RegWidth = 32; - break; - } - - if (ImmS >= RegWidth || ImmS < ImmR) { - return Error(Operands[4]->getStartLoc(), - "requested extract overflows register"); - } - return false; - } - case AArch64::ICix: { - int64_t ImmVal = Inst.getOperand(0).getImm(); - A64IC::ICValues ICOp = static_cast(ImmVal); - if (!A64IC::NeedsRegister(ICOp)) { - return Error(Operands[1]->getStartLoc(), - "specified IC op does not use a register"); - } - return false; - } - case AArch64::ICi: { - int64_t ImmVal = Inst.getOperand(0).getImm(); - A64IC::ICValues ICOp = static_cast(ImmVal); - if (A64IC::NeedsRegister(ICOp)) { - return Error(Operands[1]->getStartLoc(), - "specified IC op requires a register"); - } - return false; - } - case AArch64::TLBIix: { - int64_t ImmVal = Inst.getOperand(0).getImm(); - A64TLBI::TLBIValues TLBIOp = static_cast(ImmVal); - if (!A64TLBI::NeedsRegister(TLBIOp)) { - return Error(Operands[1]->getStartLoc(), - "specified TLBI op does not use a register"); - } - return false; - } - case AArch64::TLBIi: { - int64_t ImmVal = Inst.getOperand(0).getImm(); - 
A64TLBI::TLBIValues TLBIOp = static_cast(ImmVal); - if (A64TLBI::NeedsRegister(TLBIOp)) { - return Error(Operands[1]->getStartLoc(), - "specified TLBI op requires a register"); - } - return false; - } - } - - return false; -} - - -// Parses the instruction *together with* all operands, appending each parsed -// operand to the "Operands" list -bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, - StringRef Name, SMLoc NameLoc, - SmallVectorImpl &Operands) { - StringRef PatchedName = StringSwitch(Name.lower()) - .Case("beq", "b.eq") - .Case("bne", "b.ne") - .Case("bhs", "b.hs") - .Case("bcs", "b.cs") - .Case("blo", "b.lo") - .Case("bcc", "b.cc") - .Case("bmi", "b.mi") - .Case("bpl", "b.pl") - .Case("bvs", "b.vs") - .Case("bvc", "b.vc") - .Case("bhi", "b.hi") - .Case("bls", "b.ls") - .Case("bge", "b.ge") - .Case("blt", "b.lt") - .Case("bgt", "b.gt") - .Case("ble", "b.le") - .Case("bal", "b.al") - .Case("bnv", "b.nv") - .Default(Name); - - size_t CondCodePos = PatchedName.find('.'); - - StringRef Mnemonic = PatchedName.substr(0, CondCodePos); - Operands.push_back(AArch64Operand::CreateToken(Mnemonic, NameLoc)); - - if (CondCodePos != StringRef::npos) { - // We have a condition code - SMLoc S = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 1); - StringRef CondStr = PatchedName.substr(CondCodePos + 1, StringRef::npos); - A64CC::CondCodes Code; - - Code = A64StringToCondCode(CondStr); - - if (Code == A64CC::Invalid) { - Error(S, "invalid condition code"); - Parser.eatToEndOfStatement(); - return true; - } - - SMLoc DotL = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos); - - Operands.push_back(AArch64Operand::CreateToken(".", DotL)); - SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 3); - Operands.push_back(AArch64Operand::CreateCondCode(Code, S, E)); - } - - // Now we parse the operands of this instruction - if (getLexer().isNot(AsmToken::EndOfStatement)) { - // Read the first operand. - if (ParseOperand(Operands, Mnemonic)) { - Parser.eatToEndOfStatement(); - return true; - } - - while (getLexer().is(AsmToken::Comma)) { - Parser.Lex(); // Eat the comma. - - // Parse and remember the operand. - if (ParseOperand(Operands, Mnemonic)) { - Parser.eatToEndOfStatement(); - return true; - } - - - // After successfully parsing some operands there are two special cases to - // consider (i.e. notional operands not separated by commas). Both are due - // to memory specifiers: - // + An RBrac will end an address for load/store/prefetch - // + An '!' will indicate a pre-indexed operation. - // - // It's someone else's responsibility to make sure these tokens are sane - // in the given context! 
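The deleted ParseInstruction above first canonicalises conditional branches ("beq" becomes "b.eq") and then splits the mnemonic at the dot so the condition code becomes a separate operand. A standalone sketch of that splitting step, under the assumption that only the string handling matters here; the driver is mine, not the patch's code:

#include <cassert>
#include <string>

int main() {
  // "beq" is first patched to the canonical "b.eq" spelling ...
  std::string Patched = "b.eq";
  // ... then split at the '.' into the mnemonic token and the condition string.
  size_t Dot = Patched.find('.');
  std::string Mnemonic = Patched.substr(0, Dot); // "b"
  std::string Cond = Patched.substr(Dot + 1);    // "eq", later mapped to A64CC::EQ
  assert(Mnemonic == "b" && Cond == "eq");
  return 0;
}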
- if (Parser.getTok().is(AsmToken::RBrac)) { - SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateToken("]", Loc)); - Parser.Lex(); - } - - if (Parser.getTok().is(AsmToken::Exclaim)) { - SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateToken("!", Loc)); - Parser.Lex(); - } - } - } - - if (getLexer().isNot(AsmToken::EndOfStatement)) { - SMLoc Loc = getLexer().getLoc(); - Parser.eatToEndOfStatement(); - return Error(Loc, "expected comma before next operand"); - } - - // Eat the EndOfStatement - Parser.Lex(); - - return false; -} - -bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { - StringRef IDVal = DirectiveID.getIdentifier(); - if (IDVal == ".hword") - return ParseDirectiveWord(2, DirectiveID.getLoc()); - else if (IDVal == ".word") - return ParseDirectiveWord(4, DirectiveID.getLoc()); - else if (IDVal == ".xword") - return ParseDirectiveWord(8, DirectiveID.getLoc()); - else if (IDVal == ".tlsdesccall") - return ParseDirectiveTLSDescCall(DirectiveID.getLoc()); - - return true; -} - -/// parseDirectiveWord -/// ::= .word [ expression (, expression)* ] -bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { - if (getLexer().isNot(AsmToken::EndOfStatement)) { - for (;;) { - const MCExpr *Value; - if (getParser().parseExpression(Value)) - return false; - - getParser().getStreamer().EmitValue(Value, Size); - - if (getLexer().is(AsmToken::EndOfStatement)) - break; - - // FIXME: Improve diagnostic. - if (getLexer().isNot(AsmToken::Comma)) { - Error(L, "unexpected token in directive"); - return false; - } - Parser.Lex(); - } - } - - Parser.Lex(); - return false; -} - -// parseDirectiveTLSDescCall: -// ::= .tlsdesccall symbol -bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) { - StringRef Name; - if (getParser().parseIdentifier(Name)) { - Error(L, "expected symbol after directive"); - return false; - } - - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); - - MCInst Inst; - Inst.setOpcode(AArch64::TLSDESCCALL); - Inst.addOperand(MCOperand::CreateExpr(Expr)); - - getParser().getStreamer().EmitInstruction(Inst, STI); - return false; -} - - -bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl &Operands, - MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm) { - MCInst Inst; - unsigned MatchResult; - MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, - MatchingInlineAsm); - - if (ErrorInfo != ~0U && ErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); - - switch (MatchResult) { - default: break; - case Match_Success: - if (validateInstruction(Inst, Operands)) - return true; - - Out.EmitInstruction(Inst, STI); - return false; - case Match_MissingFeature: - Error(IDLoc, "instruction requires a CPU feature not currently enabled"); - return true; - case Match_InvalidOperand: { - SMLoc ErrorLoc = IDLoc; - if (ErrorInfo != ~0U) { - ErrorLoc = ((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(); - if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; - } - - return Error(ErrorLoc, "invalid operand for instruction"); - } - case Match_MnemonicFail: - return Error(IDLoc, "invalid instruction"); - - case Match_AddSubRegExtendSmall: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]"); - case Match_AddSubRegExtendLarge: - return 
Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]"); - case Match_AddSubRegShift32: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]"); - case Match_AddSubRegShift64: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]"); - case Match_AddSubSecondSource: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected compatible register, symbol or integer in range [0, 4095]"); - case Match_CVTFixedPos32: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 32]"); - case Match_CVTFixedPos64: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 64]"); - case Match_CondCode: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected AArch64 condition code"); - case Match_FPImm: - // Any situation which allows a nontrivial floating-point constant also - // allows a register. - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected compatible register or floating-point constant"); - case Match_FPZero: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected floating-point constant #0.0 or invalid register type"); - case Match_Label: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected label or encodable integer pc offset"); - case Match_Lane1: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected lane specifier '[1]'"); - case Match_LoadStoreExtend32_1: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'uxtw' or 'sxtw' with optional shift of #0"); - case Match_LoadStoreExtend32_2: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'uxtw' or 'sxtw' with optional shift of #0 or #1"); - case Match_LoadStoreExtend32_4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'uxtw' or 'sxtw' with optional shift of #0 or #2"); - case Match_LoadStoreExtend32_8: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'uxtw' or 'sxtw' with optional shift of #0 or #3"); - case Match_LoadStoreExtend32_16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtw' with optional shift of #0 or #4"); - case Match_LoadStoreExtend64_1: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0"); - case Match_LoadStoreExtend64_2: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0 or #1"); - case Match_LoadStoreExtend64_4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0 or #2"); - case Match_LoadStoreExtend64_8: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0 or #3"); - case Match_LoadStoreExtend64_16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0 or #4"); - case Match_LoadStoreSImm7_4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer multiple of 4 in 
range [-256, 252]"); - case Match_LoadStoreSImm7_8: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer multiple of 8 in range [-512, 504]"); - case Match_LoadStoreSImm7_16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer multiple of 16 in range [-1024, 1008]"); - case Match_LoadStoreSImm9: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [-256, 255]"); - case Match_LoadStoreUImm12_1: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or integer in range [0, 4095]"); - case Match_LoadStoreUImm12_2: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or integer in range [0, 8190]"); - case Match_LoadStoreUImm12_4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or integer in range [0, 16380]"); - case Match_LoadStoreUImm12_8: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or integer in range [0, 32760]"); - case Match_LoadStoreUImm12_16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or integer in range [0, 65520]"); - case Match_LogicalSecondSource: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected compatible register or logical immediate"); - case Match_MOVWUImm16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected relocated symbol or integer in range [0, 65535]"); - case Match_MRS: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected readable system register"); - case Match_MSR: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected writable system register or pstate"); - case Match_NamedImm_at: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic 'at' operand: s1e[0-3][rw] or s12e[01][rw]"); - case Match_NamedImm_dbarrier: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 15] or symbolic barrier operand"); - case Match_NamedImm_dc: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic 'dc' operand"); - case Match_NamedImm_ic: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'ic' operand: 'ialluis', 'iallu' or 'ivau'"); - case Match_NamedImm_isb: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 15] or 'sy'"); - case Match_NamedImm_prefetch: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected prefetch hint: p(ld|st|i)l[123](strm|keep)"); - case Match_NamedImm_tlbi: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected translation buffer invalidation operand"); - case Match_UImm16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 65535]"); - case Match_UImm3: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 7]"); - case Match_UImm4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 15]"); - case Match_UImm5: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 31]"); - case Match_UImm6: - return 
Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 63]"); - case Match_UImm7: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 127]"); - case Match_Width32: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [, 31]"); - case Match_Width64: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [, 63]"); - case Match_ShrImm8: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 8]"); - case Match_ShrImm16: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 16]"); - case Match_ShrImm32: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 32]"); - case Match_ShrImm64: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 64]"); - case Match_ShlImm8: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 7]"); - case Match_ShlImm16: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 15]"); - case Match_ShlImm32: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 31]"); - case Match_ShlImm64: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 63]"); - } - - llvm_unreachable("Implement any new match types added!"); - return true; -} - -void AArch64Operand::print(raw_ostream &OS) const { - switch (Kind) { - case k_CondCode: - OS << ""; - break; - case k_FPImmediate: - OS << ""; - break; - case k_ImmWithLSL: - OS << ""; - break; - case k_Immediate: - getImm()->print(OS); - break; - case k_Register: - OS << "'; - break; - case k_Token: - OS << '\'' << getToken() << '\''; - break; - case k_ShiftExtend: - OS << ""; - break; - case k_SysReg: { - StringRef Name(SysReg.Data, SysReg.Length); - OS << "'; - break; - } - default: - llvm_unreachable("No idea how to print this kind of operand"); - break; - } -} - -void AArch64Operand::dump() const { - print(errs()); -} - - -/// Force static initialization. -extern "C" void LLVMInitializeAArch64AsmParser() { - RegisterMCAsmParser X(TheAArch64leTarget); - RegisterMCAsmParser Y(TheAArch64beTarget); -} - -#define GET_REGISTER_MATCHER -#define GET_MATCHER_IMPLEMENTATION -#include "AArch64GenAsmMatcher.inc" diff --git a/lib/Target/AArch64/AsmParser/CMakeLists.txt b/lib/Target/AArch64/AsmParser/CMakeLists.txt deleted file mode 100644 index e81ec70437a4..000000000000 --- a/lib/Target/AArch64/AsmParser/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMAArch64AsmParser - AArch64AsmParser.cpp - ) diff --git a/lib/Target/AArch64/AsmParser/LLVMBuild.txt b/lib/Target/AArch64/AsmParser/LLVMBuild.txt deleted file mode 100644 index 2d8f63212378..000000000000 --- a/lib/Target/AArch64/AsmParser/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt -------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
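The Match_ShrImm*/Match_ShlImm* diagnostics above encode a simple rule: for an element width of W bits, a vector shift-right immediate must lie in [1, W] while a shift-left immediate must lie in [0, W-1]. A standalone sketch of those checks; the helper names are mine, not from the patch:

#include <cassert>

static bool isValidShrImm(unsigned Imm, unsigned Width) { return Imm >= 1 && Imm <= Width; }
static bool isValidShlImm(unsigned Imm, unsigned Width) { return Imm < Width; }

int main() {
  assert(isValidShrImm(8, 8) && !isValidShrImm(0, 8)); // "expected integer in range [1, 8]"
  assert(isValidShlImm(0, 8) && !isValidShlImm(8, 8)); // "expected integer in range [0, 7]"
  return 0;
}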
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = AArch64AsmParser -parent = AArch64 -required_libraries = AArch64Desc AArch64Info AArch64Utils MC MCParser Support -add_to_library_groups = AArch64 diff --git a/lib/Target/AArch64/AsmParser/Makefile b/lib/Target/AArch64/AsmParser/Makefile deleted file mode 100644 index 56c9ef52ea58..000000000000 --- a/lib/Target/AArch64/AsmParser/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/AArch64/AsmParser/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMAArch64AsmParser - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt deleted file mode 100644 index dfc10afcdcfe..000000000000 --- a/lib/Target/AArch64/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS AArch64.td) - -tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher) -tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv) -tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler) -tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info) -tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) -tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering) -tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info) -tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel) -tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget) -add_public_tablegen_target(AArch64CommonTableGen) - -add_llvm_target(AArch64CodeGen - AArch64AsmPrinter.cpp - AArch64BranchFixupPass.cpp - AArch64FrameLowering.cpp - AArch64ISelDAGToDAG.cpp - AArch64ISelLowering.cpp - AArch64InstrInfo.cpp - AArch64MachineFunctionInfo.cpp - AArch64MCInstLower.cpp - AArch64RegisterInfo.cpp - AArch64SelectionDAGInfo.cpp - AArch64Subtarget.cpp - AArch64TargetMachine.cpp - AArch64TargetObjectFile.cpp - AArch64TargetTransformInfo.cpp - ) - -add_subdirectory(AsmParser) -add_subdirectory(Disassembler) -add_subdirectory(InstPrinter) -add_subdirectory(MCTargetDesc) -add_subdirectory(TargetInfo) -add_subdirectory(Utils) diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp deleted file mode 100644 index 01f1497dc33f..000000000000 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ /dev/null @@ -1,1572 +0,0 @@ -//===- AArch64Disassembler.cpp - Disassembler for AArch64 ISA -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the functions necessary to decode AArch64 instruction -// bitpatterns into MCInsts (with the help of TableGenerated information from -// the instruction definitions). 
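The deleted disassembler below treats every AArch64 instruction as a fixed-width 32-bit word stored little-endian in the instruction stream; its getInstruction assembles the word from four bytes before passing it to the TableGen'erated decoder tables. A standalone sketch of that byte assembly; the helper and the NOP example are my illustration, not the patch's code:

#include <cassert>
#include <cstdint>

static uint32_t assembleInsn(const uint8_t Bytes[4]) {
  return (uint32_t(Bytes[3]) << 24) | (uint32_t(Bytes[2]) << 16) |
         (uint32_t(Bytes[1]) << 8)  |  uint32_t(Bytes[0]);
}

int main() {
  const uint8_t Nop[4] = {0x1f, 0x20, 0x03, 0xd5}; // in-memory bytes of "nop" (0xd503201f)
  assert(assembleInsn(Nop) == 0xd503201fu);
  return 0;
}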
-// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64RegisterInfo.h" -#include "AArch64Subtarget.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCFixedLenDisassembler.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MemoryObject.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "arm-disassembler" - -typedef MCDisassembler::DecodeStatus DecodeStatus; - -namespace { -/// AArch64 disassembler for all AArch64 platforms. -class AArch64Disassembler : public MCDisassembler { -public: - /// Initializes the disassembler. - /// - AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx) - : MCDisassembler(STI, Ctx) { - } - - ~AArch64Disassembler() {} - - /// See MCDisassembler. - DecodeStatus getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const override; -}; - -} - -// Forward-declarations used in the auto-generated files. -static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus -DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus -DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR128LoRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeGPR64noxzrRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - 
uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, - unsigned OptionHiS, - uint64_t Address, - const void *Decoder); - - -static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst, - unsigned Imm6Bits, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, - unsigned Imm6Bits, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, - unsigned RmBits, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); - -template -static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, - unsigned FullImm, - uint64_t Address, - const void *Decoder); - -template -static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst, - unsigned Bits, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, - unsigned ShiftAmount, - uint64_t Address, - const void *Decoder); -template -static DecodeStatus -DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount, - uint64_t Address, const void *Decoder); - -static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, - unsigned ShiftAmount, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - -template -static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - -static DecodeStatus -DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper, - llvm::MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - - -static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeVLDSTPostInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); - -static DecodeStatus 
DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder); - -static bool Check(DecodeStatus &Out, DecodeStatus In); - -#include "AArch64GenDisassemblerTables.inc" - -static bool Check(DecodeStatus &Out, DecodeStatus In) { - switch (In) { - case MCDisassembler::Success: - // Out stays the same. - return true; - case MCDisassembler::SoftFail: - Out = In; - return true; - case MCDisassembler::Fail: - Out = In; - return false; - } - llvm_unreachable("Invalid DecodeStatus!"); -} - -DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, - const MemoryObject &Region, - uint64_t Address, - raw_ostream &os, - raw_ostream &cs) const { - CommentStream = &cs; - - uint8_t bytes[4]; - - // We want to read exactly 4 bytes of data. - if (Region.readBytes(Address, 4, bytes) == -1) { - Size = 0; - return MCDisassembler::Fail; - } - - // Encoded as a small-endian 32-bit word in the stream. - uint32_t insn = (bytes[3] << 24) | - (bytes[2] << 16) | - (bytes[1] << 8) | - (bytes[0] << 0); - - // Calling the auto-generated decoder function. - DecodeStatus result = decodeInstruction(DecoderTableA6432, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { - Size = 4; - return result; - } - - MI.clear(); - Size = 0; - return MCDisassembler::Fail; -} - -static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { - const AArch64Disassembler *Dis = static_cast(D); - const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo(); - return RegInfo->getRegClass(RC).getRegister(RegNo); -} - -static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::GPR64RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::GPR64xspRegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::GPR32RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::GPR32wspRegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::FPR8RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - 
return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::FPR16RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - - -static DecodeStatus -DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::FPR32RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::FPR64RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 15) - return MCDisassembler::Fail; - - return DecodeFPR64RegisterClass(Inst, RegNo, Address, Decoder); -} - -static DecodeStatus -DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::FPR128RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeFPR128LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 15) - return MCDisassembler::Fail; - - return DecodeFPR128RegisterClass(Inst, RegNo, Address, Decoder); -} - -static DecodeStatus DecodeGPR64noxzrRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { - if (RegNo > 30) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::GPR64noxzrRegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeRegisterClassByID(llvm::MCInst &Inst, unsigned RegNo, - unsigned RegID, - const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, RegID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::DPairRegClassID, - Decoder); -} - -static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::QPairRegClassID, - Decoder); -} - -static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::DTripleRegClassID, - Decoder); -} - -static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::QTripleRegClassID, - Decoder); -} - -static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::DQuadRegClassID, - Decoder); -} - -static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned 
RegNo, - uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::QQuadRegClassID, - Decoder); -} - -static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, - unsigned OptionHiS, - uint64_t Address, - const void *Decoder) { - // Option{1} must be 1. OptionHiS is made up of {Option{2}, Option{1}, - // S}. Hence we want to check bit 1. - if (!(OptionHiS & 2)) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(OptionHiS)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst, - unsigned Imm6Bits, - uint64_t Address, - const void *Decoder) { - // In the 32-bit variant, bit 6 must be zero. I.e. the immediate must be - // between 0 and 31. - if (Imm6Bits > 31) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Imm6Bits)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, - unsigned Imm6Bits, - uint64_t Address, - const void *Decoder) { - // 1 <= Imm <= 32. Encoded as 64 - Imm so: 63 >= Encoded >= 32. - if (Imm6Bits < 32) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Imm6Bits)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, - unsigned RmBits, - uint64_t Address, - const void *Decoder) { - // Any bits are valid in the instruction (they're architecturally ignored), - // but a code generator should insert 0. - Inst.addOperand(MCOperand::CreateImm(0)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(8 - Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(16 - Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(32 - Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(64 - Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - if (Val > 7) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - if (Val > 15) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - if (Val > 31) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - if (Val > 63) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; -} - -template -static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, - unsigned FullImm, - uint64_t Address, - const void *Decoder) { - unsigned 
Imm16 = FullImm & 0xffff; - unsigned Shift = FullImm >> 16; - - if (RegWidth == 32 && Shift > 1) return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Imm16)); - Inst.addOperand(MCOperand::CreateImm(Shift)); - return MCDisassembler::Success; -} - -template -static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst, - unsigned Bits, - uint64_t Address, - const void *Decoder) { - uint64_t Imm; - if (!A64Imms::isLogicalImmBits(RegWidth, Bits, Imm)) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Bits)); - return MCDisassembler::Success; -} - - -static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, - unsigned ShiftAmount, - uint64_t Address, - const void *Decoder) { - // Only values 0-4 are valid for this 3-bit field - if (ShiftAmount > 4) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); - return MCDisassembler::Success; -} - -static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, - unsigned ShiftAmount, - uint64_t Address, - const void *Decoder) { - // Only values below 32 are valid for a 32-bit register - if (ShiftAmount > 31) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned ImmS = fieldFromInstruction(Insn, 10, 6); - unsigned ImmR = fieldFromInstruction(Insn, 16, 6); - unsigned SF = fieldFromInstruction(Insn, 31, 1); - - // Undef for 0b11 just in case it occurs. Don't want the compiler to optimise - // out assertions that it thinks should never be hit. - enum OpcTypes { SBFM = 0, BFM, UBFM, Undef } Opc; - Opc = (OpcTypes)fieldFromInstruction(Insn, 29, 2); - - if (!SF) { - // ImmR and ImmS must be between 0 and 31 for 32-bit instructions. - if (ImmR > 31 || ImmS > 31) - return MCDisassembler::Fail; - } - - if (SF) { - DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); - // BFM MCInsts use Rd as a source too. - if (Opc == BFM) DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); - DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); - } else { - DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder); - // BFM MCInsts use Rd as a source too. - if (Opc == BFM) DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder); - DecodeGPR32RegisterClass(Inst, Rn, Address, Decoder); - } - - // ASR and LSR have more specific patterns so they won't get here: - assert(!(ImmS == 31 && !SF && Opc != BFM) - && "shift should have used auto decode"); - assert(!(ImmS == 63 && SF && Opc != BFM) - && "shift should have used auto decode"); - - // Extension instructions similarly: - if (Opc == SBFM && ImmR == 0) { - assert((ImmS != 7 && ImmS != 15) && "extension got here"); - assert((ImmS != 31 || SF == 0) && "extension got here"); - } else if (Opc == UBFM && ImmR == 0) { - assert((SF != 0 || (ImmS != 7 && ImmS != 15)) && "extension got here"); - } - - if (Opc == UBFM) { - // It might be a LSL instruction, which actually takes the shift amount - // itself as an MCInst operand. 
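The check that follows relies on the aliasing rule that "lsl xD, xN, #sh" is encoded as "ubfm xD, xN, #((64 - sh) % 64), #(63 - sh)", so a UBFM whose fields satisfy (ImmS + 1) % 64 == ImmR is really a left shift by 63 - ImmS (and likewise modulo 32 for the W-register form). A standalone sketch verifying that relationship; the loop driver is mine, not part of the patch:

#include <cassert>

int main() {
  for (unsigned Sh = 0; Sh < 64; ++Sh) {
    unsigned ImmR = (64 - Sh) % 64; // UBFM fields produced when encoding "lsl #Sh"
    unsigned ImmS = 63 - Sh;
    assert((ImmS + 1) % 64 == ImmR); // the condition the decoder tests
    assert(63 - ImmS == Sh);         // the shift amount it then re-creates
  }
  return 0;
}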
- if (SF && (ImmS + 1) % 64 == ImmR) { - Inst.setOpcode(AArch64::LSLxxi); - Inst.addOperand(MCOperand::CreateImm(63 - ImmS)); - return MCDisassembler::Success; - } else if (!SF && (ImmS + 1) % 32 == ImmR) { - Inst.setOpcode(AArch64::LSLwwi); - Inst.addOperand(MCOperand::CreateImm(31 - ImmS)); - return MCDisassembler::Success; - } - } - - // Otherwise it's definitely either an extract or an insert depending on which - // of ImmR or ImmS is larger. - unsigned ExtractOp, InsertOp; - switch (Opc) { - default: llvm_unreachable("unexpected instruction trying to decode bitfield"); - case SBFM: - ExtractOp = SF ? AArch64::SBFXxxii : AArch64::SBFXwwii; - InsertOp = SF ? AArch64::SBFIZxxii : AArch64::SBFIZwwii; - break; - case BFM: - ExtractOp = SF ? AArch64::BFXILxxii : AArch64::BFXILwwii; - InsertOp = SF ? AArch64::BFIxxii : AArch64::BFIwwii; - break; - case UBFM: - ExtractOp = SF ? AArch64::UBFXxxii : AArch64::UBFXwwii; - InsertOp = SF ? AArch64::UBFIZxxii : AArch64::UBFIZwwii; - break; - } - - // Otherwise it's a boring insert or extract - Inst.addOperand(MCOperand::CreateImm(ImmR)); - Inst.addOperand(MCOperand::CreateImm(ImmS)); - - - if (ImmS < ImmR) - Inst.setOpcode(InsertOp); - else - Inst.setOpcode(ExtractOp); - - return MCDisassembler::Success; -} - -static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { - // This decoder exists to add the dummy Lane operand to the MCInst, which must - // be 1 in assembly but has no other real manifestation. - unsigned Rd = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned IsToVec = fieldFromInstruction(Insn, 16, 1); - - if (IsToVec) { - DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder); - DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); - } else { - DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); - DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder); - } - - // Add the lane - Inst.addOperand(MCOperand::CreateImm(1)); - - return MCDisassembler::Success; -} - - -static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { - DecodeStatus Result = MCDisassembler::Success; - unsigned Rt = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned Rt2 = fieldFromInstruction(Insn, 10, 5); - unsigned SImm7 = fieldFromInstruction(Insn, 15, 7); - unsigned L = fieldFromInstruction(Insn, 22, 1); - unsigned V = fieldFromInstruction(Insn, 26, 1); - unsigned Opc = fieldFromInstruction(Insn, 30, 2); - - // Not an official name, but it turns out that bit 23 distinguishes indexed - // from non-indexed operations. - unsigned Indexed = fieldFromInstruction(Insn, 23, 1); - - if (Indexed && L == 0) { - // The MCInst for an indexed store has an out operand and 4 ins: - // Rn_wb, Rt, Rt2, Rn, Imm - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - } - - // You shouldn't load to the same register twice in an instruction... - if (L && Rt == Rt2) - Result = MCDisassembler::SoftFail; - - // ... or do any operation that writes-back to a transfer register. But note - // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different. - if (Indexed && V == 0 && Rn != 31 && (Rt == Rn || Rt2 == Rn)) - Result = MCDisassembler::SoftFail; - - // Exactly how we decode the MCInst's registers depends on the Opc and V - // fields of the instruction. 
These also obviously determine the size of the - // operation so we can fill in that information while we're at it. - if (V) { - // The instruction operates on the FP/SIMD registers - switch (Opc) { - default: return MCDisassembler::Fail; - case 0: - DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder); - DecodeFPR32RegisterClass(Inst, Rt2, Address, Decoder); - break; - case 1: - DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); - DecodeFPR64RegisterClass(Inst, Rt2, Address, Decoder); - break; - case 2: - DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - DecodeFPR128RegisterClass(Inst, Rt2, Address, Decoder); - break; - } - } else { - switch (Opc) { - default: return MCDisassembler::Fail; - case 0: - DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder); - DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder); - break; - case 1: - assert(L && "unexpected \"store signed\" attempt"); - DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); - DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder); - break; - case 2: - DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); - DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder); - break; - } - } - - if (Indexed && L == 1) { - // The MCInst for an indexed load has 3 out operands and an 3 ins: - // Rt, Rt2, Rn_wb, Rt2, Rn, Imm - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - } - - - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - Inst.addOperand(MCOperand::CreateImm(SImm7)); - - return Result; -} - -static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst, - uint32_t Val, - uint64_t Address, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(Val, 0, 5); - unsigned Rn = fieldFromInstruction(Val, 5, 5); - unsigned Rt2 = fieldFromInstruction(Val, 10, 5); - unsigned MemSize = fieldFromInstruction(Val, 30, 2); - - DecodeStatus S = MCDisassembler::Success; - if (Rt == Rt2) S = MCDisassembler::SoftFail; - - switch (MemSize) { - case 2: - if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder))) - return MCDisassembler::Fail; - break; - case 3: - if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder))) - return MCDisassembler::Fail; - break; - default: - llvm_unreachable("Invalid MemSize in DecodeLoadPairExclusiveInstruction"); - } - - if (!Check(S, DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder))) - return MCDisassembler::Fail; - - return S; -} - -template -static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { - SomeNamedImmMapper Mapper; - bool ValidNamed; - Mapper.toString(Val, ValidNamed); - if (ValidNamed || Mapper.validImm(Val)) { - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; - } - - return MCDisassembler::Fail; -} - -static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &Mapper, - llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { - bool ValidNamed; - Mapper.toString(Val, ValidNamed); - - Inst.addOperand(MCOperand::CreateImm(Val)); - - return ValidNamed ? 
MCDisassembler::Success : MCDisassembler::Fail; -} - -static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { - return DecodeSysRegOperand(A64SysReg::MRSMapper(), Inst, Val, Address, - Decoder); -} - -static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { - return DecodeSysRegOperand(A64SysReg::MSRMapper(), Inst, Val, Address, - Decoder); -} - -static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned Imm9 = fieldFromInstruction(Insn, 12, 9); - - unsigned Opc = fieldFromInstruction(Insn, 22, 2); - unsigned V = fieldFromInstruction(Insn, 26, 1); - unsigned Size = fieldFromInstruction(Insn, 30, 2); - - if (Opc == 0 || (V == 1 && Opc == 2)) { - // It's a store, the MCInst gets: Rn_wb, Rt, Rn, Imm - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - } - - if (V == 0 && (Opc == 2 || Size == 3)) { - DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); - } else if (V == 0) { - DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder); - } else if (V == 1 && (Opc & 2)) { - DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - } else { - switch (Size) { - case 0: - DecodeFPR8RegisterClass(Inst, Rt, Address, Decoder); - break; - case 1: - DecodeFPR16RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); - break; - } - } - - if (Opc != 0 && (V != 1 || Opc != 2)) { - // It's a load, the MCInst gets: Rt, Rn_wb, Rn, Imm - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - } - - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - - Inst.addOperand(MCOperand::CreateImm(Imm9)); - - // N.b. The official documentation says undpredictable if Rt == Rn, but this - // takes place at the architectural rather than encoding level: - // - // "STR xzr, [sp], #4" is perfectly valid. - if (V == 0 && Rt == Rn && Rn != 31) - return MCDisassembler::SoftFail; - else - return MCDisassembler::Success; -} - -static MCDisassembler *createAArch64Disassembler(const Target &T, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new AArch64Disassembler(STI, Ctx); -} - -extern "C" void LLVMInitializeAArch64Disassembler() { - TargetRegistry::RegisterMCDisassembler(TheAArch64leTarget, - createAArch64Disassembler); - TargetRegistry::RegisterMCDisassembler(TheAArch64beTarget, - createAArch64Disassembler); -} - -template -static DecodeStatus -DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount, - uint64_t Address, const void *Decoder) { - bool IsLSL = false; - if (Ext == A64SE::LSL) - IsLSL = true; - else if (Ext != A64SE::MSL) - return MCDisassembler::Fail; - - // MSL and LSLH accepts encoded shift amount 0 or 1. - if ((!IsLSL || (IsLSL && IsHalf)) && ShiftAmount != 0 && ShiftAmount != 1) - return MCDisassembler::Fail; - - // LSL accepts encoded shift amount 0, 1, 2 or 3. - if (IsLSL && ShiftAmount > 3) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); - return MCDisassembler::Success; -} - -// Decode post-index vector load/store instructions. 
-// This is necessary as we need to decode Rm: if Rm == 0b11111, the last -// operand is an immediate equal the the length of vector list in bytes, -// or Rm is decoded to a GPR64noxzr register. -static DecodeStatus DecodeVLDSTPostInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned Rm = fieldFromInstruction(Insn, 16, 5); - unsigned Opcode = fieldFromInstruction(Insn, 12, 4); - unsigned IsLoad = fieldFromInstruction(Insn, 22, 1); - // 0 for 64bit vector list, 1 for 128bit vector list - unsigned Is128BitVec = fieldFromInstruction(Insn, 30, 1); - - unsigned NumVecs; - switch (Opcode) { - case 0: // ld4/st4 - case 2: // ld1/st1 with 4 vectors - NumVecs = 4; break; - case 4: // ld3/st3 - case 6: // ld1/st1 with 3 vectors - NumVecs = 3; break; - case 7: // ld1/st1 with 1 vector - NumVecs = 1; break; - case 8: // ld2/st2 - case 10: // ld1/st1 with 2 vectors - NumVecs = 2; break; - default: - llvm_unreachable("Invalid opcode for post-index load/store instructions"); - } - - // Decode vector list of 1/2/3/4 vectors for load instructions. - if (IsLoad) { - switch (NumVecs) { - case 1: - Is128BitVec ? DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder) - : DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - Is128BitVec ? DecodeQPairRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDPairRegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - Is128BitVec ? DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder); - break; - case 4: - Is128BitVec ? DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder); - break; - } - } - - // Decode write back register, which is equal to Rn. - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - - if (Rm == 31) // If Rm is 0x11111, add the vector list length in byte - Inst.addOperand(MCOperand::CreateImm(NumVecs * (Is128BitVec ? 16 : 8))); - else // Decode Rm - DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder); - - // Decode vector list of 1/2/3/4 vectors for load instructions. - if (!IsLoad) { - switch (NumVecs) { - case 1: - Is128BitVec ? DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder) - : DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - Is128BitVec ? DecodeQPairRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDPairRegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - Is128BitVec ? DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder); - break; - case 4: - Is128BitVec ? DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder); - break; - } - } - - return MCDisassembler::Success; -} - -// Decode post-index vector load/store lane instructions. -// This is necessary as we need to decode Rm: if Rm == 0b11111, the last -// operand is an immediate equal the the length of the changed bytes, -// or Rm is decoded to a GPR64noxzr register. -static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { - bool Is64bitVec = false; - bool IsLoadDup = false; - bool IsLoad = false; - // The total number of bytes transferred. 
- // TransferBytes = NumVecs * OneLaneBytes - unsigned TransferBytes = 0; - unsigned NumVecs = 0; - unsigned Opc = Inst.getOpcode(); - switch (Opc) { - case AArch64::LD1R_WB_8B_fixed: case AArch64::LD1R_WB_8B_register: - case AArch64::LD1R_WB_4H_fixed: case AArch64::LD1R_WB_4H_register: - case AArch64::LD1R_WB_2S_fixed: case AArch64::LD1R_WB_2S_register: - case AArch64::LD1R_WB_1D_fixed: case AArch64::LD1R_WB_1D_register: { - switch (Opc) { - case AArch64::LD1R_WB_8B_fixed: case AArch64::LD1R_WB_8B_register: - TransferBytes = 1; break; - case AArch64::LD1R_WB_4H_fixed: case AArch64::LD1R_WB_4H_register: - TransferBytes = 2; break; - case AArch64::LD1R_WB_2S_fixed: case AArch64::LD1R_WB_2S_register: - TransferBytes = 4; break; - case AArch64::LD1R_WB_1D_fixed: case AArch64::LD1R_WB_1D_register: - TransferBytes = 8; break; - } - Is64bitVec = true; - IsLoadDup = true; - NumVecs = 1; - break; - } - - case AArch64::LD1R_WB_16B_fixed: case AArch64::LD1R_WB_16B_register: - case AArch64::LD1R_WB_8H_fixed: case AArch64::LD1R_WB_8H_register: - case AArch64::LD1R_WB_4S_fixed: case AArch64::LD1R_WB_4S_register: - case AArch64::LD1R_WB_2D_fixed: case AArch64::LD1R_WB_2D_register: { - switch (Opc) { - case AArch64::LD1R_WB_16B_fixed: case AArch64::LD1R_WB_16B_register: - TransferBytes = 1; break; - case AArch64::LD1R_WB_8H_fixed: case AArch64::LD1R_WB_8H_register: - TransferBytes = 2; break; - case AArch64::LD1R_WB_4S_fixed: case AArch64::LD1R_WB_4S_register: - TransferBytes = 4; break; - case AArch64::LD1R_WB_2D_fixed: case AArch64::LD1R_WB_2D_register: - TransferBytes = 8; break; - } - IsLoadDup = true; - NumVecs = 1; - break; - } - - case AArch64::LD2R_WB_8B_fixed: case AArch64::LD2R_WB_8B_register: - case AArch64::LD2R_WB_4H_fixed: case AArch64::LD2R_WB_4H_register: - case AArch64::LD2R_WB_2S_fixed: case AArch64::LD2R_WB_2S_register: - case AArch64::LD2R_WB_1D_fixed: case AArch64::LD2R_WB_1D_register: { - switch (Opc) { - case AArch64::LD2R_WB_8B_fixed: case AArch64::LD2R_WB_8B_register: - TransferBytes = 2; break; - case AArch64::LD2R_WB_4H_fixed: case AArch64::LD2R_WB_4H_register: - TransferBytes = 4; break; - case AArch64::LD2R_WB_2S_fixed: case AArch64::LD2R_WB_2S_register: - TransferBytes = 8; break; - case AArch64::LD2R_WB_1D_fixed: case AArch64::LD2R_WB_1D_register: - TransferBytes = 16; break; - } - Is64bitVec = true; - IsLoadDup = true; - NumVecs = 2; - break; - } - - case AArch64::LD2R_WB_16B_fixed: case AArch64::LD2R_WB_16B_register: - case AArch64::LD2R_WB_8H_fixed: case AArch64::LD2R_WB_8H_register: - case AArch64::LD2R_WB_4S_fixed: case AArch64::LD2R_WB_4S_register: - case AArch64::LD2R_WB_2D_fixed: case AArch64::LD2R_WB_2D_register: { - switch (Opc) { - case AArch64::LD2R_WB_16B_fixed: case AArch64::LD2R_WB_16B_register: - TransferBytes = 2; break; - case AArch64::LD2R_WB_8H_fixed: case AArch64::LD2R_WB_8H_register: - TransferBytes = 4; break; - case AArch64::LD2R_WB_4S_fixed: case AArch64::LD2R_WB_4S_register: - TransferBytes = 8; break; - case AArch64::LD2R_WB_2D_fixed: case AArch64::LD2R_WB_2D_register: - TransferBytes = 16; break; - } - IsLoadDup = true; - NumVecs = 2; - break; - } - - case AArch64::LD3R_WB_8B_fixed: case AArch64::LD3R_WB_8B_register: - case AArch64::LD3R_WB_4H_fixed: case AArch64::LD3R_WB_4H_register: - case AArch64::LD3R_WB_2S_fixed: case AArch64::LD3R_WB_2S_register: - case AArch64::LD3R_WB_1D_fixed: case AArch64::LD3R_WB_1D_register: { - switch (Opc) { - case AArch64::LD3R_WB_8B_fixed: case AArch64::LD3R_WB_8B_register: - TransferBytes = 3; break; - case 
AArch64::LD3R_WB_4H_fixed: case AArch64::LD3R_WB_4H_register: - TransferBytes = 6; break; - case AArch64::LD3R_WB_2S_fixed: case AArch64::LD3R_WB_2S_register: - TransferBytes = 12; break; - case AArch64::LD3R_WB_1D_fixed: case AArch64::LD3R_WB_1D_register: - TransferBytes = 24; break; - } - Is64bitVec = true; - IsLoadDup = true; - NumVecs = 3; - break; - } - - case AArch64::LD3R_WB_16B_fixed: case AArch64::LD3R_WB_16B_register: - case AArch64::LD3R_WB_4S_fixed: case AArch64::LD3R_WB_8H_register: - case AArch64::LD3R_WB_8H_fixed: case AArch64::LD3R_WB_4S_register: - case AArch64::LD3R_WB_2D_fixed: case AArch64::LD3R_WB_2D_register: { - switch (Opc) { - case AArch64::LD3R_WB_16B_fixed: case AArch64::LD3R_WB_16B_register: - TransferBytes = 3; break; - case AArch64::LD3R_WB_8H_fixed: case AArch64::LD3R_WB_8H_register: - TransferBytes = 6; break; - case AArch64::LD3R_WB_4S_fixed: case AArch64::LD3R_WB_4S_register: - TransferBytes = 12; break; - case AArch64::LD3R_WB_2D_fixed: case AArch64::LD3R_WB_2D_register: - TransferBytes = 24; break; - } - IsLoadDup = true; - NumVecs = 3; - break; - } - - case AArch64::LD4R_WB_8B_fixed: case AArch64::LD4R_WB_8B_register: - case AArch64::LD4R_WB_4H_fixed: case AArch64::LD4R_WB_4H_register: - case AArch64::LD4R_WB_2S_fixed: case AArch64::LD4R_WB_2S_register: - case AArch64::LD4R_WB_1D_fixed: case AArch64::LD4R_WB_1D_register: { - switch (Opc) { - case AArch64::LD4R_WB_8B_fixed: case AArch64::LD4R_WB_8B_register: - TransferBytes = 4; break; - case AArch64::LD4R_WB_4H_fixed: case AArch64::LD4R_WB_4H_register: - TransferBytes = 8; break; - case AArch64::LD4R_WB_2S_fixed: case AArch64::LD4R_WB_2S_register: - TransferBytes = 16; break; - case AArch64::LD4R_WB_1D_fixed: case AArch64::LD4R_WB_1D_register: - TransferBytes = 32; break; - } - Is64bitVec = true; - IsLoadDup = true; - NumVecs = 4; - break; - } - - case AArch64::LD4R_WB_16B_fixed: case AArch64::LD4R_WB_16B_register: - case AArch64::LD4R_WB_4S_fixed: case AArch64::LD4R_WB_8H_register: - case AArch64::LD4R_WB_8H_fixed: case AArch64::LD4R_WB_4S_register: - case AArch64::LD4R_WB_2D_fixed: case AArch64::LD4R_WB_2D_register: { - switch (Opc) { - case AArch64::LD4R_WB_16B_fixed: case AArch64::LD4R_WB_16B_register: - TransferBytes = 4; break; - case AArch64::LD4R_WB_8H_fixed: case AArch64::LD4R_WB_8H_register: - TransferBytes = 8; break; - case AArch64::LD4R_WB_4S_fixed: case AArch64::LD4R_WB_4S_register: - TransferBytes = 16; break; - case AArch64::LD4R_WB_2D_fixed: case AArch64::LD4R_WB_2D_register: - TransferBytes = 32; break; - } - IsLoadDup = true; - NumVecs = 4; - break; - } - - case AArch64::LD1LN_WB_B_fixed: case AArch64::LD1LN_WB_B_register: - case AArch64::LD1LN_WB_H_fixed: case AArch64::LD1LN_WB_H_register: - case AArch64::LD1LN_WB_S_fixed: case AArch64::LD1LN_WB_S_register: - case AArch64::LD1LN_WB_D_fixed: case AArch64::LD1LN_WB_D_register: { - switch (Opc) { - case AArch64::LD1LN_WB_B_fixed: case AArch64::LD1LN_WB_B_register: - TransferBytes = 1; break; - case AArch64::LD1LN_WB_H_fixed: case AArch64::LD1LN_WB_H_register: - TransferBytes = 2; break; - case AArch64::LD1LN_WB_S_fixed: case AArch64::LD1LN_WB_S_register: - TransferBytes = 4; break; - case AArch64::LD1LN_WB_D_fixed: case AArch64::LD1LN_WB_D_register: - TransferBytes = 8; break; - } - IsLoad = true; - NumVecs = 1; - break; - } - - case AArch64::LD2LN_WB_B_fixed: case AArch64::LD2LN_WB_B_register: - case AArch64::LD2LN_WB_H_fixed: case AArch64::LD2LN_WB_H_register: - case AArch64::LD2LN_WB_S_fixed: case AArch64::LD2LN_WB_S_register: - case 
AArch64::LD2LN_WB_D_fixed: case AArch64::LD2LN_WB_D_register: { - switch (Opc) { - case AArch64::LD2LN_WB_B_fixed: case AArch64::LD2LN_WB_B_register: - TransferBytes = 2; break; - case AArch64::LD2LN_WB_H_fixed: case AArch64::LD2LN_WB_H_register: - TransferBytes = 4; break; - case AArch64::LD2LN_WB_S_fixed: case AArch64::LD2LN_WB_S_register: - TransferBytes = 8; break; - case AArch64::LD2LN_WB_D_fixed: case AArch64::LD2LN_WB_D_register: - TransferBytes = 16; break; - } - IsLoad = true; - NumVecs = 2; - break; - } - - case AArch64::LD3LN_WB_B_fixed: case AArch64::LD3LN_WB_B_register: - case AArch64::LD3LN_WB_H_fixed: case AArch64::LD3LN_WB_H_register: - case AArch64::LD3LN_WB_S_fixed: case AArch64::LD3LN_WB_S_register: - case AArch64::LD3LN_WB_D_fixed: case AArch64::LD3LN_WB_D_register: { - switch (Opc) { - case AArch64::LD3LN_WB_B_fixed: case AArch64::LD3LN_WB_B_register: - TransferBytes = 3; break; - case AArch64::LD3LN_WB_H_fixed: case AArch64::LD3LN_WB_H_register: - TransferBytes = 6; break; - case AArch64::LD3LN_WB_S_fixed: case AArch64::LD3LN_WB_S_register: - TransferBytes = 12; break; - case AArch64::LD3LN_WB_D_fixed: case AArch64::LD3LN_WB_D_register: - TransferBytes = 24; break; - } - IsLoad = true; - NumVecs = 3; - break; - } - - case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register: - case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register: - case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register: - case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register: { - switch (Opc) { - case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register: - TransferBytes = 4; break; - case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register: - TransferBytes = 8; break; - case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register: - TransferBytes = 16; break; - case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register: - TransferBytes = 32; break; - } - IsLoad = true; - NumVecs = 4; - break; - } - - case AArch64::ST1LN_WB_B_fixed: case AArch64::ST1LN_WB_B_register: - case AArch64::ST1LN_WB_H_fixed: case AArch64::ST1LN_WB_H_register: - case AArch64::ST1LN_WB_S_fixed: case AArch64::ST1LN_WB_S_register: - case AArch64::ST1LN_WB_D_fixed: case AArch64::ST1LN_WB_D_register: { - switch (Opc) { - case AArch64::ST1LN_WB_B_fixed: case AArch64::ST1LN_WB_B_register: - TransferBytes = 1; break; - case AArch64::ST1LN_WB_H_fixed: case AArch64::ST1LN_WB_H_register: - TransferBytes = 2; break; - case AArch64::ST1LN_WB_S_fixed: case AArch64::ST1LN_WB_S_register: - TransferBytes = 4; break; - case AArch64::ST1LN_WB_D_fixed: case AArch64::ST1LN_WB_D_register: - TransferBytes = 8; break; - } - NumVecs = 1; - break; - } - - case AArch64::ST2LN_WB_B_fixed: case AArch64::ST2LN_WB_B_register: - case AArch64::ST2LN_WB_H_fixed: case AArch64::ST2LN_WB_H_register: - case AArch64::ST2LN_WB_S_fixed: case AArch64::ST2LN_WB_S_register: - case AArch64::ST2LN_WB_D_fixed: case AArch64::ST2LN_WB_D_register: { - switch (Opc) { - case AArch64::ST2LN_WB_B_fixed: case AArch64::ST2LN_WB_B_register: - TransferBytes = 2; break; - case AArch64::ST2LN_WB_H_fixed: case AArch64::ST2LN_WB_H_register: - TransferBytes = 4; break; - case AArch64::ST2LN_WB_S_fixed: case AArch64::ST2LN_WB_S_register: - TransferBytes = 8; break; - case AArch64::ST2LN_WB_D_fixed: case AArch64::ST2LN_WB_D_register: - TransferBytes = 16; break; - } - NumVecs = 2; - break; - } - - case AArch64::ST3LN_WB_B_fixed: case AArch64::ST3LN_WB_B_register: - case AArch64::ST3LN_WB_H_fixed: case 
AArch64::ST3LN_WB_H_register: - case AArch64::ST3LN_WB_S_fixed: case AArch64::ST3LN_WB_S_register: - case AArch64::ST3LN_WB_D_fixed: case AArch64::ST3LN_WB_D_register: { - switch (Opc) { - case AArch64::ST3LN_WB_B_fixed: case AArch64::ST3LN_WB_B_register: - TransferBytes = 3; break; - case AArch64::ST3LN_WB_H_fixed: case AArch64::ST3LN_WB_H_register: - TransferBytes = 6; break; - case AArch64::ST3LN_WB_S_fixed: case AArch64::ST3LN_WB_S_register: - TransferBytes = 12; break; - case AArch64::ST3LN_WB_D_fixed: case AArch64::ST3LN_WB_D_register: - TransferBytes = 24; break; - } - NumVecs = 3; - break; - } - - case AArch64::ST4LN_WB_B_fixed: case AArch64::ST4LN_WB_B_register: - case AArch64::ST4LN_WB_H_fixed: case AArch64::ST4LN_WB_H_register: - case AArch64::ST4LN_WB_S_fixed: case AArch64::ST4LN_WB_S_register: - case AArch64::ST4LN_WB_D_fixed: case AArch64::ST4LN_WB_D_register: { - switch (Opc) { - case AArch64::ST4LN_WB_B_fixed: case AArch64::ST4LN_WB_B_register: - TransferBytes = 4; break; - case AArch64::ST4LN_WB_H_fixed: case AArch64::ST4LN_WB_H_register: - TransferBytes = 8; break; - case AArch64::ST4LN_WB_S_fixed: case AArch64::ST4LN_WB_S_register: - TransferBytes = 16; break; - case AArch64::ST4LN_WB_D_fixed: case AArch64::ST4LN_WB_D_register: - TransferBytes = 32; break; - } - NumVecs = 4; - break; - } - - default: - return MCDisassembler::Fail; - } // End of switch (Opc) - - unsigned Rt = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned Rm = fieldFromInstruction(Insn, 16, 5); - - // Decode post-index of load duplicate lane - if (IsLoadDup) { - switch (NumVecs) { - case 1: - Is64bitVec ? DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder) - : DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - Is64bitVec ? DecodeDPairRegisterClass(Inst, Rt, Address, Decoder) - : DecodeQPairRegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - Is64bitVec ? DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder) - : DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder); - break; - case 4: - Is64bitVec ? DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder) - : DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder); - } - - // Decode write back register, which is equal to Rn. - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - - if (Rm == 31) // If Rm is 0x11111, add the number of transferred bytes - Inst.addOperand(MCOperand::CreateImm(TransferBytes)); - else // Decode Rm - DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder); - - return MCDisassembler::Success; - } - - // Decode post-index of load/store lane - // Loads have a vector list as output. - if (IsLoad) { - switch (NumVecs) { - case 1: - DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - DecodeQPairRegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder); - break; - case 4: - DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder); - } - } - - // Decode write back register, which is equal to Rn. - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - - if (Rm == 31) // If Rm is 0x11111, add the number of transferred bytes - Inst.addOperand(MCOperand::CreateImm(TransferBytes)); - else // Decode Rm - DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder); - - // Decode the source vector list. 
- switch (NumVecs) { - case 1: - DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - DecodeQPairRegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder); - break; - case 4: - DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder); - } - - // Decode lane - unsigned Q = fieldFromInstruction(Insn, 30, 1); - unsigned S = fieldFromInstruction(Insn, 10, 3); - unsigned lane = 0; - // Calculate the number of lanes by number of vectors and transferred bytes. - // NumLanes = 16 bytes / bytes of each lane - unsigned NumLanes = 16 / (TransferBytes / NumVecs); - switch (NumLanes) { - case 16: // A vector has 16 lanes, each lane is 1 bytes. - lane = (Q << 3) | S; - break; - case 8: - lane = (Q << 2) | (S >> 1); - break; - case 4: - lane = (Q << 1) | (S >> 2); - break; - case 2: - lane = Q; - break; - } - Inst.addOperand(MCOperand::CreateImm(lane)); - - return MCDisassembler::Success; -} - -static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned size = fieldFromInstruction(Insn, 22, 2); - unsigned Q = fieldFromInstruction(Insn, 30, 1); - - DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder); - - if(Q) - DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder); - else - DecodeFPR64RegisterClass(Inst, Rn, Address, Decoder); - - switch (size) { - case 0: - Inst.addOperand(MCOperand::CreateImm(8)); - break; - case 1: - Inst.addOperand(MCOperand::CreateImm(16)); - break; - case 2: - Inst.addOperand(MCOperand::CreateImm(32)); - break; - default : - return MCDisassembler::Fail; - } - return MCDisassembler::Success; -} - diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt deleted file mode 100644 index 21baf250af86..000000000000 --- a/lib/Target/AArch64/Disassembler/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMAArch64Disassembler - AArch64Disassembler.cpp - ) diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt deleted file mode 100644 index 05c4ed1646b9..000000000000 --- a/lib/Target/AArch64/Disassembler/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ----------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = AArch64Disassembler -parent = AArch64 -required_libraries = AArch64Info AArch64Utils MC Support -add_to_library_groups = AArch64 diff --git a/lib/Target/AArch64/Disassembler/Makefile b/lib/Target/AArch64/Disassembler/Makefile deleted file mode 100644 index 5c861207f836..000000000000 --- a/lib/Target/AArch64/Disassembler/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/AArch64/Disassembler/Makefile ------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMAArch64Disassembler - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp deleted file mode 100644 index d9571238a033..000000000000 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ /dev/null @@ -1,549 +0,0 @@ -//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an AArch64 MCInst to a .s file. -// -//===----------------------------------------------------------------------===// - -#include "AArch64InstPrinter.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "asm-printer" - -#define GET_INSTRUCTION_NAME -#define PRINT_ALIAS_INSTR -#include "AArch64GenAsmWriter.inc" - -static int64_t unpackSignedImm(int BitWidth, uint64_t Value) { - assert(!(Value & ~((1ULL << BitWidth)-1)) && "immediate not n-bit"); - if (Value & (1ULL << (BitWidth - 1))) - return static_cast(Value) - (1LL << BitWidth); - else - return Value; -} - -AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) : - MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. 
- setAvailableFeatures(STI.getFeatureBits()); -} - -void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << getRegisterName(RegNo); -} - -void -AArch64InstPrinter::printOffsetSImm9Operand(const MCInst *MI, - unsigned OpNum, raw_ostream &O) { - const MCOperand &MOImm = MI->getOperand(OpNum); - int32_t Imm = unpackSignedImm(9, MOImm.getImm()); - - O << '#' << Imm; -} - -void -AArch64InstPrinter::printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, unsigned MemSize, - unsigned RmSize) { - unsigned ExtImm = MI->getOperand(OpNum).getImm(); - unsigned OptionHi = ExtImm >> 1; - unsigned S = ExtImm & 1; - bool IsLSL = OptionHi == 1 && RmSize == 64; - - const char *Ext; - switch (OptionHi) { - case 1: - Ext = (RmSize == 32) ? "uxtw" : "lsl"; - break; - case 3: - Ext = (RmSize == 32) ? "sxtw" : "sxtx"; - break; - default: - llvm_unreachable("Incorrect Option on load/store (reg offset)"); - } - O << Ext; - - if (S) { - unsigned ShiftAmt = Log2_32(MemSize); - O << " #" << ShiftAmt; - } else if (IsLSL) { - O << " #0"; - } -} - -void -AArch64InstPrinter::printAddSubImmLSL0Operand(const MCInst *MI, - unsigned OpNum, raw_ostream &O) { - const MCOperand &Imm12Op = MI->getOperand(OpNum); - - if (Imm12Op.isImm()) { - int64_t Imm12 = Imm12Op.getImm(); - assert(Imm12 >= 0 && "Invalid immediate for add/sub imm"); - O << "#" << Imm12; - } else { - assert(Imm12Op.isExpr() && "Unexpected shift operand type"); - O << "#" << *Imm12Op.getExpr(); - } -} - -void -AArch64InstPrinter::printAddSubImmLSL12Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - - printAddSubImmLSL0Operand(MI, OpNum, O); - - O << ", lsl #12"; -} - -void -AArch64InstPrinter::printBareImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - O << MO.getImm(); -} - -template void -AArch64InstPrinter::printBFILSBOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &ImmROp = MI->getOperand(OpNum); - unsigned LSB = ImmROp.getImm() == 0 ? 
0 : RegWidth - ImmROp.getImm(); - - O << '#' << LSB; -} - -void AArch64InstPrinter::printBFIWidthOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &ImmSOp = MI->getOperand(OpNum); - unsigned Width = ImmSOp.getImm() + 1; - - O << '#' << Width; -} - -void -AArch64InstPrinter::printBFXWidthOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &ImmSOp = MI->getOperand(OpNum); - const MCOperand &ImmROp = MI->getOperand(OpNum - 1); - - unsigned ImmR = ImmROp.getImm(); - unsigned ImmS = ImmSOp.getImm(); - - assert(ImmS >= ImmR && "Invalid ImmR, ImmS combination for bitfield extract"); - - O << '#' << (ImmS - ImmR + 1); -} - -void -AArch64InstPrinter::printCRxOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &CRx = MI->getOperand(OpNum); - - O << 'c' << CRx.getImm(); -} - - -void -AArch64InstPrinter::printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &ScaleOp = MI->getOperand(OpNum); - - O << '#' << (64 - ScaleOp.getImm()); -} - - -void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &o) { - const MCOperand &MOImm8 = MI->getOperand(OpNum); - - assert(MOImm8.isImm() - && "Immediate operand required for floating-point immediate inst"); - - uint32_t Imm8 = MOImm8.getImm(); - uint32_t Fraction = Imm8 & 0xf; - uint32_t Exponent = (Imm8 >> 4) & 0x7; - uint32_t Negative = (Imm8 >> 7) & 0x1; - - float Val = 1.0f + Fraction / 16.0f; - - // That is: - // 000 -> 2^1, 001 -> 2^2, 010 -> 2^3, 011 -> 2^4, - // 100 -> 2^-3, 101 -> 2^-2, 110 -> 2^-1, 111 -> 2^0 - if (Exponent & 0x4) { - Val /= 1 << (7 - Exponent); - } else { - Val *= 1 << (Exponent + 1); - } - - Val = Negative ? -Val : Val; - - o << '#' << format("%.8f", Val); -} - -void AArch64InstPrinter::printFPZeroOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &o) { - o << "#0.0"; -} - -void -AArch64InstPrinter::printCondCodeOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - - O << A64CondCodeToString(static_cast(MO.getImm())); -} - -void -AArch64InstPrinter::printInverseCondCodeOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - A64CC::CondCodes CC = - static_cast(MI->getOperand(OpNum).getImm()); - O << A64CondCodeToString(A64InvertCondCode(CC)); -} - -template void -AArch64InstPrinter::printLabelOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - - if (!MO.isImm()) { - printOperand(MI, OpNum, O); - return; - } - - // The immediate of LDR (lit) instructions is a signed 19-bit immediate, which - // is multiplied by 4 (because all A64 instructions are 32-bits wide). 
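// (Illustrative aside, values chosen for exposition only: with the LDR-literal
// instantiation, field_width = 19 and scale = 4, an encoded immediate of
// 0x00001 prints as "#4", one instruction ahead, while 0x7ffff has the sign
// bit (1 << 18) set and prints as "#-4", since
//   SImm = 4 * ((0x7ffff & ~0x40000) - 0x40000) = 4 * (-1) = -4.)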
- uint64_t UImm = MO.getImm(); - uint64_t Sign = UImm & (1LL << (field_width - 1)); - int64_t SImm = scale * ((UImm & ~Sign) - Sign); - - O << "#" << SImm; -} - -template void -AArch64InstPrinter::printLogicalImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - uint64_t Val; - A64Imms::isLogicalImmBits(RegWidth, MO.getImm(), Val); - O << "#0x"; - O.write_hex(Val); -} - -void -AArch64InstPrinter::printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, int MemSize) { - const MCOperand &MOImm = MI->getOperand(OpNum); - - if (MOImm.isImm()) { - uint32_t Imm = MOImm.getImm() * MemSize; - - O << "#" << Imm; - } else { - O << "#" << *MOImm.getExpr(); - } -} - -void -AArch64InstPrinter::printShiftOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, - A64SE::ShiftExtSpecifiers Shift) { - const MCOperand &MO = MI->getOperand(OpNum); - - // LSL #0 is not printed - if (Shift == A64SE::LSL && MO.isImm() && MO.getImm() == 0) - return; - - switch (Shift) { - case A64SE::LSL: O << "lsl"; break; - case A64SE::LSR: O << "lsr"; break; - case A64SE::ASR: O << "asr"; break; - case A64SE::ROR: O << "ror"; break; - default: llvm_unreachable("Invalid shift specifier in logical instruction"); - } - - O << " #" << MO.getImm(); -} - -void -AArch64InstPrinter::printMoveWideImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &UImm16MO = MI->getOperand(OpNum); - const MCOperand &ShiftMO = MI->getOperand(OpNum + 1); - - if (UImm16MO.isImm()) { - O << '#' << UImm16MO.getImm(); - - if (ShiftMO.getImm() != 0) - O << ", lsl #" << (ShiftMO.getImm() * 16); - - return; - } - - O << "#" << *UImm16MO.getExpr(); -} - -void AArch64InstPrinter::printNamedImmOperand(const NamedImmMapper &Mapper, - const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - bool ValidName; - const MCOperand &MO = MI->getOperand(OpNum); - StringRef Name = Mapper.toString(MO.getImm(), ValidName); - - if (ValidName) - O << Name; - else - O << '#' << MO.getImm(); -} - -void -AArch64InstPrinter::printSysRegOperand(const A64SysReg::SysRegMapper &Mapper, - const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - - bool ValidName; - std::string Name = Mapper.toString(MO.getImm(), ValidName); - if (ValidName) { - O << Name; - return; - } -} - - -void AArch64InstPrinter::printRegExtendOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O, - A64SE::ShiftExtSpecifiers Ext) { - // FIXME: In principle TableGen should be able to detect this itself far more - // easily. We will only accumulate more of these hacks. 
- unsigned Reg0 = MI->getOperand(0).getReg(); - unsigned Reg1 = MI->getOperand(1).getReg(); - - if (isStackReg(Reg0) || isStackReg(Reg1)) { - A64SE::ShiftExtSpecifiers LSLEquiv; - - if (Reg0 == AArch64::XSP || Reg1 == AArch64::XSP) - LSLEquiv = A64SE::UXTX; - else - LSLEquiv = A64SE::UXTW; - - if (Ext == LSLEquiv) { - O << "lsl #" << MI->getOperand(OpNum).getImm(); - return; - } - } - - switch (Ext) { - case A64SE::UXTB: O << "uxtb"; break; - case A64SE::UXTH: O << "uxth"; break; - case A64SE::UXTW: O << "uxtw"; break; - case A64SE::UXTX: O << "uxtx"; break; - case A64SE::SXTB: O << "sxtb"; break; - case A64SE::SXTH: O << "sxth"; break; - case A64SE::SXTW: O << "sxtw"; break; - case A64SE::SXTX: O << "sxtx"; break; - default: llvm_unreachable("Unexpected shift type for printing"); - } - - const MCOperand &MO = MI->getOperand(OpNum); - if (MO.getImm() != 0) - O << " #" << MO.getImm(); -} - -template void -AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MOImm = MI->getOperand(OpNum); - int32_t Imm = unpackSignedImm(7, MOImm.getImm()); - - O << "#" << (Imm * MemScale); -} - -void AArch64InstPrinter::printVPRRegister(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Reg = MI->getOperand(OpNo).getReg(); - std::string Name = getRegisterName(Reg); - Name[0] = 'v'; - O << Name; -} - -void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - O << getRegisterName(Reg); - } else if (Op.isImm()) { - O << '#' << Op.getImm(); - } else { - assert(Op.isExpr() && "unknown operand kind in printOperand"); - // If a symbolic branch target was added as a constant expression then print - // that address in hex. - const MCConstantExpr *BranchTarget = dyn_cast(Op.getExpr()); - int64_t Address; - if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { - O << "0x"; - O.write_hex(Address); - } - else { - // Otherwise, just print the expression. - O << *Op.getExpr(); - } - } -} - - -void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - if (MI->getOpcode() == AArch64::TLSDESCCALL) { - // This is a special assembler directive which applies an - // R_AARCH64_TLSDESC_CALL to the following (BLR) instruction. It has a fixed - // form outside the normal TableGenerated scheme. - O << "\t.tlsdesccall " << *MI->getOperand(0).getExpr(); - } else if (!printAliasInstr(MI, O)) - printInstruction(MI, O); - - printAnnotation(O, Annot); -} - -template -void AArch64InstPrinter::printNeonMovImmShiftOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - - assert(MO.isImm() && - "Immediate operand required for Neon vector immediate inst."); - - bool IsLSL = false; - if (Ext == A64SE::LSL) - IsLSL = true; - else if (Ext != A64SE::MSL) - llvm_unreachable("Invalid shift specifier in movi instruction"); - - int64_t Imm = MO.getImm(); - - // MSL and LSLH accepts encoded shift amount 0 or 1. - if ((!IsLSL || (IsLSL && isHalf)) && Imm != 0 && Imm != 1) - llvm_unreachable("Invalid shift amount in movi instruction"); - - // LSH accepts encoded shift amount 0, 1, 2 or 3. - if (IsLSL && (Imm < 0 || Imm > 3)) - llvm_unreachable("Invalid shift amount in movi instruction"); - - // Print shift amount as multiple of 8 with MSL encoded shift amount - // 0 and 1 printed as 8 and 16. 
- if (!IsLSL) - Imm++; - Imm *= 8; - - // LSL #0 is not printed - if (IsLSL) { - if (Imm == 0) - return; - O << ", lsl"; - } else - O << ", msl"; - - O << " #" << Imm; -} - -void AArch64InstPrinter::printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &o) { - o << "#0x0"; -} - -void AArch64InstPrinter::printUImmHexOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MOUImm = MI->getOperand(OpNum); - - assert(MOUImm.isImm() && - "Immediate operand required for Neon vector immediate inst."); - - unsigned Imm = MOUImm.getImm(); - - O << "#0x"; - O.write_hex(Imm); -} - -void AArch64InstPrinter::printUImmBareOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MOUImm = MI->getOperand(OpNum); - - assert(MOUImm.isImm() - && "Immediate operand required for Neon vector immediate inst."); - - unsigned Imm = MOUImm.getImm(); - O << Imm; -} - -void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MOUImm8 = MI->getOperand(OpNum); - - assert(MOUImm8.isImm() && - "Immediate operand required for Neon vector immediate bytemask inst."); - - uint32_t UImm8 = MOUImm8.getImm(); - uint64_t Mask = 0; - - // Replicates 0x00 or 0xff byte in a 64-bit vector - for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) { - if ((UImm8 >> ByteNum) & 1) - Mask |= (uint64_t)0xff << (8 * ByteNum); - } - - O << "#0x"; - O.write_hex(Mask); -} - -// If Count > 1, there are two valid kinds of vector list: -// (1) {Vn.layout, Vn+1.layout, ... , Vm.layout} -// (2) {Vn.layout - Vm.layout} -// We choose the first kind as output. -template -void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - assert(Count >= 1 && Count <= 4 && "Invalid Number of Vectors"); - - unsigned Reg = MI->getOperand(OpNum).getReg(); - std::string LayoutStr = A64VectorLayoutToString(Layout); - O << "{ "; - if (Count > 1) { // Print sub registers separately - bool IsVec64 = (Layout < A64Layout::VL_16B); - unsigned SubRegIdx = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0; - for (unsigned I = 0; I < Count; I++) { - std::string Name = getRegisterName(MRI.getSubReg(Reg, SubRegIdx++)); - Name[0] = 'v'; - O << Name << LayoutStr; - if (I != Count - 1) - O << ", "; - } - } else { // Print the register directly when NumVecs is 1. - std::string Name = getRegisterName(Reg); - Name[0] = 'v'; - O << Name << LayoutStr; - } - O << " }"; -} diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h deleted file mode 100644 index 7432755dd89b..000000000000 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ /dev/null @@ -1,186 +0,0 @@ -//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an AArch64 MCInst to a .s file. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64INSTPRINTER_H -#define LLVM_AARCH64INSTPRINTER_H - -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCSubtargetInfo.h" - -namespace llvm { - -class MCOperand; - -class AArch64InstPrinter : public MCInstPrinter { -public: - AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); - - // Autogenerated by tblgen - void printInstruction(const MCInst *MI, raw_ostream &O); - bool printAliasInstr(const MCInst *MI, raw_ostream &O); - void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, - unsigned PrintMethodIdx, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - static const char *getInstructionName(unsigned Opcode); - - void printRegName(raw_ostream &O, unsigned RegNum) const override; - - template - void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printAddrRegExtendOperand(MI, OpNum, O, MemSize, RmSize); - } - - - void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, unsigned MemSize, - unsigned RmSize); - - void printAddSubImmLSL0Operand(const MCInst *MI, - unsigned OpNum, raw_ostream &O); - void printAddSubImmLSL12Operand(const MCInst *MI, - unsigned OpNum, raw_ostream &O); - - void printBareImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - template - void printBFILSBOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBFIWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBFXWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - - void printCondCodeOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printInverseCondCodeOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printCRxOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o); - - void printFPZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o); - - template - void printOffsetUImm12Operand(const MCInst *MI, - unsigned OpNum, raw_ostream &o) { - printOffsetUImm12Operand(MI, OpNum, o, MemScale); - } - - void printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &o, int MemScale); - - template - void printLabelOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - template - void printLogicalImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - template - void printNamedImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printNamedImmOperand(SomeNamedImmMapper(), MI, OpNum, O); - } - - void printNamedImmOperand(const NamedImmMapper &Mapper, - const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printSysRegOperand(const A64SysReg::SysRegMapper &Mapper, - const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printMRSOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printSysRegOperand(A64SysReg::MRSMapper(), MI, OpNum, O); - } - - void printMSROperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printSysRegOperand(A64SysReg::MSRMapper(), MI, OpNum, O); - } - - void printShiftOperand(const char *name, const MCInst *MI, - unsigned OpIdx, raw_ostream &O); - - void printLSLOperand(const MCInst *MI, unsigned OpNum, 
raw_ostream &O); - - void printLSROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printShiftOperand("lsr", MI, OpNum, O); - } - void printASROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printShiftOperand("asr", MI, OpNum, O); - } - void printROROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printShiftOperand("ror", MI, OpNum, O); - } - - template - void printShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printShiftOperand(MI, OpNum, O, Shift); - } - - void printShiftOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, A64SE::ShiftExtSpecifiers Sh); - - - void printMoveWideImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - template void - printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printOffsetSImm9Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printPRFMOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - template - void printRegExtendOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printRegExtendOperand(MI, OpNum, O, EXT); - } - - void printRegExtendOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, A64SE::ShiftExtSpecifiers Ext); - - void printVPRRegister(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; - - bool isStackReg(unsigned RegNo) { - return RegNo == AArch64::XSP || RegNo == AArch64::WSP; - } - - template - void printNeonMovImmShiftOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printUImmHexOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printUImmBareOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - template - void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O); -}; -} - -#endif diff --git a/lib/Target/AArch64/InstPrinter/CMakeLists.txt b/lib/Target/AArch64/InstPrinter/CMakeLists.txt deleted file mode 100644 index 3db56e4733f5..000000000000 --- a/lib/Target/AArch64/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMAArch64AsmPrinter - AArch64InstPrinter.cpp - ) diff --git a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt deleted file mode 100644 index 4836c7c45d44..000000000000 --- a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = AArch64AsmPrinter -parent = AArch64 -required_libraries = AArch64Utils MC Support -add_to_library_groups = AArch64 - diff --git a/lib/Target/AArch64/InstPrinter/Makefile b/lib/Target/AArch64/InstPrinter/Makefile deleted file mode 100644 index 1c36a8dea798..000000000000 --- a/lib/Target/AArch64/InstPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/AArch64/AsmPrinter/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMAArch64AsmPrinter - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt deleted file mode 100644 index 1b838282bd41..000000000000 --- a/lib/Target/AArch64/LLVMBuild.txt +++ /dev/null @@ -1,35 +0,0 @@ -;===- ./lib/Target/AArch64/LLVMBuild.txt -----------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[common] -subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils - -[component_0] -type = TargetGroup -name = AArch64 -parent = Target -has_asmparser = 1 -has_asmprinter = 1 -has_disassembler = 1 -has_jit = 1 - -[component_1] -type = Library -name = AArch64CodeGen -parent = AArch64 -required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AArch64Utils Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target -add_to_library_groups = AArch64 diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp deleted file mode 100644 index e0931e420785..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ /dev/null @@ -1,593 +0,0 @@ -//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the AArch64 implementation of the MCAsmBackend class, -// which is principally concerned with relaxation of the various fixup kinds. 
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/AArch64FixupKinds.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/MC/MCFixupKindInfo.h" -#include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { -class AArch64AsmBackend : public MCAsmBackend { - const MCSubtargetInfo* STI; -public: - AArch64AsmBackend(const Target &T, const StringRef TT) - : MCAsmBackend(), - STI(AArch64_MC::createAArch64MCSubtargetInfo(TT, "", "")) - {} - - - ~AArch64AsmBackend() { - delete STI; - } - - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; - - virtual void processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; -}; -} // end anonymous namespace - -void AArch64AsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, - uint64_t &Value, bool &IsResolved) { - // The ADRP instruction adds some multiple of 0x1000 to the current PC & - // ~0xfff. This means that the required offset to reach a symbol can vary by - // up to one step depending on where the ADRP is in memory. For example: - // - // ADRP x0, there - // there: - // - // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and - // we'll need that as an offset. At any other address "there" will be in the - // same page as the ADRP and the instruction should encode 0x0. Assuming the - // section isn't 0x1000-aligned, we therefore need to delegate this decision - // to the linker -- a relocation! - if ((uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_page || - (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_got_page || - (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_gottprel_page || - (uint32_t)Fixup.getKind() == AArch64::fixup_a64_tlsdesc_adr_page) - IsResolved = false; -} - - -static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value); - -namespace { - -class ELFAArch64AsmBackend : public AArch64AsmBackend { - uint8_t OSABI; - bool IsLittle; // Big or little endian -public: - ELFAArch64AsmBackend(const Target &T, const StringRef TT, - uint8_t _OSABI, bool isLittle) - : AArch64AsmBackend(T, TT), OSABI(_OSABI), IsLittle(isLittle) { } - - bool fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const override; - - unsigned int getNumFixupKinds() const override { - return AArch64::NumTargetFixupKinds; - } - - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { - const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = { -// This table *must* be in the order that the fixup_* kinds are defined in -// AArch64FixupKinds.h. 
-// -// Name Offset (bits) Size (bits) Flags -{ "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_add_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst8_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst16_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst32_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst64_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst128_lo12", 0, 32, 0 }, -{ "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_movw_uabs_g0", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g1", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g2", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g3", 0, 32, 0 }, -{ "fixup_a64_movw_sabs_g0", 0, 32, 0 }, -{ "fixup_a64_movw_sabs_g1", 0, 32, 0 }, -{ "fixup_a64_movw_sabs_g2", 0, 32, 0 }, -{ "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g2", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g1", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g0", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 }, -{ "fixup_a64_add_dtprel_hi12", 0, 32, 0 }, -{ "fixup_a64_add_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_movw_gottprel_g1", 0, 32, 0 }, -{ "fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 }, -{ "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_movw_tprel_g2", 0, 32, 0 }, -{ "fixup_a64_movw_tprel_g1", 0, 32, 0 }, -{ "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 }, -{ "fixup_a64_movw_tprel_g0", 0, 32, 0 }, -{ "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 }, -{ "fixup_a64_add_tprel_hi12", 0, 32, 0 }, -{ "fixup_a64_add_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_tlsdesc_call", 0, 0, 0 } - }; - if (Kind < FirstTargetFixupKind) - return MCAsmBackend::getFixupKindInfo(Kind); - - assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && - "Invalid kind!"); - return Infos[Kind - FirstTargetFixupKind]; - } - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool 
IsPCRel) const override { - unsigned NumBytes = getFixupKindInfo(Fixup.getKind()).TargetSize / 8; - Value = adjustFixupValue(Fixup.getKind(), Value); - if (!Value) return; // Doesn't change encoding. - - unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); - - // For each byte of the fragment that the fixup touches, mask in the bits - // from the fixup value. - for (unsigned i = 0; i != NumBytes; ++i) { - Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); - } - } - - bool mayNeedRelaxation(const MCInst&) const override { - return false; - } - - void relaxInstruction(const MCInst&, llvm::MCInst&) const override { - llvm_unreachable("Cannot relax instructions"); - } - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createAArch64ELFObjectWriter(OS, OSABI, IsLittle); - } -}; - -} // end anonymous namespace - -bool -ELFAArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const { - // Correct for now. With all instructions 32-bit only very low-level - // considerations could make you select something which may fail. - return false; -} - - -bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { - // Can't emit NOP with size not multiple of 32-bits - if (Count % 4 != 0) - return false; - - uint64_t NumNops = Count / 4; - for (uint64_t i = 0; i != NumNops; ++i) - OW->Write32(0xd503201f); - - return true; -} - -static unsigned ADRImmBits(unsigned Value) { - unsigned lo2 = Value & 0x3; - unsigned hi19 = (Value & 0x1fffff) >> 2; - - return (hi19 << 5) | (lo2 << 29); -} - -static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { - switch (Kind) { - default: - llvm_unreachable("Unknown fixup kind!"); - case FK_Data_2: - assert((int64_t)Value >= -32768 && - (int64_t)Value <= 65536 && - "Out of range ABS16 fixup"); - return Value; - case FK_Data_4: - assert((int64_t)Value >= -(1LL << 31) && - (int64_t)Value <= (1LL << 32) - 1 && - "Out of range ABS32 fixup"); - return Value; - case FK_Data_8: - return Value; - - case AArch64::fixup_a64_ld_gottprel_prel19: - // R_AARCH64_LD_GOTTPREL_PREL19: Set a load-literal immediate to bits 1F - // FFFC of G(TPREL(S+A)) - P; check -2^20 <= X < 2^20. - case AArch64::fixup_a64_ld_prel: - // R_AARCH64_LD_PREL_LO19: Sets a load-literal (immediate) value to bits - // 1F FFFC of S+A-P, checking that -2^20 <= S+A-P < 2^20. - assert((int64_t)Value >= -(1LL << 20) && - (int64_t)Value < (1LL << 20) && "Out of range LDR (lit) fixup"); - return (Value & 0x1ffffc) << 3; - - case AArch64::fixup_a64_adr_prel: - // R_AARCH64_ADR_PREL_LO21: Sets an ADR immediate value to bits 1F FFFF of - // the result of S+A-P, checking that -2^20 <= S+A-P < 2^20. - assert((int64_t)Value >= -(1LL << 20) && - (int64_t)Value < (1LL << 20) && "Out of range ADR fixup"); - return ADRImmBits(Value & 0x1fffff); - - case AArch64::fixup_a64_adr_prel_page: - // R_AARCH64_ADR_PREL_PG_HI21: Sets an ADRP immediate value to bits 1 FFFF - // F000 of the result of the operation, checking that -2^32 <= result < - // 2^32. - assert((int64_t)Value >= -(1LL << 32) && - (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup"); - return ADRImmBits((Value & 0x1fffff000ULL) >> 12); - - case AArch64::fixup_a64_add_dtprel_hi12: - // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits - // FF F000 of DTPREL(S+A), check 0 <= X < 2^24. 
- case AArch64::fixup_a64_add_tprel_hi12: - // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits - // FF F000 of TPREL(S+A), check 0 <= X < 2^24. - assert((int64_t)Value >= 0 && - (int64_t)Value < (1LL << 24) && "Out of range ADD fixup"); - return (Value & 0xfff000) >> 2; - - case AArch64::fixup_a64_add_dtprel_lo12: - // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits - // FFF of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_add_tprel_lo12: - // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits - // FFF of TPREL(S+A), check 0 <= X < 2^12. - assert((int64_t)Value >= 0 && - (int64_t)Value < (1LL << 12) && "Out of range ADD fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_add_dtprel_lo12_nc: - // R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: Set an ADD immediate field to bits - // FFF of DTPREL(S+A) with no overflow check. - case AArch64::fixup_a64_add_tprel_lo12_nc: - // R_AARCH64_TLSLD_ADD_TPREL_LO12_NC: Set an ADD immediate field to bits - // FFF of TPREL(S+A) with no overflow check. - case AArch64::fixup_a64_tlsdesc_add_lo12_nc: - // R_AARCH64_TLSDESC_ADD_LO12_NC: Set an ADD immediate field to bits - // FFF of G(TLSDESC(S+A)), with no overflow check. - case AArch64::fixup_a64_add_lo12: - // R_AARCH64_ADD_ABS_LO12_NC: Sets an ADD immediate value to bits FFF of - // S+A, with no overflow check. - return (Value & 0xfff) << 10; - - case AArch64::fixup_a64_ldst8_dtprel_lo12: - // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF - // of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_ldst8_tprel_lo12: - // R_AARCH64_TLSLE_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF - // of DTPREL(S+A), check 0 <= X < 2^12. - assert((int64_t) Value >= 0 && - (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_ldst8_dtprel_lo12_nc: - // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF - // of DTPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst8_tprel_lo12_nc: - // R_AARCH64_TLSLD_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF - // of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst8_lo12: - // R_AARCH64_LDST8_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFF - // of S+A, with no overflow check. - return (Value & 0xfff) << 10; - - case AArch64::fixup_a64_ldst16_dtprel_lo12: - // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE - // of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_ldst16_tprel_lo12: - // R_AARCH64_TLSLE_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE - // of DTPREL(S+A), check 0 <= X < 2^12. - assert((int64_t) Value >= 0 && - (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_ldst16_dtprel_lo12_nc: - // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE - // of DTPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst16_tprel_lo12_nc: - // R_AARCH64_TLSLD_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE - // of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst16_lo12: - // R_AARCH64_LDST16_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFE - // of S+A, with no overflow check. 
- return (Value & 0xffe) << 9; - - case AArch64::fixup_a64_ldst32_dtprel_lo12: - // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC - // of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_ldst32_tprel_lo12: - // R_AARCH64_TLSLE_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC - // of DTPREL(S+A), check 0 <= X < 2^12. - assert((int64_t) Value >= 0 && - (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_ldst32_dtprel_lo12_nc: - // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC - // of DTPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst32_tprel_lo12_nc: - // R_AARCH64_TLSLD_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC - // of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst32_lo12: - // R_AARCH64_LDST32_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFC - // of S+A, with no overflow check. - return (Value & 0xffc) << 8; - - case AArch64::fixup_a64_ldst64_dtprel_lo12: - // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8 - // of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_ldst64_tprel_lo12: - // R_AARCH64_TLSLE_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8 - // of DTPREL(S+A), check 0 <= X < 2^12. - assert((int64_t) Value >= 0 && - (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_ldst64_dtprel_lo12_nc: - // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8 - // of DTPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst64_tprel_lo12_nc: - // R_AARCH64_TLSLD_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8 - // of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst64_lo12: - // R_AARCH64_LDST64_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF8 - // of S+A, with no overflow check. - return (Value & 0xff8) << 7; - - case AArch64::fixup_a64_ldst128_lo12: - // R_AARCH64_LDST128_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF0 - // of S+A, with no overflow check. - return (Value & 0xff0) << 6; - - case AArch64::fixup_a64_movw_uabs_g0: - // R_AARCH64_MOVW_UABS_G0: Sets a MOVZ immediate field to bits FFFF of S+A - // with a check that S+A < 2^16 - assert(Value <= 0xffff && "Out of range move wide fixup"); - return (Value & 0xffff) << 5; - - case AArch64::fixup_a64_movw_dtprel_g0_nc: - // R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: Sets a MOVK immediate field to bits - // FFFF of DTPREL(S+A) with no overflow check. - case AArch64::fixup_a64_movw_gottprel_g0_nc: - // R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: Sets a MOVK immediate field to bits - // FFFF of G(TPREL(S+A)) - GOT with no overflow check. - case AArch64::fixup_a64_movw_tprel_g0_nc: - // R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: Sets a MOVK immediate field to bits - // FFFF of TPREL(S+A) with no overflow check. - case AArch64::fixup_a64_movw_uabs_g0_nc: - // R_AARCH64_MOVW_UABS_G0_NC: Sets a MOVK immediate field to bits FFFF of - // S+A with no overflow check. 
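The scaled LDSTn_ABS_LO12 cases above all follow one pattern: for an access of 2^s bytes, keep the low 12 bits of the address, drop the s alignment bits, and place the result at bit 10 of the instruction, which is why the shift amount falls from 10 to 6 as the access size grows. A small sketch of that rule, under illustrative names that are not part of the patch:

#include <cassert>
#include <cstdint>

// Generic form of the (Value & 0xfff) << 10, (Value & 0xffe) << 9, ...
// encodings above: keep the low 12 address bits, drop the alignment bits
// for a 2^sizeLog2-byte access, and place the field at bit 10.
static uint32_t encodeLdstLo12(uint64_t value, unsigned sizeLog2) {
  return uint32_t(((value & 0xfff) >> sizeLog2) << 10);
}

int main() {
  // 4-byte access: matches (Value & 0xffc) << 8 from the hunk above.
  assert(encodeLdstLo12(0x7fc, 2) == ((0x7fcu & 0xffc) << 8));
  // 8-byte access: matches (Value & 0xff8) << 7.
  assert(encodeLdstLo12(0xff8, 3) == ((0xff8u & 0xff8) << 7));
  return 0;
}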
- return (Value & 0xffff) << 5; - - case AArch64::fixup_a64_movw_uabs_g1: - // R_AARCH64_MOVW_UABS_G1: Sets a MOVZ immediate field to bits FFFF0000 of - // S+A with a check that S+A < 2^32 - assert(Value <= 0xffffffffull && "Out of range move wide fixup"); - return ((Value >> 16) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_dtprel_g1_nc: - // R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: Set a MOVK immediate field - // to bits FFFF0000 of DTPREL(S+A), with no overflow check. - case AArch64::fixup_a64_movw_tprel_g1_nc: - // R_AARCH64_TLSLD_MOVW_TPREL_G1_NC: Set a MOVK immediate field - // to bits FFFF0000 of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_movw_uabs_g1_nc: - // R_AARCH64_MOVW_UABS_G1_NC: Sets a MOVK immediate field to bits - // FFFF0000 of S+A with no overflow check. - return ((Value >> 16) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_uabs_g2: - // R_AARCH64_MOVW_UABS_G2: Sets a MOVZ immediate field to bits FFFF 0000 - // 0000 of S+A with a check that S+A < 2^48 - assert(Value <= 0xffffffffffffull && "Out of range move wide fixup"); - return ((Value >> 32) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_uabs_g2_nc: - // R_AARCH64_MOVW_UABS_G2: Sets a MOVK immediate field to bits FFFF 0000 - // 0000 of S+A with no overflow check. - return ((Value >> 32) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_uabs_g3: - // R_AARCH64_MOVW_UABS_G3: Sets a MOVZ immediate field to bits FFFF 0000 - // 0000 0000 of S+A (no overflow check needed) - return ((Value >> 48) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_dtprel_g0: - // R_AARCH64_TLSLD_MOVW_DTPREL_G0: Set a MOV[NZ] immediate field - // to bits FFFF of DTPREL(S+A). - case AArch64::fixup_a64_movw_tprel_g0: - // R_AARCH64_TLSLE_MOVW_TPREL_G0: Set a MOV[NZ] immediate field to - // bits FFFF of TPREL(S+A). - case AArch64::fixup_a64_movw_sabs_g0: { - // R_AARCH64_MOVW_SABS_G0: Sets MOV[NZ] immediate field using bits FFFF of - // S+A (see notes below); check -2^16 <= S+A < 2^16. (notes say that we - // should convert between MOVN and MOVZ to achieve our goals). - int64_t Signed = Value; - assert(Signed >= -(1LL << 16) && Signed < (1LL << 16) - && "Out of range move wide fixup"); - if (Signed >= 0) { - Value = (Value & 0xffff) << 5; - // Bit 30 converts the MOVN encoding into a MOVZ - Value |= 1 << 30; - } else { - // MCCodeEmitter should have encoded a MOVN, which is fine. - Value = (~Value & 0xffff) << 5; - } - return Value; - } - - case AArch64::fixup_a64_movw_dtprel_g1: - // R_AARCH64_TLSLD_MOVW_DTPREL_G1: Set a MOV[NZ] immediate field - // to bits FFFF0000 of DTPREL(S+A). - case AArch64::fixup_a64_movw_gottprel_g1: - // R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: Set a MOV[NZ] immediate field - // to bits FFFF0000 of G(TPREL(S+A)) - GOT. - case AArch64::fixup_a64_movw_tprel_g1: - // R_AARCH64_TLSLE_MOVW_TPREL_G1: Set a MOV[NZ] immediate field to - // bits FFFF0000 of TPREL(S+A). - case AArch64::fixup_a64_movw_sabs_g1: { - // R_AARCH64_MOVW_SABS_G1: Sets MOV[NZ] immediate field using bits FFFF 0000 - // of S+A (see notes below); check -2^32 <= S+A < 2^32. (notes say that we - // should convert between MOVN and MOVZ to achieve our goals). 
- int64_t Signed = Value; - assert(Signed >= -(1LL << 32) && Signed < (1LL << 32) - && "Out of range move wide fixup"); - if (Signed >= 0) { - Value = ((Value >> 16) & 0xffff) << 5; - // Bit 30 converts the MOVN encoding into a MOVZ - Value |= 1 << 30; - } else { - Value = ((~Value >> 16) & 0xffff) << 5; - } - return Value; - } - - case AArch64::fixup_a64_movw_dtprel_g2: - // R_AARCH64_TLSLD_MOVW_DTPREL_G2: Set a MOV[NZ] immediate field - // to bits FFFF 0000 0000 of DTPREL(S+A). - case AArch64::fixup_a64_movw_tprel_g2: - // R_AARCH64_TLSLE_MOVW_TPREL_G2: Set a MOV[NZ] immediate field to - // bits FFFF 0000 0000 of TPREL(S+A). - case AArch64::fixup_a64_movw_sabs_g2: { - // R_AARCH64_MOVW_SABS_G2: Sets MOV[NZ] immediate field using bits FFFF 0000 - // 0000 of S+A (see notes below); check -2^48 <= S+A < 2^48. (notes say that - // we should convert between MOVN and MOVZ to achieve our goals). - int64_t Signed = Value; - assert(Signed >= -(1LL << 48) && Signed < (1LL << 48) - && "Out of range move wide fixup"); - if (Signed >= 0) { - Value = ((Value >> 32) & 0xffff) << 5; - // Bit 30 converts the MOVN encoding into a MOVZ - Value |= 1 << 30; - } else { - Value = ((~Value >> 32) & 0xffff) << 5; - } - return Value; - } - - case AArch64::fixup_a64_tstbr: - // R_AARCH64_TSTBR14: Sets the immediate field of a TBZ/TBNZ instruction to - // bits FFFC of S+A-P, checking -2^15 <= S+A-P < 2^15. - assert((int64_t)Value >= -(1LL << 15) && - (int64_t)Value < (1LL << 15) && "Out of range TBZ/TBNZ fixup"); - return (Value & 0xfffc) << (5 - 2); - - case AArch64::fixup_a64_condbr: - // R_AARCH64_CONDBR19: Sets the immediate field of a conditional branch - // instruction to bits 1FFFFC of S+A-P, checking -2^20 <= S+A-P < 2^20. - assert((int64_t)Value >= -(1LL << 20) && - (int64_t)Value < (1LL << 20) && "Out of range B.cond fixup"); - return (Value & 0x1ffffc) << (5 - 2); - - case AArch64::fixup_a64_uncondbr: - // R_AARCH64_JUMP26 same as below (except to a linker, possibly). - case AArch64::fixup_a64_call: - // R_AARCH64_CALL26: Sets a CALL immediate field to bits FFFFFFC of S+A-P, - // checking that -2^27 <= S+A-P < 2^27. - assert((int64_t)Value >= -(1LL << 27) && - (int64_t)Value < (1LL << 27) && "Out of range branch fixup"); - return (Value & 0xffffffc) >> 2; - - case AArch64::fixup_a64_adr_gottprel_page: - // R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: Set an ADRP immediate field to bits - // 1FFFFF000 of Page(G(TPREL(S+A))) - Page(P); check -2^32 <= X < 2^32. - case AArch64::fixup_a64_tlsdesc_adr_page: - // R_AARCH64_TLSDESC_ADR_PAGE: Set an ADRP immediate field to bits 1FFFFF000 - // of Page(G(TLSDESC(S+A))) - Page(P); check -2^32 <= X < 2^32. - case AArch64::fixup_a64_adr_prel_got_page: - // R_AARCH64_ADR_GOT_PAGE: Sets the immediate value of an ADRP to bits - // 1FFFFF000 of the operation, checking that -2^32 < Page(G(S))-Page(GOT) < - // 2^32. - assert((int64_t)Value >= -(1LL << 32) && - (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup"); - return ADRImmBits((Value & 0x1fffff000ULL) >> 12); - - case AArch64::fixup_a64_ld64_gottprel_lo12_nc: - // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits FF8 - // of X, with no overflow check. Check that X & 7 == 0. - case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc: - // R_AARCH64_TLSDESC_LD64_LO12_NC: Set an LD offset field to bits FF8 of - // G(TLSDESC(S+A)), with no overflow check. Check that X & 7 == 0. 
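The signed move-wide (movw_sabs_g*) cases above choose between MOVZ and MOVN at fixup time: a non-negative value sets bit 30 to turn the MOVN encoding into a MOVZ of the selected 16-bit chunk, while a negative value keeps the MOVN encoding and stores the complemented chunk. A standalone sketch of that selection; the function name is illustrative only.

#include <cassert>
#include <cstdint>

// MOVZ/MOVN selection for the signed move-wide fixups: non-negative
// values become a MOVZ (bit 30 set) of the chosen 16-bit chunk, negative
// values stay a MOVN and encode the complemented chunk.
static uint32_t encodeMovwSabs(int64_t value, unsigned chunkShift) {
  if (value >= 0)
    return uint32_t((((uint64_t(value) >> chunkShift) & 0xffff) << 5) | (1u << 30));
  return uint32_t(((~uint64_t(value) >> chunkShift) & 0xffff) << 5);
}

int main() {
  // -1 in chunk 0: MOVN #0, so the immediate field and bit 30 are both zero.
  assert(encodeMovwSabs(-1, 0) == 0);
  // 0x1234 in chunk 0: MOVZ #0x1234.
  assert(encodeMovwSabs(0x1234, 0) == ((0x1234u << 5) | (1u << 30)));
  return 0;
}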
- case AArch64::fixup_a64_ld64_got_lo12_nc: - // R_AARCH64_LD64_GOT_LO12_NC: Sets the LD/ST immediate field to bits FF8 of - // G(S) with no overflow check. Check X & 7 == 0 - assert(((int64_t)Value & 7) == 0 && "Misaligned fixup"); - return (Value & 0xff8) << 7; - - case AArch64::fixup_a64_tlsdesc_call: - // R_AARCH64_TLSDESC_CALL: For relaxation only. - return 0; - } -} - -MCAsmBackend * -llvm::createAArch64leAsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { - Triple TheTriple(TT); - return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS(), /*isLittle*/ true); -} - -MCAsmBackend * -llvm::createAArch64beAsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { - Triple TheTriple(TT); - return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS(), /*isLittle*/ false); -} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp deleted file mode 100644 index a5fe9141e655..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ /dev/null @@ -1,291 +0,0 @@ -//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file handles ELF-specific object emission, converting LLVM's internal -// fixups into the appropriate relocations. -// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/AArch64FixupKinds.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -namespace { -class AArch64ELFObjectWriter : public MCELFObjectTargetWriter { -public: - AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian); - - virtual ~AArch64ELFObjectWriter(); - -protected: - unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel) const override; - -private: -}; -} - -AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian) - : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64, - /*HasRelocationAddend*/ true) -{} - -AArch64ELFObjectWriter::~AArch64ELFObjectWriter() -{} - -unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const { - unsigned Type; - if (IsPCRel) { - switch ((unsigned)Fixup.getKind()) { - default: - llvm_unreachable("Unimplemented fixup -> relocation"); - case FK_Data_8: - return ELF::R_AARCH64_PREL64; - case FK_Data_4: - return ELF::R_AARCH64_PREL32; - case FK_Data_2: - return ELF::R_AARCH64_PREL16; - case AArch64::fixup_a64_ld_prel: - Type = ELF::R_AARCH64_LD_PREL_LO19; - break; - case AArch64::fixup_a64_adr_prel: - Type = ELF::R_AARCH64_ADR_PREL_LO21; - break; - case AArch64::fixup_a64_adr_prel_page: - Type = ELF::R_AARCH64_ADR_PREL_PG_HI21; - break; - case AArch64::fixup_a64_adr_prel_got_page: - Type = ELF::R_AARCH64_ADR_GOT_PAGE; - break; - case AArch64::fixup_a64_tstbr: - Type = ELF::R_AARCH64_TSTBR14; - break; - case AArch64::fixup_a64_condbr: - Type = ELF::R_AARCH64_CONDBR19; - break; - case AArch64::fixup_a64_uncondbr: - Type = ELF::R_AARCH64_JUMP26; - break; - case AArch64::fixup_a64_call: - Type = ELF::R_AARCH64_CALL26; - break; - case 
AArch64::fixup_a64_adr_gottprel_page: - Type = ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21; - break; - case AArch64::fixup_a64_ld_gottprel_prel19: - Type = ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19; - break; - case AArch64::fixup_a64_tlsdesc_adr_page: - Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE; - break; - } - } else { - switch ((unsigned)Fixup.getKind()) { - default: - llvm_unreachable("Unimplemented fixup -> relocation"); - case FK_Data_8: - return ELF::R_AARCH64_ABS64; - case FK_Data_4: - return ELF::R_AARCH64_ABS32; - case FK_Data_2: - return ELF::R_AARCH64_ABS16; - case AArch64::fixup_a64_add_lo12: - Type = ELF::R_AARCH64_ADD_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ld64_got_lo12_nc: - Type = ELF::R_AARCH64_LD64_GOT_LO12_NC; - break; - case AArch64::fixup_a64_ldst8_lo12: - Type = ELF::R_AARCH64_LDST8_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ldst16_lo12: - Type = ELF::R_AARCH64_LDST16_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ldst32_lo12: - Type = ELF::R_AARCH64_LDST32_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ldst64_lo12: - Type = ELF::R_AARCH64_LDST64_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ldst128_lo12: - Type = ELF::R_AARCH64_LDST128_ABS_LO12_NC; - break; - case AArch64::fixup_a64_movw_uabs_g0: - Type = ELF::R_AARCH64_MOVW_UABS_G0; - break; - case AArch64::fixup_a64_movw_uabs_g0_nc: - Type = ELF::R_AARCH64_MOVW_UABS_G0_NC; - break; - case AArch64::fixup_a64_movw_uabs_g1: - Type = ELF::R_AARCH64_MOVW_UABS_G1; - break; - case AArch64::fixup_a64_movw_uabs_g1_nc: - Type = ELF::R_AARCH64_MOVW_UABS_G1_NC; - break; - case AArch64::fixup_a64_movw_uabs_g2: - Type = ELF::R_AARCH64_MOVW_UABS_G2; - break; - case AArch64::fixup_a64_movw_uabs_g2_nc: - Type = ELF::R_AARCH64_MOVW_UABS_G2_NC; - break; - case AArch64::fixup_a64_movw_uabs_g3: - Type = ELF::R_AARCH64_MOVW_UABS_G3; - break; - case AArch64::fixup_a64_movw_sabs_g0: - Type = ELF::R_AARCH64_MOVW_SABS_G0; - break; - case AArch64::fixup_a64_movw_sabs_g1: - Type = ELF::R_AARCH64_MOVW_SABS_G1; - break; - case AArch64::fixup_a64_movw_sabs_g2: - Type = ELF::R_AARCH64_MOVW_SABS_G2; - break; - - // TLS Local-dynamic block - case AArch64::fixup_a64_movw_dtprel_g2: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; - break; - case AArch64::fixup_a64_movw_dtprel_g1: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1; - break; - case AArch64::fixup_a64_movw_dtprel_g1_nc: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC; - break; - case AArch64::fixup_a64_movw_dtprel_g0: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0; - break; - case AArch64::fixup_a64_movw_dtprel_g0_nc: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC; - break; - case AArch64::fixup_a64_add_dtprel_hi12: - Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12; - break; - case AArch64::fixup_a64_add_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12; - break; - case AArch64::fixup_a64_add_dtprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst8_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12; - break; - case AArch64::fixup_a64_ldst8_dtprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst16_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12; - break; - case AArch64::fixup_a64_ldst16_dtprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst32_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12; - break; - case AArch64::fixup_a64_ldst32_dtprel_lo12_nc: - Type = 
ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst64_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12; - break; - case AArch64::fixup_a64_ldst64_dtprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC; - break; - - // TLS initial-exec block - case AArch64::fixup_a64_movw_gottprel_g1: - Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1; - break; - case AArch64::fixup_a64_movw_gottprel_g0_nc: - Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC; - break; - case AArch64::fixup_a64_ld64_gottprel_lo12_nc: - Type = ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; - break; - - // TLS local-exec block - case AArch64::fixup_a64_movw_tprel_g2: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2; - break; - case AArch64::fixup_a64_movw_tprel_g1: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1; - break; - case AArch64::fixup_a64_movw_tprel_g1_nc: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC; - break; - case AArch64::fixup_a64_movw_tprel_g0: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0; - break; - case AArch64::fixup_a64_movw_tprel_g0_nc: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC; - break; - case AArch64::fixup_a64_add_tprel_hi12: - Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12; - break; - case AArch64::fixup_a64_add_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12; - break; - case AArch64::fixup_a64_add_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst8_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12; - break; - case AArch64::fixup_a64_ldst8_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst16_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12; - break; - case AArch64::fixup_a64_ldst16_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst32_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12; - break; - case AArch64::fixup_a64_ldst32_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst64_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12; - break; - case AArch64::fixup_a64_ldst64_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; - break; - - // TLS general-dynamic block - case AArch64::fixup_a64_tlsdesc_adr_page: - Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE; - break; - case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc: - Type = ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; - break; - case AArch64::fixup_a64_tlsdesc_add_lo12_nc: - Type = ELF::R_AARCH64_TLSDESC_ADD_LO12_NC; - break; - case AArch64::fixup_a64_tlsdesc_call: - Type = ELF::R_AARCH64_TLSDESC_CALL; - break; - } - } - - return Type; -} - -MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS, - uint8_t OSABI, - bool IsLittleEndian) { - MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI, IsLittleEndian); - return createELFObjectWriter(MOTW, OS, IsLittleEndian); -} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp deleted file mode 100644 index df2cb3837580..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ /dev/null @@ -1,161 +0,0 @@ -//===- lib/MC/AArch64ELFStreamer.cpp - ELF Object Output for AArch64 ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file assembles .s files and emits AArch64 ELF .o object files. Different -// from generic ELF streamer in emitting mapping symbols ($x and $d) to delimit -// regions of data and code. -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCELFStreamer.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" -#include "llvm/MC/MCELFStreamer.h" -#include "llvm/MC/MCELFSymbolFlags.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCSection.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - -/// Extend the generic ELFStreamer class so that it can emit mapping symbols at -/// the appropriate points in the object files. These symbols are defined in the -/// AArch64 ELF ABI: -/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf -/// -/// In brief: $x or $d should be emitted at the start of each contiguous region -/// of A64 code or data in a section. In practice, this emission does not rely -/// on explicit assembler directives but on inherent properties of the -/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an -/// instruction). -/// -/// As a result this system is orthogonal to the DataRegion infrastructure used -/// by MachO. Beware! -class AArch64ELFStreamer : public MCELFStreamer { -public: - AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter *Emitter) - : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0), - LastEMS(EMS_None) {} - - ~AArch64ELFStreamer() {} - - void ChangeSection(const MCSection *Section, - const MCExpr *Subsection) override { - // We have to keep track of the mapping symbol state of any sections we - // use. Each one should start off as EMS_None, which is provided as the - // default constructor by DenseMap::lookup. - LastMappingSymbols[getPreviousSection().first] = LastEMS; - LastEMS = LastMappingSymbols.lookup(Section); - - MCELFStreamer::ChangeSection(Section, Subsection); - } - - /// This function is the one used to emit instruction data into the ELF - /// streamer. We override it to add the appropriate mapping symbol if - /// necessary. - void EmitInstruction(const MCInst& Inst, - const MCSubtargetInfo &STI) override { - EmitA64MappingSymbol(); - MCELFStreamer::EmitInstruction(Inst, STI); - } - - /// This is one of the functions used to emit data into an ELF section, so the - /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) - /// if necessary. - void EmitBytes(StringRef Data) override { - EmitDataMappingSymbol(); - MCELFStreamer::EmitBytes(Data); - } - - /// This is one of the functions used to emit data into an ELF section, so the - /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) - /// if necessary. 
- void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) override { - EmitDataMappingSymbol(); - MCELFStreamer::EmitValueImpl(Value, Size, Loc); - } - -private: - enum ElfMappingSymbol { - EMS_None, - EMS_A64, - EMS_Data - }; - - void EmitDataMappingSymbol() { - if (LastEMS == EMS_Data) return; - EmitMappingSymbol("$d"); - LastEMS = EMS_Data; - } - - void EmitA64MappingSymbol() { - if (LastEMS == EMS_A64) return; - EmitMappingSymbol("$x"); - LastEMS = EMS_A64; - } - - void EmitMappingSymbol(StringRef Name) { - MCSymbol *Start = getContext().CreateTempSymbol(); - EmitLabel(Start); - - MCSymbol *Symbol = - getContext().GetOrCreateSymbol(Name + "." + - Twine(MappingSymbolCounter++)); - - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - MCELF::SetType(SD, ELF::STT_NOTYPE); - MCELF::SetBinding(SD, ELF::STB_LOCAL); - SD.setExternal(false); - AssignSection(Symbol, getCurrentSection().first); - - const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); - Symbol->setVariableValue(Value); - } - - int64_t MappingSymbolCounter; - - DenseMap LastMappingSymbols; - ElfMappingSymbol LastEMS; - - /// @} -}; -} - -namespace llvm { - MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack) { - AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); - if (NoExecStack) - S->getAssembler().setNoExecStack(true); - return S; - } -} - - diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h deleted file mode 100644 index 5a89ca50cee8..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h +++ /dev/null @@ -1,27 +0,0 @@ -//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF streamer information for the AArch64 backend. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64_ELF_STREAMER_H -#define LLVM_AARCH64_ELF_STREAMER_H - -#include "llvm/MC/MCELFStreamer.h" - -namespace llvm { - - MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack); -} - -#endif // AArch64_ELF_STREAMER_H diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h deleted file mode 100644 index eeb122d38494..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h +++ /dev/null @@ -1,113 +0,0 @@ -//=- AArch64/AArch64FixupKinds.h - AArch64 Specific Fixup Entries -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the LLVM fixups applied to MCInsts in the AArch64 -// backend. 
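The AArch64ELFStreamer removed above only emits a mapping symbol when the region kind changes: "$x" before the first instruction of a code run, "$d" before the first byte of a data run, and nothing while the kind stays the same. A toy model of that LastEMS bookkeeping, using invented names purely for illustration:

#include <cstdio>
#include <string>
#include <vector>

// Minimal model of the LastEMS state machine: a symbol is recorded only
// when switching between code and data regions.
enum class Region { None, Code, Data };

struct MappingModel {
  Region last = Region::None;
  std::vector<std::string> symbols;

  void emitInstruction() {
    if (last != Region::Code) {
      symbols.push_back("$x");
      last = Region::Code;
    }
  }
  void emitData() {
    if (last != Region::Data) {
      symbols.push_back("$d");
      last = Region::Data;
    }
  }
};

int main() {
  MappingModel m;
  m.emitInstruction(); // records "$x"
  m.emitInstruction(); // region unchanged, records nothing
  m.emitData();        // records "$d"
  m.emitInstruction(); // back to code, records "$x"
  for (const std::string &s : m.symbols)
    std::printf("%s\n", s);
  return 0;
}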
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H -#define LLVM_AARCH64_AARCH64FIXUPKINDS_H - -#include "llvm/MC/MCFixup.h" - -namespace llvm { - namespace AArch64 { - enum Fixups { - fixup_a64_ld_prel = FirstTargetFixupKind, - fixup_a64_adr_prel, - fixup_a64_adr_prel_page, - - fixup_a64_add_lo12, - - fixup_a64_ldst8_lo12, - fixup_a64_ldst16_lo12, - fixup_a64_ldst32_lo12, - fixup_a64_ldst64_lo12, - fixup_a64_ldst128_lo12, - - fixup_a64_tstbr, - fixup_a64_condbr, - fixup_a64_uncondbr, - fixup_a64_call, - - fixup_a64_movw_uabs_g0, - fixup_a64_movw_uabs_g0_nc, - fixup_a64_movw_uabs_g1, - fixup_a64_movw_uabs_g1_nc, - fixup_a64_movw_uabs_g2, - fixup_a64_movw_uabs_g2_nc, - fixup_a64_movw_uabs_g3, - - fixup_a64_movw_sabs_g0, - fixup_a64_movw_sabs_g1, - fixup_a64_movw_sabs_g2, - - fixup_a64_adr_prel_got_page, - fixup_a64_ld64_got_lo12_nc, - - // Produce offsets relative to the module's dynamic TLS area. - fixup_a64_movw_dtprel_g2, - fixup_a64_movw_dtprel_g1, - fixup_a64_movw_dtprel_g1_nc, - fixup_a64_movw_dtprel_g0, - fixup_a64_movw_dtprel_g0_nc, - fixup_a64_add_dtprel_hi12, - fixup_a64_add_dtprel_lo12, - fixup_a64_add_dtprel_lo12_nc, - fixup_a64_ldst8_dtprel_lo12, - fixup_a64_ldst8_dtprel_lo12_nc, - fixup_a64_ldst16_dtprel_lo12, - fixup_a64_ldst16_dtprel_lo12_nc, - fixup_a64_ldst32_dtprel_lo12, - fixup_a64_ldst32_dtprel_lo12_nc, - fixup_a64_ldst64_dtprel_lo12, - fixup_a64_ldst64_dtprel_lo12_nc, - - // Produce the GOT entry containing a variable's address in TLS's - // initial-exec mode. - fixup_a64_movw_gottprel_g1, - fixup_a64_movw_gottprel_g0_nc, - fixup_a64_adr_gottprel_page, - fixup_a64_ld64_gottprel_lo12_nc, - fixup_a64_ld_gottprel_prel19, - - // Produce offsets relative to the thread pointer: TPIDR_EL0. - fixup_a64_movw_tprel_g2, - fixup_a64_movw_tprel_g1, - fixup_a64_movw_tprel_g1_nc, - fixup_a64_movw_tprel_g0, - fixup_a64_movw_tprel_g0_nc, - fixup_a64_add_tprel_hi12, - fixup_a64_add_tprel_lo12, - fixup_a64_add_tprel_lo12_nc, - fixup_a64_ldst8_tprel_lo12, - fixup_a64_ldst8_tprel_lo12_nc, - fixup_a64_ldst16_tprel_lo12, - fixup_a64_ldst16_tprel_lo12_nc, - fixup_a64_ldst32_tprel_lo12, - fixup_a64_ldst32_tprel_lo12_nc, - fixup_a64_ldst64_tprel_lo12, - fixup_a64_ldst64_tprel_lo12_nc, - - // Produce the special fixups used by the general-dynamic TLS model. - fixup_a64_tlsdesc_adr_page, - fixup_a64_tlsdesc_ld64_lo12_nc, - fixup_a64_tlsdesc_add_lo12_nc, - fixup_a64_tlsdesc_call, - - - // Marker - LastTargetFixupKind, - NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind - }; - } -} - -#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp deleted file mode 100644 index b090a55eb99a..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ /dev/null @@ -1,46 +0,0 @@ -//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declarations of the AArch64MCAsmInfo properties. 
-// -//===----------------------------------------------------------------------===// - -#include "AArch64MCAsmInfo.h" -#include "llvm/ADT/Triple.h" - -using namespace llvm; - -AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo(StringRef TT) { - Triple TheTriple(TT); - if (TheTriple.getArch() == Triple::aarch64_be) - IsLittleEndian = false; - - PointerSize = 8; - - // ".comm align is in bytes but .align is pow-2." - AlignmentIsInBytes = false; - - CommentString = "//"; - Code32Directive = ".code\t32"; - - Data16bitsDirective = "\t.hword\t"; - Data32bitsDirective = "\t.word\t"; - Data64bitsDirective = "\t.xword\t"; - - HasLEB128 = true; - SupportsDebugInformation = true; - - // Exceptions handling - ExceptionsType = ExceptionHandling::DwarfCFI; - - UseIntegratedAssembler = true; -} - -// Pin the vtable to this file. -void AArch64ELFMCAsmInfo::anchor() {} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h deleted file mode 100644 index 78fd5d5b4fe0..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ /dev/null @@ -1,29 +0,0 @@ -//==-- AArch64MCAsmInfo.h - AArch64 asm properties -------------*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the AArch64MCAsmInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64TARGETASMINFO_H -#define LLVM_AARCH64TARGETASMINFO_H - -#include "llvm/MC/MCAsmInfoELF.h" - -namespace llvm { - -struct AArch64ELFMCAsmInfo : public MCAsmInfoELF { - explicit AArch64ELFMCAsmInfo(StringRef TT); -private: - void anchor() override; -}; - -} // namespace llvm - -#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp deleted file mode 100644 index 7ff46d71df91..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ /dev/null @@ -1,613 +0,0 @@ -//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code =// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the AArch64MCCodeEmitter class. 
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/AArch64FixupKinds.h" -#include "MCTargetDesc/AArch64MCExpr.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "mccodeemitter" - -namespace { -class AArch64MCCodeEmitter : public MCCodeEmitter { - AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION; - void operator=(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION; - MCContext &Ctx; - -public: - AArch64MCCodeEmitter(MCContext &ctx) : Ctx(ctx) {} - - ~AArch64MCCodeEmitter() {} - - unsigned getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - unsigned getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - template - unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return getOffsetUImm12OpValue(MI, OpIdx, Fixups, STI, MemSize); - } - - unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI, - int MemSize) const; - - unsigned getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - unsigned getShiftRightImm8(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftRightImm16(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftRightImm32(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftRightImm64(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - unsigned getShiftLeftImm8(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftLeftImm16(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftLeftImm32(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftLeftImm64(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - // Labels are handled mostly the same way: a symbol is needed, and - // just gets some fixup attached. 
- template - unsigned getLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - unsigned getLoadLitLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - - unsigned getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - - unsigned getAddressWithFixup(const MCOperand &MO, - unsigned FixupKind, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - - // getBinaryCodeForInstr - TableGen'erated function for getting the - // binary encoding for an instruction. - uint64_t getBinaryCodeForInstr(const MCInst &MI, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - /// getMachineOpValue - Return binary encoding of operand. If the machine - /// operand requires relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - - void EmitByte(unsigned char C, raw_ostream &OS) const { - OS << (char)C; - } - - void EmitInstruction(uint32_t Val, raw_ostream &OS) const { - // Output the constant in little endian byte order. - for (unsigned i = 0; i != 4; ++i) { - EmitByte(Val & 0xff, OS); - Val >>= 8; - } - } - - - void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const override; - - template unsigned - fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const; - - unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const; - - unsigned fixMulHigh(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const; - - -}; - -} // end anonymous namespace - -unsigned AArch64MCCodeEmitter::getAddressWithFixup(const MCOperand &MO, - unsigned FixupKind, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - if (!MO.isExpr()) { - // This can occur for manually decoded or constructed MCInsts, but neither - // the assembly-parser nor instruction selection will currently produce an - // MCInst that's not a symbol reference. 
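EmitInstruction in the hunk above writes each 32-bit encoding lowest byte first, so the emitted bytes are little-endian regardless of the host's byte order. A tiny standalone equivalent (names are illustrative), exercised with the NOP encoding 0xd503201f that writeNopData also emits:

#include <cassert>
#include <cstdint>

// Write a 32-bit instruction word in little-endian byte order, as the
// code emitter above does one byte at a time.
static void emitWordLE(uint32_t val, uint8_t out[4]) {
  for (unsigned i = 0; i != 4; ++i) {
    out[i] = uint8_t(val & 0xff);
    val >>= 8;
  }
}

int main() {
  uint8_t bytes[4];
  emitWordLE(0xd503201f, bytes); // AArch64 NOP encoding
  assert(bytes[0] == 0x1f && bytes[3] == 0xd5);
  return 0;
}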
- assert(MO.isImm() && "Unexpected address requested"); - return MO.getImm(); - } - - const MCExpr *Expr = MO.getExpr(); - MCFixupKind Kind = MCFixupKind(FixupKind); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); - - return 0; -} - -unsigned AArch64MCCodeEmitter:: -getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI, - int MemSize) const { - const MCOperand &ImmOp = MI.getOperand(OpIdx); - if (ImmOp.isImm()) - return ImmOp.getImm(); - - assert(ImmOp.isExpr() && "Unexpected operand type"); - const AArch64MCExpr *Expr = cast(ImmOp.getExpr()); - unsigned FixupKind; - - - switch (Expr->getKind()) { - default: llvm_unreachable("Unexpected operand modifier"); - case AArch64MCExpr::VK_AARCH64_LO12: { - static const unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12, - AArch64::fixup_a64_ldst16_lo12, - AArch64::fixup_a64_ldst32_lo12, - AArch64::fixup_a64_ldst64_lo12, - AArch64::fixup_a64_ldst128_lo12 }; - assert(MemSize <= 16 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_GOT_LO12: - assert(MemSize == 8 && "Invalid fixup for operation"); - FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc; - break; - case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: { - static const unsigned FixupsBySize[] = { - AArch64::fixup_a64_ldst8_dtprel_lo12, - AArch64::fixup_a64_ldst16_dtprel_lo12, - AArch64::fixup_a64_ldst32_dtprel_lo12, - AArch64::fixup_a64_ldst64_dtprel_lo12 - }; - assert(MemSize <= 8 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: { - static const unsigned FixupsBySize[] = { - AArch64::fixup_a64_ldst8_dtprel_lo12_nc, - AArch64::fixup_a64_ldst16_dtprel_lo12_nc, - AArch64::fixup_a64_ldst32_dtprel_lo12_nc, - AArch64::fixup_a64_ldst64_dtprel_lo12_nc - }; - assert(MemSize <= 8 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12: - assert(MemSize == 8 && "Invalid fixup for operation"); - FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc; - break; - case AArch64MCExpr::VK_AARCH64_TPREL_LO12:{ - static const unsigned FixupsBySize[] = { - AArch64::fixup_a64_ldst8_tprel_lo12, - AArch64::fixup_a64_ldst16_tprel_lo12, - AArch64::fixup_a64_ldst32_tprel_lo12, - AArch64::fixup_a64_ldst64_tprel_lo12 - }; - assert(MemSize <= 8 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: { - static const unsigned FixupsBySize[] = { - AArch64::fixup_a64_ldst8_tprel_lo12_nc, - AArch64::fixup_a64_ldst16_tprel_lo12_nc, - AArch64::fixup_a64_ldst32_tprel_lo12_nc, - AArch64::fixup_a64_ldst64_tprel_lo12_nc - }; - assert(MemSize <= 8 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12: - assert(MemSize == 8 && "Invalid fixup for operation"); - FixupKind = AArch64::fixup_a64_tlsdesc_ld64_lo12_nc; - break; - } - - return getAddressWithFixup(ImmOp, FixupKind, Fixups, STI); -} - -unsigned -AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - if (MO.isImm()) - return static_cast(MO.getImm()); - - assert(MO.isExpr()); - - unsigned FixupKind = 0; - switch(cast(MO.getExpr())->getKind()) { - default: 
llvm_unreachable("Invalid expression modifier"); - case AArch64MCExpr::VK_AARCH64_LO12: - FixupKind = AArch64::fixup_a64_add_lo12; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_HI12: - FixupKind = AArch64::fixup_a64_add_dtprel_hi12; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: - FixupKind = AArch64::fixup_a64_add_dtprel_lo12; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: - FixupKind = AArch64::fixup_a64_add_dtprel_lo12_nc; break; - case AArch64MCExpr::VK_AARCH64_TPREL_HI12: - FixupKind = AArch64::fixup_a64_add_tprel_hi12; break; - case AArch64MCExpr::VK_AARCH64_TPREL_LO12: - FixupKind = AArch64::fixup_a64_add_tprel_lo12; break; - case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: - FixupKind = AArch64::fixup_a64_add_tprel_lo12_nc; break; - case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12: - FixupKind = AArch64::fixup_a64_tlsdesc_add_lo12_nc; break; - } - - return getAddressWithFixup(MO, FixupKind, Fixups, STI); -} - -unsigned -AArch64MCCodeEmitter::getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - - const MCOperand &MO = MI.getOperand(OpIdx); - if (MO.isImm()) - return static_cast(MO.getImm()); - - assert(MO.isExpr()); - - unsigned Modifier = AArch64MCExpr::VK_AARCH64_None; - if (const AArch64MCExpr *Expr = dyn_cast(MO.getExpr())) - Modifier = Expr->getKind(); - - unsigned FixupKind = 0; - switch(Modifier) { - case AArch64MCExpr::VK_AARCH64_None: - FixupKind = AArch64::fixup_a64_adr_prel_page; - break; - case AArch64MCExpr::VK_AARCH64_GOT: - FixupKind = AArch64::fixup_a64_adr_prel_got_page; - break; - case AArch64MCExpr::VK_AARCH64_GOTTPREL: - FixupKind = AArch64::fixup_a64_adr_gottprel_page; - break; - case AArch64MCExpr::VK_AARCH64_TLSDESC: - FixupKind = AArch64::fixup_a64_tlsdesc_adr_page; - break; - default: - llvm_unreachable("Unknown symbol reference kind for ADRP instruction"); - } - - return getAddressWithFixup(MO, FixupKind, Fixups, STI); -} - -unsigned -AArch64MCCodeEmitter::getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Only immediate expected for shift"); - - return ((32 - MO.getImm()) & 0x1f) | (31 - MO.getImm()) << 6; -} - -unsigned -AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Only immediate expected for shift"); - - return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6; -} - -unsigned AArch64MCCodeEmitter::getShiftRightImm8( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return 8 - MI.getOperand(Op).getImm(); -} - -unsigned AArch64MCCodeEmitter::getShiftRightImm16( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return 16 - MI.getOperand(Op).getImm(); -} - -unsigned AArch64MCCodeEmitter::getShiftRightImm32( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return 32 - MI.getOperand(Op).getImm(); -} - -unsigned AArch64MCCodeEmitter::getShiftRightImm64( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return 64 - MI.getOperand(Op).getImm(); -} - -unsigned AArch64MCCodeEmitter::getShiftLeftImm8( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) 
const { - return MI.getOperand(Op).getImm() - 8; -} - -unsigned AArch64MCCodeEmitter::getShiftLeftImm16( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return MI.getOperand(Op).getImm() - 16; -} - -unsigned AArch64MCCodeEmitter::getShiftLeftImm32( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return MI.getOperand(Op).getImm() - 32; -} - -unsigned AArch64MCCodeEmitter::getShiftLeftImm64( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return MI.getOperand(Op).getImm() - 64; -} - -template unsigned -AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI, - unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - if (MO.isExpr()) - return getAddressWithFixup(MO, fixupDesired, Fixups, STI); - - assert(MO.isImm()); - return MO.getImm(); -} - -unsigned -AArch64MCCodeEmitter::getLoadLitLabelOpValue(const MCInst &MI, - unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - if (MO.isImm()) - return MO.getImm(); - - assert(MO.isExpr()); - - unsigned FixupKind; - if (isa(MO.getExpr())) { - assert(dyn_cast(MO.getExpr())->getKind() - == AArch64MCExpr::VK_AARCH64_GOTTPREL - && "Invalid symbol modifier for literal load"); - FixupKind = AArch64::fixup_a64_ld_gottprel_prel19; - } else { - FixupKind = AArch64::fixup_a64_ld_prel; - } - - return getAddressWithFixup(MO, FixupKind, Fixups, STI); -} - - -unsigned -AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, - const MCOperand &MO, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - if (MO.isReg()) { - return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); - } else if (MO.isImm()) { - return static_cast(MO.getImm()); - } - - llvm_unreachable("Unable to encode MCOperand!"); - return 0; -} - -unsigned -AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &UImm16MO = MI.getOperand(OpIdx); - const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1); - - unsigned Result = static_cast(ShiftMO.getImm()) << 16; - - if (UImm16MO.isImm()) { - Result |= UImm16MO.getImm(); - return Result; - } - - const AArch64MCExpr *A64E = cast(UImm16MO.getExpr()); - AArch64::Fixups requestedFixup; - switch (A64E->getKind()) { - default: llvm_unreachable("unexpected expression modifier"); - case AArch64MCExpr::VK_AARCH64_ABS_G0: - requestedFixup = AArch64::fixup_a64_movw_uabs_g0; break; - case AArch64MCExpr::VK_AARCH64_ABS_G0_NC: - requestedFixup = AArch64::fixup_a64_movw_uabs_g0_nc; break; - case AArch64MCExpr::VK_AARCH64_ABS_G1: - requestedFixup = AArch64::fixup_a64_movw_uabs_g1; break; - case AArch64MCExpr::VK_AARCH64_ABS_G1_NC: - requestedFixup = AArch64::fixup_a64_movw_uabs_g1_nc; break; - case AArch64MCExpr::VK_AARCH64_ABS_G2: - requestedFixup = AArch64::fixup_a64_movw_uabs_g2; break; - case AArch64MCExpr::VK_AARCH64_ABS_G2_NC: - requestedFixup = AArch64::fixup_a64_movw_uabs_g2_nc; break; - case AArch64MCExpr::VK_AARCH64_ABS_G3: - requestedFixup = AArch64::fixup_a64_movw_uabs_g3; break; - case AArch64MCExpr::VK_AARCH64_SABS_G0: - requestedFixup = AArch64::fixup_a64_movw_sabs_g0; break; - case AArch64MCExpr::VK_AARCH64_SABS_G1: - requestedFixup = AArch64::fixup_a64_movw_sabs_g1; break; - case AArch64MCExpr::VK_AARCH64_SABS_G2: - requestedFixup = 
AArch64::fixup_a64_movw_sabs_g2; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G2: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g2; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G1: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g1; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g1_nc; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G0: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g0; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g0_nc; break; - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: - requestedFixup = AArch64::fixup_a64_movw_gottprel_g1; break; - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC: - requestedFixup = AArch64::fixup_a64_movw_gottprel_g0_nc; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G2: - requestedFixup = AArch64::fixup_a64_movw_tprel_g2; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G1: - requestedFixup = AArch64::fixup_a64_movw_tprel_g1; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC: - requestedFixup = AArch64::fixup_a64_movw_tprel_g1_nc; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G0: - requestedFixup = AArch64::fixup_a64_movw_tprel_g0; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC: - requestedFixup = AArch64::fixup_a64_movw_tprel_g0_nc; break; - } - - return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups, STI); -} - -template unsigned -AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI, - unsigned EncodedValue, - const MCSubtargetInfo &STI) const { - if (!hasRs) EncodedValue |= 0x001F0000; - if (!hasRt2) EncodedValue |= 0x00007C00; - - return EncodedValue; -} - -unsigned -AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const { - // If one of the signed fixup kinds is applied to a MOVZ instruction, the - // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's - // job to ensure that any bits possibly affected by this are 0. This means we - // must zero out bit 30 (essentially emitting a MOVN). - MCOperand UImm16MO = MI.getOperand(1); - - // Nothing to do if there's no fixup. - if (UImm16MO.isImm()) - return EncodedValue; - - const AArch64MCExpr *A64E = cast(UImm16MO.getExpr()); - switch (A64E->getKind()) { - case AArch64MCExpr::VK_AARCH64_SABS_G0: - case AArch64MCExpr::VK_AARCH64_SABS_G1: - case AArch64MCExpr::VK_AARCH64_SABS_G2: - case AArch64MCExpr::VK_AARCH64_DTPREL_G2: - case AArch64MCExpr::VK_AARCH64_DTPREL_G1: - case AArch64MCExpr::VK_AARCH64_DTPREL_G0: - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: - case AArch64MCExpr::VK_AARCH64_TPREL_G2: - case AArch64MCExpr::VK_AARCH64_TPREL_G1: - case AArch64MCExpr::VK_AARCH64_TPREL_G0: - return EncodedValue & ~(1u << 30); - default: - // Nothing to do for an unsigned fixup. - return EncodedValue; - } - - llvm_unreachable("Should have returned by now"); -} - -unsigned -AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI, - unsigned EncodedValue, - const MCSubtargetInfo &STI) const { - // The Ra field of SMULH and UMULH is unused: it should be assembled as 31 - // (i.e. all bits 1) but is ignored by the processor. 
- EncodedValue |= 0x1f << 10; - return EncodedValue; -} - -MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new AArch64MCCodeEmitter(Ctx); -} - -void AArch64MCCodeEmitter:: -EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - if (MI.getOpcode() == AArch64::TLSDESCCALL) { - // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the - // following (BLR) instruction. It doesn't emit any code itself so it - // doesn't go through the normal TableGenerated channels. - MCFixupKind Fixup = MCFixupKind(AArch64::fixup_a64_tlsdesc_call); - const MCExpr *Expr; - Expr = AArch64MCExpr::CreateTLSDesc(MI.getOperand(0).getExpr(), Ctx); - Fixups.push_back(MCFixup::Create(0, Expr, Fixup)); - return; - } - - uint32_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); - - EmitInstruction(Binary, OS); -} - - -#include "AArch64GenMCCodeEmitter.inc" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp deleted file mode 100644 index 7aef9c57bf36..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ /dev/null @@ -1,179 +0,0 @@ -//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the implementation of the assembly expression modifiers -// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...). 
-// -//===----------------------------------------------------------------------===// - -#include "AArch64MCExpr.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" -#include "llvm/Object/ELF.h" - -using namespace llvm; - -#define DEBUG_TYPE "aarch64mcexpr" - -const AArch64MCExpr* -AArch64MCExpr::Create(VariantKind Kind, const MCExpr *Expr, - MCContext &Ctx) { - return new (Ctx) AArch64MCExpr(Kind, Expr); -} - -void AArch64MCExpr::PrintImpl(raw_ostream &OS) const { - switch (Kind) { - default: llvm_unreachable("Invalid kind!"); - case VK_AARCH64_GOT: OS << ":got:"; break; - case VK_AARCH64_GOT_LO12: OS << ":got_lo12:"; break; - case VK_AARCH64_LO12: OS << ":lo12:"; break; - case VK_AARCH64_ABS_G0: OS << ":abs_g0:"; break; - case VK_AARCH64_ABS_G0_NC: OS << ":abs_g0_nc:"; break; - case VK_AARCH64_ABS_G1: OS << ":abs_g1:"; break; - case VK_AARCH64_ABS_G1_NC: OS << ":abs_g1_nc:"; break; - case VK_AARCH64_ABS_G2: OS << ":abs_g2:"; break; - case VK_AARCH64_ABS_G2_NC: OS << ":abs_g2_nc:"; break; - case VK_AARCH64_ABS_G3: OS << ":abs_g3:"; break; - case VK_AARCH64_SABS_G0: OS << ":abs_g0_s:"; break; - case VK_AARCH64_SABS_G1: OS << ":abs_g1_s:"; break; - case VK_AARCH64_SABS_G2: OS << ":abs_g2_s:"; break; - case VK_AARCH64_DTPREL_G2: OS << ":dtprel_g2:"; break; - case VK_AARCH64_DTPREL_G1: OS << ":dtprel_g1:"; break; - case VK_AARCH64_DTPREL_G1_NC: OS << ":dtprel_g1_nc:"; break; - case VK_AARCH64_DTPREL_G0: OS << ":dtprel_g0:"; break; - case VK_AARCH64_DTPREL_G0_NC: OS << ":dtprel_g0_nc:"; break; - case VK_AARCH64_DTPREL_HI12: OS << ":dtprel_hi12:"; break; - case VK_AARCH64_DTPREL_LO12: OS << ":dtprel_lo12:"; break; - case VK_AARCH64_DTPREL_LO12_NC: OS << ":dtprel_lo12_nc:"; break; - case VK_AARCH64_GOTTPREL_G1: OS << ":gottprel_g1:"; break; - case VK_AARCH64_GOTTPREL_G0_NC: OS << ":gottprel_g0_nc:"; break; - case VK_AARCH64_GOTTPREL: OS << ":gottprel:"; break; - case VK_AARCH64_GOTTPREL_LO12: OS << ":gottprel_lo12:"; break; - case VK_AARCH64_TPREL_G2: OS << ":tprel_g2:"; break; - case VK_AARCH64_TPREL_G1: OS << ":tprel_g1:"; break; - case VK_AARCH64_TPREL_G1_NC: OS << ":tprel_g1_nc:"; break; - case VK_AARCH64_TPREL_G0: OS << ":tprel_g0:"; break; - case VK_AARCH64_TPREL_G0_NC: OS << ":tprel_g0_nc:"; break; - case VK_AARCH64_TPREL_HI12: OS << ":tprel_hi12:"; break; - case VK_AARCH64_TPREL_LO12: OS << ":tprel_lo12:"; break; - case VK_AARCH64_TPREL_LO12_NC: OS << ":tprel_lo12_nc:"; break; - case VK_AARCH64_TLSDESC: OS << ":tlsdesc:"; break; - case VK_AARCH64_TLSDESC_LO12: OS << ":tlsdesc_lo12:"; break; - - } - - const MCExpr *Expr = getSubExpr(); - if (Expr->getKind() != MCExpr::SymbolRef) - OS << '('; - Expr->print(OS); - if (Expr->getKind() != MCExpr::SymbolRef) - OS << ')'; -} - -bool -AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const { - return getSubExpr()->EvaluateAsRelocatable(Res, Layout); -} - -static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { - switch (Expr->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expression"); - break; - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast(Expr); - fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); - fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: { - // We're known to be under a TLS fixup, so any symbol should be - // modified. There should be only one. 
- const MCSymbolRefExpr &SymRef = *cast(Expr); - MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol()); - MCELF::SetType(SD, ELF::STT_TLS); - break; - } - - case MCExpr::Unary: - fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); - break; - } -} - -void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { - switch (getKind()) { - default: - return; - case VK_AARCH64_DTPREL_G2: - case VK_AARCH64_DTPREL_G1: - case VK_AARCH64_DTPREL_G1_NC: - case VK_AARCH64_DTPREL_G0: - case VK_AARCH64_DTPREL_G0_NC: - case VK_AARCH64_DTPREL_HI12: - case VK_AARCH64_DTPREL_LO12: - case VK_AARCH64_DTPREL_LO12_NC: - case VK_AARCH64_GOTTPREL_G1: - case VK_AARCH64_GOTTPREL_G0_NC: - case VK_AARCH64_GOTTPREL: - case VK_AARCH64_GOTTPREL_LO12: - case VK_AARCH64_TPREL_G2: - case VK_AARCH64_TPREL_G1: - case VK_AARCH64_TPREL_G1_NC: - case VK_AARCH64_TPREL_G0: - case VK_AARCH64_TPREL_G0_NC: - case VK_AARCH64_TPREL_HI12: - case VK_AARCH64_TPREL_LO12: - case VK_AARCH64_TPREL_LO12_NC: - case VK_AARCH64_TLSDESC: - case VK_AARCH64_TLSDESC_LO12: - break; - } - - fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); -} - -// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps -// that method should be made public? -// FIXME: really do above: now that two backends are using it. -static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { - switch (Value->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expr!"); - break; - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast(Value); - AddValueSymbolsImpl(BE->getLHS(), Asm); - AddValueSymbolsImpl(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: - Asm->getOrCreateSymbolData(cast(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbolsImpl(cast(Value)->getSubExpr(), Asm); - break; - } -} - -void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const { - AddValueSymbolsImpl(getSubExpr(), Asm); -} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h deleted file mode 100644 index 23128fefb0b0..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ /dev/null @@ -1,187 +0,0 @@ -//==- AArch64MCExpr.h - AArch64 specific MC expression classes --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes AArch64-specific MCExprs, used for modifiers like -// ":lo12:" or ":gottprel_g1:". 
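As a minimal usage sketch of the interface this header describes (the factory helpers such as AArch64MCExpr::Create and AArch64MCExpr::CreateLo12 are declared a few lines further below, and PrintImpl in the .cpp above emits the ":lo12:" prefix), assuming an MCContext Ctx and an MCSymbol *Sym are already in scope and llvm/MC/MCExpr.h plus llvm/Support/raw_ostream.h are included:

    const MCExpr *SymRef = MCSymbolRefExpr::Create(Sym, Ctx);            // plain reference to the symbol
    const AArch64MCExpr *Lo12 = AArch64MCExpr::CreateLo12(SymRef, Ctx);  // wrap it with the :lo12: modifier
    Lo12->print(outs());                                                 // prints ":lo12:" followed by the symbol name

The same pattern is what the code emitter above relies on for TLS descriptors via AArch64MCExpr::CreateTLSDesc.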
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64MCEXPR_H -#define LLVM_AARCH64MCEXPR_H - -#include "llvm/MC/MCExpr.h" - -namespace llvm { - -class AArch64MCExpr : public MCTargetExpr { -public: - enum VariantKind { - VK_AARCH64_None, - VK_AARCH64_GOT, // :got: modifier in assembly - VK_AARCH64_GOT_LO12, // :got_lo12: - VK_AARCH64_LO12, // :lo12: - - VK_AARCH64_ABS_G0, // :abs_g0: - VK_AARCH64_ABS_G0_NC, // :abs_g0_nc: - VK_AARCH64_ABS_G1, - VK_AARCH64_ABS_G1_NC, - VK_AARCH64_ABS_G2, - VK_AARCH64_ABS_G2_NC, - VK_AARCH64_ABS_G3, - - VK_AARCH64_SABS_G0, // :abs_g0_s: - VK_AARCH64_SABS_G1, - VK_AARCH64_SABS_G2, - - VK_AARCH64_DTPREL_G2, // :dtprel_g2: - VK_AARCH64_DTPREL_G1, - VK_AARCH64_DTPREL_G1_NC, - VK_AARCH64_DTPREL_G0, - VK_AARCH64_DTPREL_G0_NC, - VK_AARCH64_DTPREL_HI12, - VK_AARCH64_DTPREL_LO12, - VK_AARCH64_DTPREL_LO12_NC, - - VK_AARCH64_GOTTPREL_G1, // :gottprel: - VK_AARCH64_GOTTPREL_G0_NC, - VK_AARCH64_GOTTPREL, - VK_AARCH64_GOTTPREL_LO12, - - VK_AARCH64_TPREL_G2, // :tprel: - VK_AARCH64_TPREL_G1, - VK_AARCH64_TPREL_G1_NC, - VK_AARCH64_TPREL_G0, - VK_AARCH64_TPREL_G0_NC, - VK_AARCH64_TPREL_HI12, - VK_AARCH64_TPREL_LO12, - VK_AARCH64_TPREL_LO12_NC, - - VK_AARCH64_TLSDESC, // :tlsdesc: - VK_AARCH64_TLSDESC_LO12 - }; - -private: - const VariantKind Kind; - const MCExpr *Expr; - - explicit AArch64MCExpr(VariantKind _Kind, const MCExpr *_Expr) - : Kind(_Kind), Expr(_Expr) {} - -public: - /// @name Construction - /// @{ - - static const AArch64MCExpr *Create(VariantKind Kind, const MCExpr *Expr, - MCContext &Ctx); - - static const AArch64MCExpr *CreateLo12(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_AARCH64_LO12, Expr, Ctx); - } - - static const AArch64MCExpr *CreateGOT(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_AARCH64_GOT, Expr, Ctx); - } - - static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_GOT_LO12, Expr, Ctx); - } - - static const AArch64MCExpr *CreateDTPREL_G1(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_DTPREL_G1, Expr, Ctx); - } - - static const AArch64MCExpr *CreateDTPREL_G0_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_DTPREL_G0_NC, Expr, Ctx); - } - - static const AArch64MCExpr *CreateGOTTPREL(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_GOTTPREL, Expr, Ctx); - } - - static const AArch64MCExpr *CreateGOTTPRELLo12(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_GOTTPREL_LO12, Expr, Ctx); - } - - static const AArch64MCExpr *CreateTLSDesc(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_TLSDESC, Expr, Ctx); - } - - static const AArch64MCExpr *CreateTLSDescLo12(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_TLSDESC_LO12, Expr, Ctx); - } - - static const AArch64MCExpr *CreateTPREL_G1(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_TPREL_G1, Expr, Ctx); - } - - static const AArch64MCExpr *CreateTPREL_G0_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx); - } - - static const AArch64MCExpr *CreateABS_G3(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_ABS_G3, Expr, Ctx); - } - - static const AArch64MCExpr *CreateABS_G2_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_ABS_G2_NC, Expr, Ctx); - } - - static const AArch64MCExpr *CreateABS_G1_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_ABS_G1_NC, 
Expr, Ctx); - } - - static const AArch64MCExpr *CreateABS_G0_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_ABS_G0_NC, Expr, Ctx); - } - - /// @} - /// @name Accessors - /// @{ - - /// getOpcode - Get the kind of this expression. - VariantKind getKind() const { return Kind; } - - /// getSubExpr - Get the child of this expression. - const MCExpr *getSubExpr() const { return Expr; } - - /// @} - - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const override; - void AddValueSymbols(MCAssembler *) const override; - const MCSection *FindAssociatedSection() const override { - return getSubExpr()->FindAssociatedSection(); - } - - void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; - - static bool classof(const MCExpr *E) { - return E->getKind() == MCExpr::Target; - } - - static bool classof(const AArch64MCExpr *) { return true; } - -}; -} // end namespace llvm - -#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp deleted file mode 100644 index 599949c04357..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ /dev/null @@ -1,221 +0,0 @@ -//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides AArch64 specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#include "AArch64MCTargetDesc.h" -#include "AArch64ELFStreamer.h" -#include "AArch64MCAsmInfo.h" -#include "InstPrinter/AArch64InstPrinter.h" -#include "llvm/ADT/APInt.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrAnalysis.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - -#define GET_REGINFO_MC_DESC -#include "AArch64GenRegisterInfo.inc" - -#define GET_INSTRINFO_MC_DESC -#include "AArch64GenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "AArch64GenSubtargetInfo.inc" - -MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT, - StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitAArch64MCSubtargetInfo(X, TT, CPU, FS); - return X; -} - - -static MCInstrInfo *createAArch64MCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitAArch64MCInstrInfo(X); - return X; -} - -static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { - MCRegisterInfo *X = new MCRegisterInfo(); - InitAArch64MCRegisterInfo(X, AArch64::X30); - return X; -} - -static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { - Triple TheTriple(TT); - - MCAsmInfo *MAI = new AArch64ELFMCAsmInfo(TT); - unsigned Reg = MRI.getDwarfRegNum(AArch64::XSP, true); - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0); - MAI->addInitialFrameState(Inst); - - return MAI; -} - -static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - if (RM == Reloc::Default || RM == 
Reloc::DynamicNoPIC) { - // On ELF platforms the default static relocation model has a smart enough - // linker to cope with referencing external symbols defined in a shared - // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. - RM = Reloc::Static; - } - - if (CM == CodeModel::Default) - CM = CodeModel::Small; - else if (CM == CodeModel::JITDefault) { - // The default MCJIT memory managers make no guarantees about where they can - // find an executable page; JITed code needs to be able to refer to globals - // no matter how far away they are. - CM = CodeModel::Large; - } - - X->InitMCCodeGenInfo(RM, CM, OL); - return X; -} - -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, - bool RelaxAll, - bool NoExecStack) { - Triple TheTriple(TT); - - return createAArch64ELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); -} - - -static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - if (SyntaxVariant == 0) - return new AArch64InstPrinter(MAI, MII, MRI, STI); - return nullptr; -} - -namespace { - -class AArch64MCInstrAnalysis : public MCInstrAnalysis { -public: - AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} - - bool isUnconditionalBranch(const MCInst &Inst) const override { - if (Inst.getOpcode() == AArch64::Bcc - && Inst.getOperand(0).getImm() == A64CC::AL) - return true; - return MCInstrAnalysis::isUnconditionalBranch(Inst); - } - - bool isConditionalBranch(const MCInst &Inst) const override { - if (Inst.getOpcode() == AArch64::Bcc - && Inst.getOperand(0).getImm() == A64CC::AL) - return false; - return MCInstrAnalysis::isConditionalBranch(Inst); - } - - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, - uint64_t Size, uint64_t &Target) const override { - unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0; - // FIXME: We only handle PCRel branches for now. - if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType - != MCOI::OPERAND_PCREL) - return false; - - int64_t Imm = Inst.getOperand(LblOperand).getImm(); - Target = Addr + Imm; - return true; - } -}; - -} - -static MCInstrAnalysis *createAArch64MCInstrAnalysis(const MCInstrInfo *Info) { - return new AArch64MCInstrAnalysis(Info); -} - - - -extern "C" void LLVMInitializeAArch64TargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn A(TheAArch64leTarget, createAArch64MCAsmInfo); - RegisterMCAsmInfoFn B(TheAArch64beTarget, createAArch64MCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget, - createAArch64MCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget, - createAArch64MCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget, - createAArch64MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget, - createAArch64MCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget, - createAArch64MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget, - createAArch64MCRegisterInfo); - - // Register the MC subtarget info. 
- using AArch64_MC::createAArch64MCSubtargetInfo; - TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget, - createAArch64MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget, - createAArch64MCSubtargetInfo); - - // Register the MC instruction analyzer. - TargetRegistry::RegisterMCInstrAnalysis(TheAArch64leTarget, - createAArch64MCInstrAnalysis); - TargetRegistry::RegisterMCInstrAnalysis(TheAArch64beTarget, - createAArch64MCInstrAnalysis); - - // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget, - createAArch64MCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget, - createAArch64MCCodeEmitter); - - // Register the asm backend. - TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget, - createAArch64leAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget, - createAArch64beAsmBackend); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget, - createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget, - createMCStreamer); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget, - createAArch64MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget, - createAArch64MCInstPrinter); -} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h deleted file mode 100644 index bd8beaf16b07..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ /dev/null @@ -1,72 +0,0 @@ -//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides AArch64 specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64MCTARGETDESC_H -#define LLVM_AARCH64MCTARGETDESC_H - -#include "llvm/Support/DataTypes.h" - -namespace llvm { -class MCAsmBackend; -class MCCodeEmitter; -class MCContext; -class MCInstrInfo; -class MCObjectWriter; -class MCRegisterInfo; -class MCSubtargetInfo; -class StringRef; -class Target; -class raw_ostream; - -extern Target TheAArch64leTarget; -extern Target TheAArch64beTarget; - -namespace AArch64_MC { - MCSubtargetInfo *createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS); -} - -MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx); - -MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, - uint8_t OSABI, - bool IsLittleEndian); - -MCAsmBackend *createAArch64leAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); - -MCAsmBackend *createAArch64beAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); - -} // End llvm namespace - -// Defines symbolic names for AArch64 registers. This defines a mapping from -// register name to register number. -// -#define GET_REGINFO_ENUM -#include "AArch64GenRegisterInfo.inc" - -// Defines symbolic names for the AArch64 instructions. 
-// -#define GET_INSTRINFO_ENUM -#include "AArch64GenInstrInfo.inc" - -#define GET_SUBTARGETINFO_ENUM -#include "AArch64GenSubtargetInfo.inc" - -#endif diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 54c4465b60d7..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_llvm_library(LLVMAArch64Desc - AArch64AsmBackend.cpp - AArch64ELFObjectWriter.cpp - AArch64ELFStreamer.cpp - AArch64MCAsmInfo.cpp - AArch64MCCodeEmitter.cpp - AArch64MCExpr.cpp - AArch64MCTargetDesc.cpp - ) diff --git a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt deleted file mode 100644 index 37c8035a49f9..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = AArch64Desc -parent = AArch64 -required_libraries = AArch64AsmPrinter AArch64Info MC Support -add_to_library_groups = AArch64 - diff --git a/lib/Target/AArch64/MCTargetDesc/Makefile b/lib/Target/AArch64/MCTargetDesc/Makefile deleted file mode 100644 index 5779ac5ac60a..000000000000 --- a/lib/Target/AArch64/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/AArch64/TargetDesc/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMAArch64Desc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/Makefile b/lib/Target/AArch64/Makefile deleted file mode 100644 index 641bb83c4775..000000000000 --- a/lib/Target/AArch64/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -##===- lib/Target/AArch64/Makefile -------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMAArch64CodeGen -TARGET = AArch64 - -# Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = AArch64GenAsmMatcher.inc \ - AArch64GenAsmWriter.inc \ - AArch64GenCallingConv.inc \ - AArch64GenDAGISel.inc \ - AArch64GenDisassemblerTables.inc \ - AArch64GenInstrInfo.inc \ - AArch64GenMCCodeEmitter.inc \ - AArch64GenMCPseudoLowering.inc \ - AArch64GenRegisterInfo.inc \ - AArch64GenSubtargetInfo.inc - -DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils - -include $(LEVEL)/Makefile.common - - diff --git a/lib/Target/AArch64/README.txt b/lib/Target/AArch64/README.txt deleted file mode 100644 index 601990f17dee..000000000000 --- a/lib/Target/AArch64/README.txt +++ /dev/null @@ -1,2 +0,0 @@ -This file will contain changes that need to be made before AArch64 can become an -officially supported target. Currently a placeholder. diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp deleted file mode 100644 index 9281e4e1d937..000000000000 --- a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp +++ /dev/null @@ -1,27 +0,0 @@ -//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the key registration step for the architecture. -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -Target llvm::TheAArch64leTarget; -Target llvm::TheAArch64beTarget; - -extern "C" void LLVMInitializeAArch64TargetInfo() { - RegisterTarget - X(TheAArch64leTarget, "aarch64", "AArch64 (ARM 64-bit little endian target)"); - RegisterTarget - Y(TheAArch64beTarget, "aarch64_be", "AArch64 (ARM 64-bit big endian target)"); -} diff --git a/lib/Target/AArch64/TargetInfo/CMakeLists.txt b/lib/Target/AArch64/TargetInfo/CMakeLists.txt deleted file mode 100644 index ee734c647261..000000000000 --- a/lib/Target/AArch64/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMAArch64Info - AArch64TargetInfo.cpp - ) diff --git a/lib/Target/AArch64/TargetInfo/LLVMBuild.txt b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt deleted file mode 100644 index 642917239810..000000000000 --- a/lib/Target/AArch64/TargetInfo/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = AArch64Info -parent = AArch64 -required_libraries = Support -add_to_library_groups = AArch64 diff --git a/lib/Target/AArch64/TargetInfo/Makefile b/lib/Target/AArch64/TargetInfo/Makefile deleted file mode 100644 index 9dc9aa4bccf7..000000000000 --- a/lib/Target/AArch64/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/AArch64/TargetInfo/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMAArch64Info - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp deleted file mode 100644 index 2a97cd632560..000000000000 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ /dev/null @@ -1,1173 +0,0 @@ -//===-- AArch64BaseInfo.cpp - AArch64 Base encoding information------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides basic encoding and assembly information for AArch64. -// -//===----------------------------------------------------------------------===// -#include "AArch64BaseInfo.h" -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Regex.h" - -using namespace llvm; - -StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const { - for (unsigned i = 0; i < NumPairs; ++i) { - if (Pairs[i].Value == Value) { - Valid = true; - return Pairs[i].Name; - } - } - - Valid = false; - return StringRef(); -} - -uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const { - std::string LowerCaseName = Name.lower(); - for (unsigned i = 0; i < NumPairs; ++i) { - if (Pairs[i].Name == LowerCaseName) { - Valid = true; - return Pairs[i].Value; - } - } - - Valid = false; - return -1; -} - -bool NamedImmMapper::validImm(uint32_t Value) const { - return Value < TooBigImm; -} - -const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = { - {"s1e1r", S1E1R}, - {"s1e2r", S1E2R}, - {"s1e3r", S1E3R}, - {"s1e1w", S1E1W}, - {"s1e2w", S1E2W}, - {"s1e3w", S1E3W}, - {"s1e0r", S1E0R}, - {"s1e0w", S1E0W}, - {"s12e1r", S12E1R}, - {"s12e1w", S12E1W}, - {"s12e0r", S12E0R}, - {"s12e0w", S12E0W}, -}; - -A64AT::ATMapper::ATMapper() - : NamedImmMapper(ATPairs, 0) {} - -const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = { - {"oshld", OSHLD}, - {"oshst", OSHST}, - {"osh", OSH}, - {"nshld", NSHLD}, - {"nshst", NSHST}, - {"nsh", NSH}, - {"ishld", ISHLD}, - {"ishst", ISHST}, - {"ish", ISH}, - {"ld", LD}, - {"st", ST}, - {"sy", SY} -}; - -A64DB::DBarrierMapper::DBarrierMapper() - : NamedImmMapper(DBarrierPairs, 16u) {} - -const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = { - {"zva", ZVA}, - {"ivac", IVAC}, - {"isw", ISW}, - {"cvac", CVAC}, - {"csw", CSW}, - 
{"cvau", CVAU}, - {"civac", CIVAC}, - {"cisw", CISW} -}; - -A64DC::DCMapper::DCMapper() - : NamedImmMapper(DCPairs, 0) {} - -const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = { - {"ialluis", IALLUIS}, - {"iallu", IALLU}, - {"ivau", IVAU} -}; - -A64IC::ICMapper::ICMapper() - : NamedImmMapper(ICPairs, 0) {} - -const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = { - {"sy", SY}, -}; - -A64ISB::ISBMapper::ISBMapper() - : NamedImmMapper(ISBPairs, 16) {} - -const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = { - {"pldl1keep", PLDL1KEEP}, - {"pldl1strm", PLDL1STRM}, - {"pldl2keep", PLDL2KEEP}, - {"pldl2strm", PLDL2STRM}, - {"pldl3keep", PLDL3KEEP}, - {"pldl3strm", PLDL3STRM}, - {"plil1keep", PLIL1KEEP}, - {"plil1strm", PLIL1STRM}, - {"plil2keep", PLIL2KEEP}, - {"plil2strm", PLIL2STRM}, - {"plil3keep", PLIL3KEEP}, - {"plil3strm", PLIL3STRM}, - {"pstl1keep", PSTL1KEEP}, - {"pstl1strm", PSTL1STRM}, - {"pstl2keep", PSTL2KEEP}, - {"pstl2strm", PSTL2STRM}, - {"pstl3keep", PSTL3KEEP}, - {"pstl3strm", PSTL3STRM} -}; - -A64PRFM::PRFMMapper::PRFMMapper() - : NamedImmMapper(PRFMPairs, 32) {} - -const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = { - {"spsel", SPSel}, - {"daifset", DAIFSet}, - {"daifclr", DAIFClr} -}; - -A64PState::PStateMapper::PStateMapper() - : NamedImmMapper(PStatePairs, 0) {} - -const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = { - {"mdccsr_el0", MDCCSR_EL0}, - {"dbgdtrrx_el0", DBGDTRRX_EL0}, - {"mdrar_el1", MDRAR_EL1}, - {"oslsr_el1", OSLSR_EL1}, - {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1}, - {"pmceid0_el0", PMCEID0_EL0}, - {"pmceid1_el0", PMCEID1_EL0}, - {"midr_el1", MIDR_EL1}, - {"ccsidr_el1", CCSIDR_EL1}, - {"clidr_el1", CLIDR_EL1}, - {"ctr_el0", CTR_EL0}, - {"mpidr_el1", MPIDR_EL1}, - {"revidr_el1", REVIDR_EL1}, - {"aidr_el1", AIDR_EL1}, - {"dczid_el0", DCZID_EL0}, - {"id_pfr0_el1", ID_PFR0_EL1}, - {"id_pfr1_el1", ID_PFR1_EL1}, - {"id_dfr0_el1", ID_DFR0_EL1}, - {"id_afr0_el1", ID_AFR0_EL1}, - {"id_mmfr0_el1", ID_MMFR0_EL1}, - {"id_mmfr1_el1", ID_MMFR1_EL1}, - {"id_mmfr2_el1", ID_MMFR2_EL1}, - {"id_mmfr3_el1", ID_MMFR3_EL1}, - {"id_isar0_el1", ID_ISAR0_EL1}, - {"id_isar1_el1", ID_ISAR1_EL1}, - {"id_isar2_el1", ID_ISAR2_EL1}, - {"id_isar3_el1", ID_ISAR3_EL1}, - {"id_isar4_el1", ID_ISAR4_EL1}, - {"id_isar5_el1", ID_ISAR5_EL1}, - {"id_aa64pfr0_el1", ID_AA64PFR0_EL1}, - {"id_aa64pfr1_el1", ID_AA64PFR1_EL1}, - {"id_aa64dfr0_el1", ID_AA64DFR0_EL1}, - {"id_aa64dfr1_el1", ID_AA64DFR1_EL1}, - {"id_aa64afr0_el1", ID_AA64AFR0_EL1}, - {"id_aa64afr1_el1", ID_AA64AFR1_EL1}, - {"id_aa64isar0_el1", ID_AA64ISAR0_EL1}, - {"id_aa64isar1_el1", ID_AA64ISAR1_EL1}, - {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1}, - {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1}, - {"mvfr0_el1", MVFR0_EL1}, - {"mvfr1_el1", MVFR1_EL1}, - {"mvfr2_el1", MVFR2_EL1}, - {"rvbar_el1", RVBAR_EL1}, - {"rvbar_el2", RVBAR_EL2}, - {"rvbar_el3", RVBAR_EL3}, - {"isr_el1", ISR_EL1}, - {"cntpct_el0", CNTPCT_EL0}, - {"cntvct_el0", CNTVCT_EL0}, - - // Trace registers - {"trcstatr", TRCSTATR}, - {"trcidr8", TRCIDR8}, - {"trcidr9", TRCIDR9}, - {"trcidr10", TRCIDR10}, - {"trcidr11", TRCIDR11}, - {"trcidr12", TRCIDR12}, - {"trcidr13", TRCIDR13}, - {"trcidr0", TRCIDR0}, - {"trcidr1", TRCIDR1}, - {"trcidr2", TRCIDR2}, - {"trcidr3", TRCIDR3}, - {"trcidr4", TRCIDR4}, - {"trcidr5", TRCIDR5}, - {"trcidr6", TRCIDR6}, - {"trcidr7", TRCIDR7}, - {"trcoslsr", TRCOSLSR}, - {"trcpdsr", TRCPDSR}, - {"trcdevaff0", TRCDEVAFF0}, - {"trcdevaff1", TRCDEVAFF1}, - {"trclsr", TRCLSR}, - {"trcauthstatus", 
TRCAUTHSTATUS}, - {"trcdevarch", TRCDEVARCH}, - {"trcdevid", TRCDEVID}, - {"trcdevtype", TRCDEVTYPE}, - {"trcpidr4", TRCPIDR4}, - {"trcpidr5", TRCPIDR5}, - {"trcpidr6", TRCPIDR6}, - {"trcpidr7", TRCPIDR7}, - {"trcpidr0", TRCPIDR0}, - {"trcpidr1", TRCPIDR1}, - {"trcpidr2", TRCPIDR2}, - {"trcpidr3", TRCPIDR3}, - {"trccidr0", TRCCIDR0}, - {"trccidr1", TRCCIDR1}, - {"trccidr2", TRCCIDR2}, - {"trccidr3", TRCCIDR3}, - - // GICv3 registers - {"icc_iar1_el1", ICC_IAR1_EL1}, - {"icc_iar0_el1", ICC_IAR0_EL1}, - {"icc_hppir1_el1", ICC_HPPIR1_EL1}, - {"icc_hppir0_el1", ICC_HPPIR0_EL1}, - {"icc_rpr_el1", ICC_RPR_EL1}, - {"ich_vtr_el2", ICH_VTR_EL2}, - {"ich_eisr_el2", ICH_EISR_EL2}, - {"ich_elsr_el2", ICH_ELSR_EL2} -}; - -A64SysReg::MRSMapper::MRSMapper() { - InstPairs = &MRSPairs[0]; - NumInstPairs = llvm::array_lengthof(MRSPairs); -} - -const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = { - {"dbgdtrtx_el0", DBGDTRTX_EL0}, - {"oslar_el1", OSLAR_EL1}, - {"pmswinc_el0", PMSWINC_EL0}, - - // Trace registers - {"trcoslar", TRCOSLAR}, - {"trclar", TRCLAR}, - - // GICv3 registers - {"icc_eoir1_el1", ICC_EOIR1_EL1}, - {"icc_eoir0_el1", ICC_EOIR0_EL1}, - {"icc_dir_el1", ICC_DIR_EL1}, - {"icc_sgi1r_el1", ICC_SGI1R_EL1}, - {"icc_asgi1r_el1", ICC_ASGI1R_EL1}, - {"icc_sgi0r_el1", ICC_SGI0R_EL1} -}; - -A64SysReg::MSRMapper::MSRMapper() { - InstPairs = &MSRPairs[0]; - NumInstPairs = llvm::array_lengthof(MSRPairs); -} - - -const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = { - {"osdtrrx_el1", OSDTRRX_EL1}, - {"osdtrtx_el1", OSDTRTX_EL1}, - {"teecr32_el1", TEECR32_EL1}, - {"mdccint_el1", MDCCINT_EL1}, - {"mdscr_el1", MDSCR_EL1}, - {"dbgdtr_el0", DBGDTR_EL0}, - {"oseccr_el1", OSECCR_EL1}, - {"dbgvcr32_el2", DBGVCR32_EL2}, - {"dbgbvr0_el1", DBGBVR0_EL1}, - {"dbgbvr1_el1", DBGBVR1_EL1}, - {"dbgbvr2_el1", DBGBVR2_EL1}, - {"dbgbvr3_el1", DBGBVR3_EL1}, - {"dbgbvr4_el1", DBGBVR4_EL1}, - {"dbgbvr5_el1", DBGBVR5_EL1}, - {"dbgbvr6_el1", DBGBVR6_EL1}, - {"dbgbvr7_el1", DBGBVR7_EL1}, - {"dbgbvr8_el1", DBGBVR8_EL1}, - {"dbgbvr9_el1", DBGBVR9_EL1}, - {"dbgbvr10_el1", DBGBVR10_EL1}, - {"dbgbvr11_el1", DBGBVR11_EL1}, - {"dbgbvr12_el1", DBGBVR12_EL1}, - {"dbgbvr13_el1", DBGBVR13_EL1}, - {"dbgbvr14_el1", DBGBVR14_EL1}, - {"dbgbvr15_el1", DBGBVR15_EL1}, - {"dbgbcr0_el1", DBGBCR0_EL1}, - {"dbgbcr1_el1", DBGBCR1_EL1}, - {"dbgbcr2_el1", DBGBCR2_EL1}, - {"dbgbcr3_el1", DBGBCR3_EL1}, - {"dbgbcr4_el1", DBGBCR4_EL1}, - {"dbgbcr5_el1", DBGBCR5_EL1}, - {"dbgbcr6_el1", DBGBCR6_EL1}, - {"dbgbcr7_el1", DBGBCR7_EL1}, - {"dbgbcr8_el1", DBGBCR8_EL1}, - {"dbgbcr9_el1", DBGBCR9_EL1}, - {"dbgbcr10_el1", DBGBCR10_EL1}, - {"dbgbcr11_el1", DBGBCR11_EL1}, - {"dbgbcr12_el1", DBGBCR12_EL1}, - {"dbgbcr13_el1", DBGBCR13_EL1}, - {"dbgbcr14_el1", DBGBCR14_EL1}, - {"dbgbcr15_el1", DBGBCR15_EL1}, - {"dbgwvr0_el1", DBGWVR0_EL1}, - {"dbgwvr1_el1", DBGWVR1_EL1}, - {"dbgwvr2_el1", DBGWVR2_EL1}, - {"dbgwvr3_el1", DBGWVR3_EL1}, - {"dbgwvr4_el1", DBGWVR4_EL1}, - {"dbgwvr5_el1", DBGWVR5_EL1}, - {"dbgwvr6_el1", DBGWVR6_EL1}, - {"dbgwvr7_el1", DBGWVR7_EL1}, - {"dbgwvr8_el1", DBGWVR8_EL1}, - {"dbgwvr9_el1", DBGWVR9_EL1}, - {"dbgwvr10_el1", DBGWVR10_EL1}, - {"dbgwvr11_el1", DBGWVR11_EL1}, - {"dbgwvr12_el1", DBGWVR12_EL1}, - {"dbgwvr13_el1", DBGWVR13_EL1}, - {"dbgwvr14_el1", DBGWVR14_EL1}, - {"dbgwvr15_el1", DBGWVR15_EL1}, - {"dbgwcr0_el1", DBGWCR0_EL1}, - {"dbgwcr1_el1", DBGWCR1_EL1}, - {"dbgwcr2_el1", DBGWCR2_EL1}, - {"dbgwcr3_el1", DBGWCR3_EL1}, - {"dbgwcr4_el1", DBGWCR4_EL1}, - {"dbgwcr5_el1", DBGWCR5_EL1}, - {"dbgwcr6_el1", DBGWCR6_EL1}, - 
{"dbgwcr7_el1", DBGWCR7_EL1}, - {"dbgwcr8_el1", DBGWCR8_EL1}, - {"dbgwcr9_el1", DBGWCR9_EL1}, - {"dbgwcr10_el1", DBGWCR10_EL1}, - {"dbgwcr11_el1", DBGWCR11_EL1}, - {"dbgwcr12_el1", DBGWCR12_EL1}, - {"dbgwcr13_el1", DBGWCR13_EL1}, - {"dbgwcr14_el1", DBGWCR14_EL1}, - {"dbgwcr15_el1", DBGWCR15_EL1}, - {"teehbr32_el1", TEEHBR32_EL1}, - {"osdlr_el1", OSDLR_EL1}, - {"dbgprcr_el1", DBGPRCR_EL1}, - {"dbgclaimset_el1", DBGCLAIMSET_EL1}, - {"dbgclaimclr_el1", DBGCLAIMCLR_EL1}, - {"csselr_el1", CSSELR_EL1}, - {"vpidr_el2", VPIDR_EL2}, - {"vmpidr_el2", VMPIDR_EL2}, - {"sctlr_el1", SCTLR_EL1}, - {"sctlr_el2", SCTLR_EL2}, - {"sctlr_el3", SCTLR_EL3}, - {"actlr_el1", ACTLR_EL1}, - {"actlr_el2", ACTLR_EL2}, - {"actlr_el3", ACTLR_EL3}, - {"cpacr_el1", CPACR_EL1}, - {"hcr_el2", HCR_EL2}, - {"scr_el3", SCR_EL3}, - {"mdcr_el2", MDCR_EL2}, - {"sder32_el3", SDER32_EL3}, - {"cptr_el2", CPTR_EL2}, - {"cptr_el3", CPTR_EL3}, - {"hstr_el2", HSTR_EL2}, - {"hacr_el2", HACR_EL2}, - {"mdcr_el3", MDCR_EL3}, - {"ttbr0_el1", TTBR0_EL1}, - {"ttbr0_el2", TTBR0_EL2}, - {"ttbr0_el3", TTBR0_EL3}, - {"ttbr1_el1", TTBR1_EL1}, - {"tcr_el1", TCR_EL1}, - {"tcr_el2", TCR_EL2}, - {"tcr_el3", TCR_EL3}, - {"vttbr_el2", VTTBR_EL2}, - {"vtcr_el2", VTCR_EL2}, - {"dacr32_el2", DACR32_EL2}, - {"spsr_el1", SPSR_EL1}, - {"spsr_el2", SPSR_EL2}, - {"spsr_el3", SPSR_EL3}, - {"elr_el1", ELR_EL1}, - {"elr_el2", ELR_EL2}, - {"elr_el3", ELR_EL3}, - {"sp_el0", SP_EL0}, - {"sp_el1", SP_EL1}, - {"sp_el2", SP_EL2}, - {"spsel", SPSel}, - {"nzcv", NZCV}, - {"daif", DAIF}, - {"currentel", CurrentEL}, - {"spsr_irq", SPSR_irq}, - {"spsr_abt", SPSR_abt}, - {"spsr_und", SPSR_und}, - {"spsr_fiq", SPSR_fiq}, - {"fpcr", FPCR}, - {"fpsr", FPSR}, - {"dspsr_el0", DSPSR_EL0}, - {"dlr_el0", DLR_EL0}, - {"ifsr32_el2", IFSR32_EL2}, - {"afsr0_el1", AFSR0_EL1}, - {"afsr0_el2", AFSR0_EL2}, - {"afsr0_el3", AFSR0_EL3}, - {"afsr1_el1", AFSR1_EL1}, - {"afsr1_el2", AFSR1_EL2}, - {"afsr1_el3", AFSR1_EL3}, - {"esr_el1", ESR_EL1}, - {"esr_el2", ESR_EL2}, - {"esr_el3", ESR_EL3}, - {"fpexc32_el2", FPEXC32_EL2}, - {"far_el1", FAR_EL1}, - {"far_el2", FAR_EL2}, - {"far_el3", FAR_EL3}, - {"hpfar_el2", HPFAR_EL2}, - {"par_el1", PAR_EL1}, - {"pmcr_el0", PMCR_EL0}, - {"pmcntenset_el0", PMCNTENSET_EL0}, - {"pmcntenclr_el0", PMCNTENCLR_EL0}, - {"pmovsclr_el0", PMOVSCLR_EL0}, - {"pmselr_el0", PMSELR_EL0}, - {"pmccntr_el0", PMCCNTR_EL0}, - {"pmxevtyper_el0", PMXEVTYPER_EL0}, - {"pmxevcntr_el0", PMXEVCNTR_EL0}, - {"pmuserenr_el0", PMUSERENR_EL0}, - {"pmintenset_el1", PMINTENSET_EL1}, - {"pmintenclr_el1", PMINTENCLR_EL1}, - {"pmovsset_el0", PMOVSSET_EL0}, - {"mair_el1", MAIR_EL1}, - {"mair_el2", MAIR_EL2}, - {"mair_el3", MAIR_EL3}, - {"amair_el1", AMAIR_EL1}, - {"amair_el2", AMAIR_EL2}, - {"amair_el3", AMAIR_EL3}, - {"vbar_el1", VBAR_EL1}, - {"vbar_el2", VBAR_EL2}, - {"vbar_el3", VBAR_EL3}, - {"rmr_el1", RMR_EL1}, - {"rmr_el2", RMR_EL2}, - {"rmr_el3", RMR_EL3}, - {"contextidr_el1", CONTEXTIDR_EL1}, - {"tpidr_el0", TPIDR_EL0}, - {"tpidr_el2", TPIDR_EL2}, - {"tpidr_el3", TPIDR_EL3}, - {"tpidrro_el0", TPIDRRO_EL0}, - {"tpidr_el1", TPIDR_EL1}, - {"cntfrq_el0", CNTFRQ_EL0}, - {"cntvoff_el2", CNTVOFF_EL2}, - {"cntkctl_el1", CNTKCTL_EL1}, - {"cnthctl_el2", CNTHCTL_EL2}, - {"cntp_tval_el0", CNTP_TVAL_EL0}, - {"cnthp_tval_el2", CNTHP_TVAL_EL2}, - {"cntps_tval_el1", CNTPS_TVAL_EL1}, - {"cntp_ctl_el0", CNTP_CTL_EL0}, - {"cnthp_ctl_el2", CNTHP_CTL_EL2}, - {"cntps_ctl_el1", CNTPS_CTL_EL1}, - {"cntp_cval_el0", CNTP_CVAL_EL0}, - {"cnthp_cval_el2", CNTHP_CVAL_EL2}, - {"cntps_cval_el1", CNTPS_CVAL_EL1}, - 
{"cntv_tval_el0", CNTV_TVAL_EL0}, - {"cntv_ctl_el0", CNTV_CTL_EL0}, - {"cntv_cval_el0", CNTV_CVAL_EL0}, - {"pmevcntr0_el0", PMEVCNTR0_EL0}, - {"pmevcntr1_el0", PMEVCNTR1_EL0}, - {"pmevcntr2_el0", PMEVCNTR2_EL0}, - {"pmevcntr3_el0", PMEVCNTR3_EL0}, - {"pmevcntr4_el0", PMEVCNTR4_EL0}, - {"pmevcntr5_el0", PMEVCNTR5_EL0}, - {"pmevcntr6_el0", PMEVCNTR6_EL0}, - {"pmevcntr7_el0", PMEVCNTR7_EL0}, - {"pmevcntr8_el0", PMEVCNTR8_EL0}, - {"pmevcntr9_el0", PMEVCNTR9_EL0}, - {"pmevcntr10_el0", PMEVCNTR10_EL0}, - {"pmevcntr11_el0", PMEVCNTR11_EL0}, - {"pmevcntr12_el0", PMEVCNTR12_EL0}, - {"pmevcntr13_el0", PMEVCNTR13_EL0}, - {"pmevcntr14_el0", PMEVCNTR14_EL0}, - {"pmevcntr15_el0", PMEVCNTR15_EL0}, - {"pmevcntr16_el0", PMEVCNTR16_EL0}, - {"pmevcntr17_el0", PMEVCNTR17_EL0}, - {"pmevcntr18_el0", PMEVCNTR18_EL0}, - {"pmevcntr19_el0", PMEVCNTR19_EL0}, - {"pmevcntr20_el0", PMEVCNTR20_EL0}, - {"pmevcntr21_el0", PMEVCNTR21_EL0}, - {"pmevcntr22_el0", PMEVCNTR22_EL0}, - {"pmevcntr23_el0", PMEVCNTR23_EL0}, - {"pmevcntr24_el0", PMEVCNTR24_EL0}, - {"pmevcntr25_el0", PMEVCNTR25_EL0}, - {"pmevcntr26_el0", PMEVCNTR26_EL0}, - {"pmevcntr27_el0", PMEVCNTR27_EL0}, - {"pmevcntr28_el0", PMEVCNTR28_EL0}, - {"pmevcntr29_el0", PMEVCNTR29_EL0}, - {"pmevcntr30_el0", PMEVCNTR30_EL0}, - {"pmccfiltr_el0", PMCCFILTR_EL0}, - {"pmevtyper0_el0", PMEVTYPER0_EL0}, - {"pmevtyper1_el0", PMEVTYPER1_EL0}, - {"pmevtyper2_el0", PMEVTYPER2_EL0}, - {"pmevtyper3_el0", PMEVTYPER3_EL0}, - {"pmevtyper4_el0", PMEVTYPER4_EL0}, - {"pmevtyper5_el0", PMEVTYPER5_EL0}, - {"pmevtyper6_el0", PMEVTYPER6_EL0}, - {"pmevtyper7_el0", PMEVTYPER7_EL0}, - {"pmevtyper8_el0", PMEVTYPER8_EL0}, - {"pmevtyper9_el0", PMEVTYPER9_EL0}, - {"pmevtyper10_el0", PMEVTYPER10_EL0}, - {"pmevtyper11_el0", PMEVTYPER11_EL0}, - {"pmevtyper12_el0", PMEVTYPER12_EL0}, - {"pmevtyper13_el0", PMEVTYPER13_EL0}, - {"pmevtyper14_el0", PMEVTYPER14_EL0}, - {"pmevtyper15_el0", PMEVTYPER15_EL0}, - {"pmevtyper16_el0", PMEVTYPER16_EL0}, - {"pmevtyper17_el0", PMEVTYPER17_EL0}, - {"pmevtyper18_el0", PMEVTYPER18_EL0}, - {"pmevtyper19_el0", PMEVTYPER19_EL0}, - {"pmevtyper20_el0", PMEVTYPER20_EL0}, - {"pmevtyper21_el0", PMEVTYPER21_EL0}, - {"pmevtyper22_el0", PMEVTYPER22_EL0}, - {"pmevtyper23_el0", PMEVTYPER23_EL0}, - {"pmevtyper24_el0", PMEVTYPER24_EL0}, - {"pmevtyper25_el0", PMEVTYPER25_EL0}, - {"pmevtyper26_el0", PMEVTYPER26_EL0}, - {"pmevtyper27_el0", PMEVTYPER27_EL0}, - {"pmevtyper28_el0", PMEVTYPER28_EL0}, - {"pmevtyper29_el0", PMEVTYPER29_EL0}, - {"pmevtyper30_el0", PMEVTYPER30_EL0}, - - // Trace registers - {"trcprgctlr", TRCPRGCTLR}, - {"trcprocselr", TRCPROCSELR}, - {"trcconfigr", TRCCONFIGR}, - {"trcauxctlr", TRCAUXCTLR}, - {"trceventctl0r", TRCEVENTCTL0R}, - {"trceventctl1r", TRCEVENTCTL1R}, - {"trcstallctlr", TRCSTALLCTLR}, - {"trctsctlr", TRCTSCTLR}, - {"trcsyncpr", TRCSYNCPR}, - {"trcccctlr", TRCCCCTLR}, - {"trcbbctlr", TRCBBCTLR}, - {"trctraceidr", TRCTRACEIDR}, - {"trcqctlr", TRCQCTLR}, - {"trcvictlr", TRCVICTLR}, - {"trcviiectlr", TRCVIIECTLR}, - {"trcvissctlr", TRCVISSCTLR}, - {"trcvipcssctlr", TRCVIPCSSCTLR}, - {"trcvdctlr", TRCVDCTLR}, - {"trcvdsacctlr", TRCVDSACCTLR}, - {"trcvdarcctlr", TRCVDARCCTLR}, - {"trcseqevr0", TRCSEQEVR0}, - {"trcseqevr1", TRCSEQEVR1}, - {"trcseqevr2", TRCSEQEVR2}, - {"trcseqrstevr", TRCSEQRSTEVR}, - {"trcseqstr", TRCSEQSTR}, - {"trcextinselr", TRCEXTINSELR}, - {"trccntrldvr0", TRCCNTRLDVR0}, - {"trccntrldvr1", TRCCNTRLDVR1}, - {"trccntrldvr2", TRCCNTRLDVR2}, - {"trccntrldvr3", TRCCNTRLDVR3}, - {"trccntctlr0", TRCCNTCTLR0}, - {"trccntctlr1", TRCCNTCTLR1}, - 
{"trccntctlr2", TRCCNTCTLR2}, - {"trccntctlr3", TRCCNTCTLR3}, - {"trccntvr0", TRCCNTVR0}, - {"trccntvr1", TRCCNTVR1}, - {"trccntvr2", TRCCNTVR2}, - {"trccntvr3", TRCCNTVR3}, - {"trcimspec0", TRCIMSPEC0}, - {"trcimspec1", TRCIMSPEC1}, - {"trcimspec2", TRCIMSPEC2}, - {"trcimspec3", TRCIMSPEC3}, - {"trcimspec4", TRCIMSPEC4}, - {"trcimspec5", TRCIMSPEC5}, - {"trcimspec6", TRCIMSPEC6}, - {"trcimspec7", TRCIMSPEC7}, - {"trcrsctlr2", TRCRSCTLR2}, - {"trcrsctlr3", TRCRSCTLR3}, - {"trcrsctlr4", TRCRSCTLR4}, - {"trcrsctlr5", TRCRSCTLR5}, - {"trcrsctlr6", TRCRSCTLR6}, - {"trcrsctlr7", TRCRSCTLR7}, - {"trcrsctlr8", TRCRSCTLR8}, - {"trcrsctlr9", TRCRSCTLR9}, - {"trcrsctlr10", TRCRSCTLR10}, - {"trcrsctlr11", TRCRSCTLR11}, - {"trcrsctlr12", TRCRSCTLR12}, - {"trcrsctlr13", TRCRSCTLR13}, - {"trcrsctlr14", TRCRSCTLR14}, - {"trcrsctlr15", TRCRSCTLR15}, - {"trcrsctlr16", TRCRSCTLR16}, - {"trcrsctlr17", TRCRSCTLR17}, - {"trcrsctlr18", TRCRSCTLR18}, - {"trcrsctlr19", TRCRSCTLR19}, - {"trcrsctlr20", TRCRSCTLR20}, - {"trcrsctlr21", TRCRSCTLR21}, - {"trcrsctlr22", TRCRSCTLR22}, - {"trcrsctlr23", TRCRSCTLR23}, - {"trcrsctlr24", TRCRSCTLR24}, - {"trcrsctlr25", TRCRSCTLR25}, - {"trcrsctlr26", TRCRSCTLR26}, - {"trcrsctlr27", TRCRSCTLR27}, - {"trcrsctlr28", TRCRSCTLR28}, - {"trcrsctlr29", TRCRSCTLR29}, - {"trcrsctlr30", TRCRSCTLR30}, - {"trcrsctlr31", TRCRSCTLR31}, - {"trcssccr0", TRCSSCCR0}, - {"trcssccr1", TRCSSCCR1}, - {"trcssccr2", TRCSSCCR2}, - {"trcssccr3", TRCSSCCR3}, - {"trcssccr4", TRCSSCCR4}, - {"trcssccr5", TRCSSCCR5}, - {"trcssccr6", TRCSSCCR6}, - {"trcssccr7", TRCSSCCR7}, - {"trcsscsr0", TRCSSCSR0}, - {"trcsscsr1", TRCSSCSR1}, - {"trcsscsr2", TRCSSCSR2}, - {"trcsscsr3", TRCSSCSR3}, - {"trcsscsr4", TRCSSCSR4}, - {"trcsscsr5", TRCSSCSR5}, - {"trcsscsr6", TRCSSCSR6}, - {"trcsscsr7", TRCSSCSR7}, - {"trcsspcicr0", TRCSSPCICR0}, - {"trcsspcicr1", TRCSSPCICR1}, - {"trcsspcicr2", TRCSSPCICR2}, - {"trcsspcicr3", TRCSSPCICR3}, - {"trcsspcicr4", TRCSSPCICR4}, - {"trcsspcicr5", TRCSSPCICR5}, - {"trcsspcicr6", TRCSSPCICR6}, - {"trcsspcicr7", TRCSSPCICR7}, - {"trcpdcr", TRCPDCR}, - {"trcacvr0", TRCACVR0}, - {"trcacvr1", TRCACVR1}, - {"trcacvr2", TRCACVR2}, - {"trcacvr3", TRCACVR3}, - {"trcacvr4", TRCACVR4}, - {"trcacvr5", TRCACVR5}, - {"trcacvr6", TRCACVR6}, - {"trcacvr7", TRCACVR7}, - {"trcacvr8", TRCACVR8}, - {"trcacvr9", TRCACVR9}, - {"trcacvr10", TRCACVR10}, - {"trcacvr11", TRCACVR11}, - {"trcacvr12", TRCACVR12}, - {"trcacvr13", TRCACVR13}, - {"trcacvr14", TRCACVR14}, - {"trcacvr15", TRCACVR15}, - {"trcacatr0", TRCACATR0}, - {"trcacatr1", TRCACATR1}, - {"trcacatr2", TRCACATR2}, - {"trcacatr3", TRCACATR3}, - {"trcacatr4", TRCACATR4}, - {"trcacatr5", TRCACATR5}, - {"trcacatr6", TRCACATR6}, - {"trcacatr7", TRCACATR7}, - {"trcacatr8", TRCACATR8}, - {"trcacatr9", TRCACATR9}, - {"trcacatr10", TRCACATR10}, - {"trcacatr11", TRCACATR11}, - {"trcacatr12", TRCACATR12}, - {"trcacatr13", TRCACATR13}, - {"trcacatr14", TRCACATR14}, - {"trcacatr15", TRCACATR15}, - {"trcdvcvr0", TRCDVCVR0}, - {"trcdvcvr1", TRCDVCVR1}, - {"trcdvcvr2", TRCDVCVR2}, - {"trcdvcvr3", TRCDVCVR3}, - {"trcdvcvr4", TRCDVCVR4}, - {"trcdvcvr5", TRCDVCVR5}, - {"trcdvcvr6", TRCDVCVR6}, - {"trcdvcvr7", TRCDVCVR7}, - {"trcdvcmr0", TRCDVCMR0}, - {"trcdvcmr1", TRCDVCMR1}, - {"trcdvcmr2", TRCDVCMR2}, - {"trcdvcmr3", TRCDVCMR3}, - {"trcdvcmr4", TRCDVCMR4}, - {"trcdvcmr5", TRCDVCMR5}, - {"trcdvcmr6", TRCDVCMR6}, - {"trcdvcmr7", TRCDVCMR7}, - {"trccidcvr0", TRCCIDCVR0}, - {"trccidcvr1", TRCCIDCVR1}, - {"trccidcvr2", TRCCIDCVR2}, - {"trccidcvr3", TRCCIDCVR3}, - 
{"trccidcvr4", TRCCIDCVR4}, - {"trccidcvr5", TRCCIDCVR5}, - {"trccidcvr6", TRCCIDCVR6}, - {"trccidcvr7", TRCCIDCVR7}, - {"trcvmidcvr0", TRCVMIDCVR0}, - {"trcvmidcvr1", TRCVMIDCVR1}, - {"trcvmidcvr2", TRCVMIDCVR2}, - {"trcvmidcvr3", TRCVMIDCVR3}, - {"trcvmidcvr4", TRCVMIDCVR4}, - {"trcvmidcvr5", TRCVMIDCVR5}, - {"trcvmidcvr6", TRCVMIDCVR6}, - {"trcvmidcvr7", TRCVMIDCVR7}, - {"trccidcctlr0", TRCCIDCCTLR0}, - {"trccidcctlr1", TRCCIDCCTLR1}, - {"trcvmidcctlr0", TRCVMIDCCTLR0}, - {"trcvmidcctlr1", TRCVMIDCCTLR1}, - {"trcitctrl", TRCITCTRL}, - {"trcclaimset", TRCCLAIMSET}, - {"trcclaimclr", TRCCLAIMCLR}, - - // GICv3 registers - {"icc_bpr1_el1", ICC_BPR1_EL1}, - {"icc_bpr0_el1", ICC_BPR0_EL1}, - {"icc_pmr_el1", ICC_PMR_EL1}, - {"icc_ctlr_el1", ICC_CTLR_EL1}, - {"icc_ctlr_el3", ICC_CTLR_EL3}, - {"icc_sre_el1", ICC_SRE_EL1}, - {"icc_sre_el2", ICC_SRE_EL2}, - {"icc_sre_el3", ICC_SRE_EL3}, - {"icc_igrpen0_el1", ICC_IGRPEN0_EL1}, - {"icc_igrpen1_el1", ICC_IGRPEN1_EL1}, - {"icc_igrpen1_el3", ICC_IGRPEN1_EL3}, - {"icc_seien_el1", ICC_SEIEN_EL1}, - {"icc_ap0r0_el1", ICC_AP0R0_EL1}, - {"icc_ap0r1_el1", ICC_AP0R1_EL1}, - {"icc_ap0r2_el1", ICC_AP0R2_EL1}, - {"icc_ap0r3_el1", ICC_AP0R3_EL1}, - {"icc_ap1r0_el1", ICC_AP1R0_EL1}, - {"icc_ap1r1_el1", ICC_AP1R1_EL1}, - {"icc_ap1r2_el1", ICC_AP1R2_EL1}, - {"icc_ap1r3_el1", ICC_AP1R3_EL1}, - {"ich_ap0r0_el2", ICH_AP0R0_EL2}, - {"ich_ap0r1_el2", ICH_AP0R1_EL2}, - {"ich_ap0r2_el2", ICH_AP0R2_EL2}, - {"ich_ap0r3_el2", ICH_AP0R3_EL2}, - {"ich_ap1r0_el2", ICH_AP1R0_EL2}, - {"ich_ap1r1_el2", ICH_AP1R1_EL2}, - {"ich_ap1r2_el2", ICH_AP1R2_EL2}, - {"ich_ap1r3_el2", ICH_AP1R3_EL2}, - {"ich_hcr_el2", ICH_HCR_EL2}, - {"ich_misr_el2", ICH_MISR_EL2}, - {"ich_vmcr_el2", ICH_VMCR_EL2}, - {"ich_vseir_el2", ICH_VSEIR_EL2}, - {"ich_lr0_el2", ICH_LR0_EL2}, - {"ich_lr1_el2", ICH_LR1_EL2}, - {"ich_lr2_el2", ICH_LR2_EL2}, - {"ich_lr3_el2", ICH_LR3_EL2}, - {"ich_lr4_el2", ICH_LR4_EL2}, - {"ich_lr5_el2", ICH_LR5_EL2}, - {"ich_lr6_el2", ICH_LR6_EL2}, - {"ich_lr7_el2", ICH_LR7_EL2}, - {"ich_lr8_el2", ICH_LR8_EL2}, - {"ich_lr9_el2", ICH_LR9_EL2}, - {"ich_lr10_el2", ICH_LR10_EL2}, - {"ich_lr11_el2", ICH_LR11_EL2}, - {"ich_lr12_el2", ICH_LR12_EL2}, - {"ich_lr13_el2", ICH_LR13_EL2}, - {"ich_lr14_el2", ICH_LR14_EL2}, - {"ich_lr15_el2", ICH_LR15_EL2} -}; - -uint32_t -A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { - // First search the registers shared by all - std::string NameLower = Name.lower(); - for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { - if (SysRegPairs[i].Name == NameLower) { - Valid = true; - return SysRegPairs[i].Value; - } - } - - // Now try the instruction-specific registers (either read-only or - // write-only). 
- for (unsigned i = 0; i < NumInstPairs; ++i) { - if (InstPairs[i].Name == NameLower) { - Valid = true; - return InstPairs[i].Value; - } - } - - // Try to parse an S____ register name, where the bits - // are: 11 xxx 1x11 xxxx xxx - Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$"); - - SmallVector Ops; - if (!GenericRegPattern.match(NameLower, &Ops)) { - Valid = false; - return -1; - } - - uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0; - uint32_t Bits; - Ops[1].getAsInteger(10, Op1); - Ops[2].getAsInteger(10, CRn); - Ops[3].getAsInteger(10, CRm); - Ops[4].getAsInteger(10, Op2); - Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2; - - Valid = true; - return Bits; -} - -std::string -A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { - for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { - if (SysRegPairs[i].Value == Bits) { - Valid = true; - return SysRegPairs[i].Name; - } - } - - for (unsigned i = 0; i < NumInstPairs; ++i) { - if (InstPairs[i].Value == Bits) { - Valid = true; - return InstPairs[i].Name; - } - } - - uint32_t Op0 = (Bits >> 14) & 0x3; - uint32_t Op1 = (Bits >> 11) & 0x7; - uint32_t CRn = (Bits >> 7) & 0xf; - uint32_t CRm = (Bits >> 3) & 0xf; - uint32_t Op2 = Bits & 0x7; - - // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic - // name. - if (Op0 != 3 || (CRn != 11 && CRn != 15)) { - Valid = false; - return ""; - } - - assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg"); - - Valid = true; - return "s3_" + utostr(Op1) + "_c" + utostr(CRn) - + "_c" + utostr(CRm) + "_" + utostr(Op2); -} - -const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = { - {"ipas2e1is", IPAS2E1IS}, - {"ipas2le1is", IPAS2LE1IS}, - {"vmalle1is", VMALLE1IS}, - {"alle2is", ALLE2IS}, - {"alle3is", ALLE3IS}, - {"vae1is", VAE1IS}, - {"vae2is", VAE2IS}, - {"vae3is", VAE3IS}, - {"aside1is", ASIDE1IS}, - {"vaae1is", VAAE1IS}, - {"alle1is", ALLE1IS}, - {"vale1is", VALE1IS}, - {"vale2is", VALE2IS}, - {"vale3is", VALE3IS}, - {"vmalls12e1is", VMALLS12E1IS}, - {"vaale1is", VAALE1IS}, - {"ipas2e1", IPAS2E1}, - {"ipas2le1", IPAS2LE1}, - {"vmalle1", VMALLE1}, - {"alle2", ALLE2}, - {"alle3", ALLE3}, - {"vae1", VAE1}, - {"vae2", VAE2}, - {"vae3", VAE3}, - {"aside1", ASIDE1}, - {"vaae1", VAAE1}, - {"alle1", ALLE1}, - {"vale1", VALE1}, - {"vale2", VALE2}, - {"vale3", VALE3}, - {"vmalls12e1", VMALLS12E1}, - {"vaale1", VAALE1} -}; - -A64TLBI::TLBIMapper::TLBIMapper() - : NamedImmMapper(TLBIPairs, 0) {} - -bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) { - const fltSemantics &Sem = Val.getSemantics(); - unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1; - - uint32_t ExpMask; - switch (FracBits) { - case 10: // IEEE half-precision - ExpMask = 0x1f; - break; - case 23: // IEEE single-precision - ExpMask = 0xff; - break; - case 52: // IEEE double-precision - ExpMask = 0x7ff; - break; - case 112: // IEEE quad-precision - // No immediates are valid for double precision. 
- return false; - default: - llvm_unreachable("Only half, single and double precision supported"); - } - - uint32_t ExpStart = FracBits; - uint64_t FracMask = (1ULL << FracBits) - 1; - - uint32_t Sign = Val.isNegative(); - - uint64_t Bits= Val.bitcastToAPInt().getLimitedValue(); - uint64_t Fraction = Bits & FracMask; - int32_t Exponent = ((Bits >> ExpStart) & ExpMask); - Exponent -= ExpMask >> 1; - - // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19) - // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48) - // This translates to: only 4 bits of fraction; -3 <= exp <= 4. - uint64_t A64FracStart = FracBits - 4; - uint64_t A64FracMask = 0xf; - - // Are there too many fraction bits? - if (Fraction & ~(A64FracMask << A64FracStart)) - return false; - - if (Exponent < -3 || Exponent > 4) - return false; - - uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask; - uint32_t PackedExp = (Exponent + 7) & 0x7; - - Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction; - return true; -} - -// Encoding of the immediate for logical (immediate) instructions: -// -// | N | imms | immr | size | R | S | -// |---+--------+--------+------+--------------+--------------| -// | 1 | ssssss | rrrrrr | 64 | UInt(rrrrrr) | UInt(ssssss) | -// | 0 | 0sssss | xrrrrr | 32 | UInt(rrrrr) | UInt(sssss) | -// | 0 | 10ssss | xxrrrr | 16 | UInt(rrrr) | UInt(ssss) | -// | 0 | 110sss | xxxrrr | 8 | UInt(rrr) | UInt(sss) | -// | 0 | 1110ss | xxxxrr | 4 | UInt(rr) | UInt(ss) | -// | 0 | 11110s | xxxxxr | 2 | UInt(r) | UInt(s) | -// | 0 | 11111x | - | | UNALLOCATED | | -// -// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in -// which the lower S+1 bits are ones and the remaining bits are zero, then -// rotated right by R bits, which is then replicated across the datapath. -// -// + Values of 'N', 'imms' and 'immr' which do not match the above table are -// RESERVED. -// + If all 's' bits in the imms field are set then the instruction is -// RESERVED. -// + The 'x' bits in the 'immr' field are IGNORED. - -bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) { - int RepeatWidth; - int Rotation = 0; - int Num1s = 0; - - // Because there are S+1 ones in the replicated mask, an immediate of all - // zeros is not allowed. Filtering it here is probably more efficient. - if (Imm == 0) return false; - - for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) { - uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1; - uint64_t ReplicatedMask = Imm & RepeatMask; - - if (ReplicatedMask == 0) continue; - - // First we have to make sure the mask is actually repeated in each slot for - // this width-specifier. - bool IsReplicatedMask = true; - for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) { - if (((Imm >> i) & RepeatMask) != ReplicatedMask) { - IsReplicatedMask = false; - break; - } - } - if (!IsReplicatedMask) continue; - - // Now we have to work out the amount of rotation needed. The first part of - // this calculation is actually independent of RepeatWidth, but the complex - // case will depend on it. - Rotation = countTrailingZeros(Imm); - if (Rotation == 0) { - // There were no leading zeros, which means it's either in place or there - // are 1s at each end (e.g. 0x8003 needs rotating). - Rotation = RegWidth == 64 ? 
CountLeadingOnes_64(Imm) - : CountLeadingOnes_32(Imm); - Rotation = RepeatWidth - Rotation; - } - - uint64_t ReplicatedOnes = ReplicatedMask; - if (Rotation != 0 && Rotation != 64) - ReplicatedOnes = (ReplicatedMask >> Rotation) - | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); - - // Of course, they may not actually be ones, so we have to check that: - if (!isMask_64(ReplicatedOnes)) - continue; - - Num1s = CountTrailingOnes_64(ReplicatedOnes); - - // We know we've got an almost valid encoding (certainly, if this is invalid - // no other parameters would work). - break; - } - - // The encodings which would produce all 1s are RESERVED. - if (RepeatWidth == 1 || Num1s == RepeatWidth) return false; - - uint32_t N = RepeatWidth == 64; - uint32_t ImmR = RepeatWidth - Rotation; - uint32_t ImmS = Num1s - 1; - - switch (RepeatWidth) { - default: break; // No action required for other valid rotations. - case 16: ImmS |= 0x20; break; // 10ssss - case 8: ImmS |= 0x30; break; // 110sss - case 4: ImmS |= 0x38; break; // 1110ss - case 2: ImmS |= 0x3c; break; // 11110s - } - - Bits = ImmS | (ImmR << 6) | (N << 12); - - return true; -} - - -bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, - uint64_t &Imm) { - uint32_t N = Bits >> 12; - uint32_t ImmR = (Bits >> 6) & 0x3f; - uint32_t ImmS = Bits & 0x3f; - - // N=1 encodes a 64-bit replication and is invalid for the 32-bit - // instructions. - if (RegWidth == 32 && N != 0) return false; - - int Width = 0; - if (N == 1) - Width = 64; - else if ((ImmS & 0x20) == 0) - Width = 32; - else if ((ImmS & 0x10) == 0) - Width = 16; - else if ((ImmS & 0x08) == 0) - Width = 8; - else if ((ImmS & 0x04) == 0) - Width = 4; - else if ((ImmS & 0x02) == 0) - Width = 2; - else { - // ImmS is 0b11111x: UNALLOCATED - return false; - } - - int Num1s = (ImmS & (Width - 1)) + 1; - - // All encodings which would map to -1 (signed) are RESERVED. - if (Num1s == Width) return false; - - int Rotation = (ImmR & (Width - 1)); - uint64_t Mask = (1ULL << Num1s) - 1; - uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1; - if (Rotation != 0 && Rotation != 64) - Mask = (Mask >> Rotation) - | ((Mask << (Width - Rotation)) & WidthMask); - - Imm = Mask; - for (unsigned i = 1; i < RegWidth / Width; ++i) { - Mask <<= Width; - Imm |= Mask; - } - - return true; -} - -bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { - // If high bits are set then a 32-bit MOVZ can't possibly work. - if (RegWidth == 32 && (Value & ~0xffffffffULL)) - return false; - - for (int i = 0; i < RegWidth; i += 16) { - // If the value is 0 when we mask out all the bits that could be set with - // the current LSL value then it's representable. - if ((Value & ~(0xffffULL << i)) == 0) { - Shift = i / 16; - UImm16 = (Value >> i) & 0xffff; - return true; - } - } - return false; -} - -bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { - // MOVN is defined to set its register to NOT(LSL(imm16, shift)). - - // We have to be a little careful about a 32-bit register: 0xffff_1234 *is* - // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not - // a valid input for isMOVZImm. - if (RegWidth == 32 && (Value & ~0xffffffffULL)) - return false; - - uint64_t MOVZEquivalent = RegWidth == 32 ? 
~Value & 0xffffffff : ~Value; - - return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift); -} - -bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value, - int &UImm16, int &Shift) { - if (isMOVZImm(RegWidth, Value, UImm16, Shift)) - return false; - - return isMOVNImm(RegWidth, Value, UImm16, Shift); -} - -// decodeNeonModShiftImm - Decode a Neon OpCmode value into the -// the shift amount and the shift type (shift zeros or ones in) and -// returns whether the OpCmode value implies a shift operation. -bool A64Imms::decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm, - unsigned &ShiftOnesIn) { - ShiftImm = 0; - ShiftOnesIn = false; - bool HasShift = true; - - if (OpCmode == 0xe) { - // movi byte - HasShift = false; - } else if (OpCmode == 0x1e) { - // movi 64-bit bytemask - HasShift = false; - } else if ((OpCmode & 0xc) == 0x8) { - // shift zeros, per halfword - ShiftImm = ((OpCmode & 0x2) >> 1); - } else if ((OpCmode & 0x8) == 0) { - // shift zeros, per word - ShiftImm = ((OpCmode & 0x6) >> 1); - } else if ((OpCmode & 0xe) == 0xc) { - // shift ones, per word - ShiftOnesIn = true; - ShiftImm = (OpCmode & 0x1); - } else { - // per byte, per bytemask - llvm_unreachable("Unsupported Neon modified immediate"); - } - - return HasShift; -} - -// decodeNeonModImm - Decode a NEON modified immediate and OpCmode values -// into the element value and the element size in bits. -uint64_t A64Imms::decodeNeonModImm(unsigned Val, unsigned OpCmode, - unsigned &EltBits) { - uint64_t DecodedVal = Val; - EltBits = 0; - - if (OpCmode == 0xe) { - // movi byte - EltBits = 8; - } else if (OpCmode == 0x1e) { - // movi 64-bit bytemask - DecodedVal = 0; - for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) { - if ((Val >> ByteNum) & 1) - DecodedVal |= (uint64_t)0xff << (8 * ByteNum); - } - EltBits = 64; - } else if ((OpCmode & 0xc) == 0x8) { - // shift zeros, per halfword - EltBits = 16; - } else if ((OpCmode & 0x8) == 0) { - // shift zeros, per word - EltBits = 32; - } else if ((OpCmode & 0xe) == 0xc) { - // shift ones, per word - EltBits = 32; - } else { - llvm_unreachable("Unsupported Neon modified immediate"); - } - return DecodedVal; -} diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h deleted file mode 100644 index 39b042b7208a..000000000000 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ /dev/null @@ -1,1138 +0,0 @@ -//===-- AArch64BaseInfo.h - Top level definitions for AArch64- --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains small standalone helper functions and enum definitions for -// the AArch64 target useful for the compiler back-end and the MC libraries. -// As such, it deliberately does not include references to LLVM core -// code gen types, passes, etc.. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64_BASEINFO_H -#define LLVM_AARCH64_BASEINFO_H - -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/Support/ErrorHandling.h" - -namespace llvm { - -// // Enums corresponding to AArch64 condition codes -namespace A64CC { - // The CondCodes constants map directly to the 4-bit encoding of the - // condition field for predicated instructions. 
- enum CondCodes { // Meaning (integer) Meaning (floating-point) - EQ = 0, // Equal Equal - NE, // Not equal Not equal, or unordered - HS, // Unsigned higher or same >, ==, or unordered - LO, // Unsigned lower or same Less than - MI, // Minus, negative Less than - PL, // Plus, positive or zero >, ==, or unordered - VS, // Overflow Unordered - VC, // No overflow Ordered - HI, // Unsigned higher Greater than, or unordered - LS, // Unsigned lower or same Less than or equal - GE, // Greater than or equal Greater than or equal - LT, // Less than Less than, or unordered - GT, // Signed greater than Greater than - LE, // Signed less than or equal <, ==, or unordered - AL, // Always (unconditional) Always (unconditional) - NV, // Always (unconditional) Always (unconditional) - // Note the NV exists purely to disassemble 0b1111. Execution - // is "always". - Invalid - }; - -} // namespace A64CC - -inline static const char *A64CondCodeToString(A64CC::CondCodes CC) { - switch (CC) { - default: llvm_unreachable("Unknown condition code"); - case A64CC::EQ: return "eq"; - case A64CC::NE: return "ne"; - case A64CC::HS: return "hs"; - case A64CC::LO: return "lo"; - case A64CC::MI: return "mi"; - case A64CC::PL: return "pl"; - case A64CC::VS: return "vs"; - case A64CC::VC: return "vc"; - case A64CC::HI: return "hi"; - case A64CC::LS: return "ls"; - case A64CC::GE: return "ge"; - case A64CC::LT: return "lt"; - case A64CC::GT: return "gt"; - case A64CC::LE: return "le"; - case A64CC::AL: return "al"; - case A64CC::NV: return "nv"; - } -} - -inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) { - return StringSwitch(CondStr.lower()) - .Case("eq", A64CC::EQ) - .Case("ne", A64CC::NE) - .Case("ne", A64CC::NE) - .Case("hs", A64CC::HS) - .Case("cs", A64CC::HS) - .Case("lo", A64CC::LO) - .Case("cc", A64CC::LO) - .Case("mi", A64CC::MI) - .Case("pl", A64CC::PL) - .Case("vs", A64CC::VS) - .Case("vc", A64CC::VC) - .Case("hi", A64CC::HI) - .Case("ls", A64CC::LS) - .Case("ge", A64CC::GE) - .Case("lt", A64CC::LT) - .Case("gt", A64CC::GT) - .Case("le", A64CC::LE) - .Case("al", A64CC::AL) - .Case("nv", A64CC::NV) - .Default(A64CC::Invalid); -} - -inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) { - // It turns out that the condition codes have been designed so that in order - // to reverse the intent of the condition you only have to invert the low bit: - - return static_cast(static_cast(CC) ^ 0x1); -} - -/// Instances of this class can perform bidirectional mapping from random -/// identifier strings to operand encodings. For example "MSR" takes a named -/// system-register which must be encoded somehow and decoded for printing. This -/// central location means that the information for those transformations is not -/// duplicated and remains in sync. -/// -/// FIXME: currently the algorithm is a completely unoptimised linear -/// search. Obviously this could be improved, but we would probably want to work -/// out just how often these instructions are emitted before working on it. It -/// might even be optimal to just reorder the tables for the common instructions -/// rather than changing the algorithm. 
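// A minimal sketch of the linear, bidirectional lookup described in the
// comment above, assuming an array of {Name, Value} pairs shaped like the
// Mapping struct defined just below. The helper names here are hypothetical
// illustrations, not part of the original header.
static const char *toStringImpl(const Mapping *Pairs, size_t N,
                                uint32_t Value, bool &Valid) {
  for (size_t i = 0; i != N; ++i)
    if (Pairs[i].Value == Value) {
      Valid = true;
      return Pairs[i].Name;
    }
  Valid = false;
  return "";
}

static uint32_t fromStringImpl(const Mapping *Pairs, size_t N,
                               StringRef Name, bool &Valid) {
  for (size_t i = 0; i != N; ++i)
    if (Name.equals(Pairs[i].Name)) {
      Valid = true;
      return Pairs[i].Value;
    }
  Valid = false;
  return ~0U;
}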
-struct NamedImmMapper { - struct Mapping { - const char *Name; - uint32_t Value; - }; - - template - NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) - : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {} - - StringRef toString(uint32_t Value, bool &Valid) const; - uint32_t fromString(StringRef Name, bool &Valid) const; - - /// Many of the instructions allow an alternative assembly form consisting of - /// a simple immediate. Currently the only valid forms are ranges [0, N) where - /// N being 0 indicates no immediate syntax-form is allowed. - bool validImm(uint32_t Value) const; -protected: - const Mapping *Pairs; - size_t NumPairs; - uint32_t TooBigImm; -}; - -namespace A64AT { - enum ATValues { - Invalid = -1, // Op0 Op1 CRn CRm Op2 - S1E1R = 0x43c0, // 01 000 0111 1000 000 - S1E2R = 0x63c0, // 01 100 0111 1000 000 - S1E3R = 0x73c0, // 01 110 0111 1000 000 - S1E1W = 0x43c1, // 01 000 0111 1000 001 - S1E2W = 0x63c1, // 01 100 0111 1000 001 - S1E3W = 0x73c1, // 01 110 0111 1000 001 - S1E0R = 0x43c2, // 01 000 0111 1000 010 - S1E0W = 0x43c3, // 01 000 0111 1000 011 - S12E1R = 0x63c4, // 01 100 0111 1000 100 - S12E1W = 0x63c5, // 01 100 0111 1000 101 - S12E0R = 0x63c6, // 01 100 0111 1000 110 - S12E0W = 0x63c7 // 01 100 0111 1000 111 - }; - - struct ATMapper : NamedImmMapper { - const static Mapping ATPairs[]; - - ATMapper(); - }; - -} -namespace A64DB { - enum DBValues { - Invalid = -1, - OSHLD = 0x1, - OSHST = 0x2, - OSH = 0x3, - NSHLD = 0x5, - NSHST = 0x6, - NSH = 0x7, - ISHLD = 0x9, - ISHST = 0xa, - ISH = 0xb, - LD = 0xd, - ST = 0xe, - SY = 0xf - }; - - struct DBarrierMapper : NamedImmMapper { - const static Mapping DBarrierPairs[]; - - DBarrierMapper(); - }; -} - -namespace A64DC { - enum DCValues { - Invalid = -1, // Op1 CRn CRm Op2 - ZVA = 0x5ba1, // 01 011 0111 0100 001 - IVAC = 0x43b1, // 01 000 0111 0110 001 - ISW = 0x43b2, // 01 000 0111 0110 010 - CVAC = 0x5bd1, // 01 011 0111 1010 001 - CSW = 0x43d2, // 01 000 0111 1010 010 - CVAU = 0x5bd9, // 01 011 0111 1011 001 - CIVAC = 0x5bf1, // 01 011 0111 1110 001 - CISW = 0x43f2 // 01 000 0111 1110 010 - }; - - struct DCMapper : NamedImmMapper { - const static Mapping DCPairs[]; - - DCMapper(); - }; - -} - -namespace A64IC { - enum ICValues { - Invalid = -1, // Op1 CRn CRm Op2 - IALLUIS = 0x0388, // 000 0111 0001 000 - IALLU = 0x03a8, // 000 0111 0101 000 - IVAU = 0x1ba9 // 011 0111 0101 001 - }; - - - struct ICMapper : NamedImmMapper { - const static Mapping ICPairs[]; - - ICMapper(); - }; - - static inline bool NeedsRegister(ICValues Val) { - return Val == IVAU; - } -} - -namespace A64ISB { - enum ISBValues { - Invalid = -1, - SY = 0xf - }; - struct ISBMapper : NamedImmMapper { - const static Mapping ISBPairs[]; - - ISBMapper(); - }; -} - -namespace A64PRFM { - enum PRFMValues { - Invalid = -1, - PLDL1KEEP = 0x00, - PLDL1STRM = 0x01, - PLDL2KEEP = 0x02, - PLDL2STRM = 0x03, - PLDL3KEEP = 0x04, - PLDL3STRM = 0x05, - PLIL1KEEP = 0x08, - PLIL1STRM = 0x09, - PLIL2KEEP = 0x0a, - PLIL2STRM = 0x0b, - PLIL3KEEP = 0x0c, - PLIL3STRM = 0x0d, - PSTL1KEEP = 0x10, - PSTL1STRM = 0x11, - PSTL2KEEP = 0x12, - PSTL2STRM = 0x13, - PSTL3KEEP = 0x14, - PSTL3STRM = 0x15 - }; - - struct PRFMMapper : NamedImmMapper { - const static Mapping PRFMPairs[]; - - PRFMMapper(); - }; -} - -namespace A64PState { - enum PStateValues { - Invalid = -1, - SPSel = 0x05, - DAIFSet = 0x1e, - DAIFClr = 0x1f - }; - - struct PStateMapper : NamedImmMapper { - const static Mapping PStatePairs[]; - - PStateMapper(); - }; - -} - -namespace A64SE { - enum 
ShiftExtSpecifiers { - Invalid = -1, - LSL, - MSL, - LSR, - ASR, - ROR, - - UXTB, - UXTH, - UXTW, - UXTX, - - SXTB, - SXTH, - SXTW, - SXTX - }; -} - -namespace A64Layout { - enum VectorLayout { - Invalid = -1, - VL_8B, - VL_4H, - VL_2S, - VL_1D, - - VL_16B, - VL_8H, - VL_4S, - VL_2D, - - // Bare layout for the 128-bit vector - // (only show ".b", ".h", ".s", ".d" without vector number) - VL_B, - VL_H, - VL_S, - VL_D - }; -} - -inline static const char * -A64VectorLayoutToString(A64Layout::VectorLayout Layout) { - switch (Layout) { - case A64Layout::VL_8B: return ".8b"; - case A64Layout::VL_4H: return ".4h"; - case A64Layout::VL_2S: return ".2s"; - case A64Layout::VL_1D: return ".1d"; - case A64Layout::VL_16B: return ".16b"; - case A64Layout::VL_8H: return ".8h"; - case A64Layout::VL_4S: return ".4s"; - case A64Layout::VL_2D: return ".2d"; - case A64Layout::VL_B: return ".b"; - case A64Layout::VL_H: return ".h"; - case A64Layout::VL_S: return ".s"; - case A64Layout::VL_D: return ".d"; - default: llvm_unreachable("Unknown Vector Layout"); - } -} - -inline static A64Layout::VectorLayout -A64StringToVectorLayout(StringRef LayoutStr) { - return StringSwitch(LayoutStr) - .Case(".8b", A64Layout::VL_8B) - .Case(".4h", A64Layout::VL_4H) - .Case(".2s", A64Layout::VL_2S) - .Case(".1d", A64Layout::VL_1D) - .Case(".16b", A64Layout::VL_16B) - .Case(".8h", A64Layout::VL_8H) - .Case(".4s", A64Layout::VL_4S) - .Case(".2d", A64Layout::VL_2D) - .Case(".b", A64Layout::VL_B) - .Case(".h", A64Layout::VL_H) - .Case(".s", A64Layout::VL_S) - .Case(".d", A64Layout::VL_D) - .Default(A64Layout::Invalid); -} - -namespace A64SysReg { - enum SysRegROValues { - MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000 - DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000 - MDRAR_EL1 = 0x8080, // 10 000 0001 0000 000 - OSLSR_EL1 = 0x808c, // 10 000 0001 0001 100 - DBGAUTHSTATUS_EL1 = 0x83f6, // 10 000 0111 1110 110 - PMCEID0_EL0 = 0xdce6, // 11 011 1001 1100 110 - PMCEID1_EL0 = 0xdce7, // 11 011 1001 1100 111 - MIDR_EL1 = 0xc000, // 11 000 0000 0000 000 - CCSIDR_EL1 = 0xc800, // 11 001 0000 0000 000 - CLIDR_EL1 = 0xc801, // 11 001 0000 0000 001 - CTR_EL0 = 0xd801, // 11 011 0000 0000 001 - MPIDR_EL1 = 0xc005, // 11 000 0000 0000 101 - REVIDR_EL1 = 0xc006, // 11 000 0000 0000 110 - AIDR_EL1 = 0xc807, // 11 001 0000 0000 111 - DCZID_EL0 = 0xd807, // 11 011 0000 0000 111 - ID_PFR0_EL1 = 0xc008, // 11 000 0000 0001 000 - ID_PFR1_EL1 = 0xc009, // 11 000 0000 0001 001 - ID_DFR0_EL1 = 0xc00a, // 11 000 0000 0001 010 - ID_AFR0_EL1 = 0xc00b, // 11 000 0000 0001 011 - ID_MMFR0_EL1 = 0xc00c, // 11 000 0000 0001 100 - ID_MMFR1_EL1 = 0xc00d, // 11 000 0000 0001 101 - ID_MMFR2_EL1 = 0xc00e, // 11 000 0000 0001 110 - ID_MMFR3_EL1 = 0xc00f, // 11 000 0000 0001 111 - ID_ISAR0_EL1 = 0xc010, // 11 000 0000 0010 000 - ID_ISAR1_EL1 = 0xc011, // 11 000 0000 0010 001 - ID_ISAR2_EL1 = 0xc012, // 11 000 0000 0010 010 - ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011 - ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100 - ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101 - ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000 - ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001 - ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000 - ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001 - ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100 - ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101 - ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000 - ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 - ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 - ID_AA64MMFR1_EL1 = 0xc039, // 11 000 
0000 0111 001 - MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000 - MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001 - MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010 - RVBAR_EL1 = 0xc601, // 11 000 1100 0000 001 - RVBAR_EL2 = 0xe601, // 11 100 1100 0000 001 - RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001 - ISR_EL1 = 0xc608, // 11 000 1100 0001 000 - CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001 - CNTVCT_EL0 = 0xdf02, // 11 011 1110 0000 010 - - // Trace registers - TRCSTATR = 0x8818, // 10 001 0000 0011 000 - TRCIDR8 = 0x8806, // 10 001 0000 0000 110 - TRCIDR9 = 0x880e, // 10 001 0000 0001 110 - TRCIDR10 = 0x8816, // 10 001 0000 0010 110 - TRCIDR11 = 0x881e, // 10 001 0000 0011 110 - TRCIDR12 = 0x8826, // 10 001 0000 0100 110 - TRCIDR13 = 0x882e, // 10 001 0000 0101 110 - TRCIDR0 = 0x8847, // 10 001 0000 1000 111 - TRCIDR1 = 0x884f, // 10 001 0000 1001 111 - TRCIDR2 = 0x8857, // 10 001 0000 1010 111 - TRCIDR3 = 0x885f, // 10 001 0000 1011 111 - TRCIDR4 = 0x8867, // 10 001 0000 1100 111 - TRCIDR5 = 0x886f, // 10 001 0000 1101 111 - TRCIDR6 = 0x8877, // 10 001 0000 1110 111 - TRCIDR7 = 0x887f, // 10 001 0000 1111 111 - TRCOSLSR = 0x888c, // 10 001 0001 0001 100 - TRCPDSR = 0x88ac, // 10 001 0001 0101 100 - TRCDEVAFF0 = 0x8bd6, // 10 001 0111 1010 110 - TRCDEVAFF1 = 0x8bde, // 10 001 0111 1011 110 - TRCLSR = 0x8bee, // 10 001 0111 1101 110 - TRCAUTHSTATUS = 0x8bf6, // 10 001 0111 1110 110 - TRCDEVARCH = 0x8bfe, // 10 001 0111 1111 110 - TRCDEVID = 0x8b97, // 10 001 0111 0010 111 - TRCDEVTYPE = 0x8b9f, // 10 001 0111 0011 111 - TRCPIDR4 = 0x8ba7, // 10 001 0111 0100 111 - TRCPIDR5 = 0x8baf, // 10 001 0111 0101 111 - TRCPIDR6 = 0x8bb7, // 10 001 0111 0110 111 - TRCPIDR7 = 0x8bbf, // 10 001 0111 0111 111 - TRCPIDR0 = 0x8bc7, // 10 001 0111 1000 111 - TRCPIDR1 = 0x8bcf, // 10 001 0111 1001 111 - TRCPIDR2 = 0x8bd7, // 10 001 0111 1010 111 - TRCPIDR3 = 0x8bdf, // 10 001 0111 1011 111 - TRCCIDR0 = 0x8be7, // 10 001 0111 1100 111 - TRCCIDR1 = 0x8bef, // 10 001 0111 1101 111 - TRCCIDR2 = 0x8bf7, // 10 001 0111 1110 111 - TRCCIDR3 = 0x8bff, // 10 001 0111 1111 111 - - // GICv3 registers - ICC_IAR1_EL1 = 0xc660, // 11 000 1100 1100 000 - ICC_IAR0_EL1 = 0xc640, // 11 000 1100 1000 000 - ICC_HPPIR1_EL1 = 0xc662, // 11 000 1100 1100 010 - ICC_HPPIR0_EL1 = 0xc642, // 11 000 1100 1000 010 - ICC_RPR_EL1 = 0xc65b, // 11 000 1100 1011 011 - ICH_VTR_EL2 = 0xe659, // 11 100 1100 1011 001 - ICH_EISR_EL2 = 0xe65b, // 11 100 1100 1011 011 - ICH_ELSR_EL2 = 0xe65d // 11 100 1100 1011 101 - }; - - enum SysRegWOValues { - DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000 - OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100 - PMSWINC_EL0 = 0xdce4, // 11 011 1001 1100 100 - - // Trace Registers - TRCOSLAR = 0x8884, // 10 001 0001 0000 100 - TRCLAR = 0x8be6, // 10 001 0111 1100 110 - - // GICv3 registers - ICC_EOIR1_EL1 = 0xc661, // 11 000 1100 1100 001 - ICC_EOIR0_EL1 = 0xc641, // 11 000 1100 1000 001 - ICC_DIR_EL1 = 0xc659, // 11 000 1100 1011 001 - ICC_SGI1R_EL1 = 0xc65d, // 11 000 1100 1011 101 - ICC_ASGI1R_EL1 = 0xc65e, // 11 000 1100 1011 110 - ICC_SGI0R_EL1 = 0xc65f // 11 000 1100 1011 111 - }; - - enum SysRegValues { - Invalid = -1, // Op0 Op1 CRn CRm Op2 - OSDTRRX_EL1 = 0x8002, // 10 000 0000 0000 010 - OSDTRTX_EL1 = 0x801a, // 10 000 0000 0011 010 - TEECR32_EL1 = 0x9000, // 10 010 0000 0000 000 - MDCCINT_EL1 = 0x8010, // 10 000 0000 0010 000 - MDSCR_EL1 = 0x8012, // 10 000 0000 0010 010 - DBGDTR_EL0 = 0x9820, // 10 011 0000 0100 000 - OSECCR_EL1 = 0x8032, // 10 000 0000 0110 010 - DBGVCR32_EL2 = 0xa038, // 10 100 0000 0111 000 - 
DBGBVR0_EL1 = 0x8004, // 10 000 0000 0000 100 - DBGBVR1_EL1 = 0x800c, // 10 000 0000 0001 100 - DBGBVR2_EL1 = 0x8014, // 10 000 0000 0010 100 - DBGBVR3_EL1 = 0x801c, // 10 000 0000 0011 100 - DBGBVR4_EL1 = 0x8024, // 10 000 0000 0100 100 - DBGBVR5_EL1 = 0x802c, // 10 000 0000 0101 100 - DBGBVR6_EL1 = 0x8034, // 10 000 0000 0110 100 - DBGBVR7_EL1 = 0x803c, // 10 000 0000 0111 100 - DBGBVR8_EL1 = 0x8044, // 10 000 0000 1000 100 - DBGBVR9_EL1 = 0x804c, // 10 000 0000 1001 100 - DBGBVR10_EL1 = 0x8054, // 10 000 0000 1010 100 - DBGBVR11_EL1 = 0x805c, // 10 000 0000 1011 100 - DBGBVR12_EL1 = 0x8064, // 10 000 0000 1100 100 - DBGBVR13_EL1 = 0x806c, // 10 000 0000 1101 100 - DBGBVR14_EL1 = 0x8074, // 10 000 0000 1110 100 - DBGBVR15_EL1 = 0x807c, // 10 000 0000 1111 100 - DBGBCR0_EL1 = 0x8005, // 10 000 0000 0000 101 - DBGBCR1_EL1 = 0x800d, // 10 000 0000 0001 101 - DBGBCR2_EL1 = 0x8015, // 10 000 0000 0010 101 - DBGBCR3_EL1 = 0x801d, // 10 000 0000 0011 101 - DBGBCR4_EL1 = 0x8025, // 10 000 0000 0100 101 - DBGBCR5_EL1 = 0x802d, // 10 000 0000 0101 101 - DBGBCR6_EL1 = 0x8035, // 10 000 0000 0110 101 - DBGBCR7_EL1 = 0x803d, // 10 000 0000 0111 101 - DBGBCR8_EL1 = 0x8045, // 10 000 0000 1000 101 - DBGBCR9_EL1 = 0x804d, // 10 000 0000 1001 101 - DBGBCR10_EL1 = 0x8055, // 10 000 0000 1010 101 - DBGBCR11_EL1 = 0x805d, // 10 000 0000 1011 101 - DBGBCR12_EL1 = 0x8065, // 10 000 0000 1100 101 - DBGBCR13_EL1 = 0x806d, // 10 000 0000 1101 101 - DBGBCR14_EL1 = 0x8075, // 10 000 0000 1110 101 - DBGBCR15_EL1 = 0x807d, // 10 000 0000 1111 101 - DBGWVR0_EL1 = 0x8006, // 10 000 0000 0000 110 - DBGWVR1_EL1 = 0x800e, // 10 000 0000 0001 110 - DBGWVR2_EL1 = 0x8016, // 10 000 0000 0010 110 - DBGWVR3_EL1 = 0x801e, // 10 000 0000 0011 110 - DBGWVR4_EL1 = 0x8026, // 10 000 0000 0100 110 - DBGWVR5_EL1 = 0x802e, // 10 000 0000 0101 110 - DBGWVR6_EL1 = 0x8036, // 10 000 0000 0110 110 - DBGWVR7_EL1 = 0x803e, // 10 000 0000 0111 110 - DBGWVR8_EL1 = 0x8046, // 10 000 0000 1000 110 - DBGWVR9_EL1 = 0x804e, // 10 000 0000 1001 110 - DBGWVR10_EL1 = 0x8056, // 10 000 0000 1010 110 - DBGWVR11_EL1 = 0x805e, // 10 000 0000 1011 110 - DBGWVR12_EL1 = 0x8066, // 10 000 0000 1100 110 - DBGWVR13_EL1 = 0x806e, // 10 000 0000 1101 110 - DBGWVR14_EL1 = 0x8076, // 10 000 0000 1110 110 - DBGWVR15_EL1 = 0x807e, // 10 000 0000 1111 110 - DBGWCR0_EL1 = 0x8007, // 10 000 0000 0000 111 - DBGWCR1_EL1 = 0x800f, // 10 000 0000 0001 111 - DBGWCR2_EL1 = 0x8017, // 10 000 0000 0010 111 - DBGWCR3_EL1 = 0x801f, // 10 000 0000 0011 111 - DBGWCR4_EL1 = 0x8027, // 10 000 0000 0100 111 - DBGWCR5_EL1 = 0x802f, // 10 000 0000 0101 111 - DBGWCR6_EL1 = 0x8037, // 10 000 0000 0110 111 - DBGWCR7_EL1 = 0x803f, // 10 000 0000 0111 111 - DBGWCR8_EL1 = 0x8047, // 10 000 0000 1000 111 - DBGWCR9_EL1 = 0x804f, // 10 000 0000 1001 111 - DBGWCR10_EL1 = 0x8057, // 10 000 0000 1010 111 - DBGWCR11_EL1 = 0x805f, // 10 000 0000 1011 111 - DBGWCR12_EL1 = 0x8067, // 10 000 0000 1100 111 - DBGWCR13_EL1 = 0x806f, // 10 000 0000 1101 111 - DBGWCR14_EL1 = 0x8077, // 10 000 0000 1110 111 - DBGWCR15_EL1 = 0x807f, // 10 000 0000 1111 111 - TEEHBR32_EL1 = 0x9080, // 10 010 0001 0000 000 - OSDLR_EL1 = 0x809c, // 10 000 0001 0011 100 - DBGPRCR_EL1 = 0x80a4, // 10 000 0001 0100 100 - DBGCLAIMSET_EL1 = 0x83c6, // 10 000 0111 1000 110 - DBGCLAIMCLR_EL1 = 0x83ce, // 10 000 0111 1001 110 - CSSELR_EL1 = 0xd000, // 11 010 0000 0000 000 - VPIDR_EL2 = 0xe000, // 11 100 0000 0000 000 - VMPIDR_EL2 = 0xe005, // 11 100 0000 0000 101 - CPACR_EL1 = 0xc082, // 11 000 0001 0000 010 - SCTLR_EL1 = 0xc080, // 11 
000 0001 0000 000 - SCTLR_EL2 = 0xe080, // 11 100 0001 0000 000 - SCTLR_EL3 = 0xf080, // 11 110 0001 0000 000 - ACTLR_EL1 = 0xc081, // 11 000 0001 0000 001 - ACTLR_EL2 = 0xe081, // 11 100 0001 0000 001 - ACTLR_EL3 = 0xf081, // 11 110 0001 0000 001 - HCR_EL2 = 0xe088, // 11 100 0001 0001 000 - SCR_EL3 = 0xf088, // 11 110 0001 0001 000 - MDCR_EL2 = 0xe089, // 11 100 0001 0001 001 - SDER32_EL3 = 0xf089, // 11 110 0001 0001 001 - CPTR_EL2 = 0xe08a, // 11 100 0001 0001 010 - CPTR_EL3 = 0xf08a, // 11 110 0001 0001 010 - HSTR_EL2 = 0xe08b, // 11 100 0001 0001 011 - HACR_EL2 = 0xe08f, // 11 100 0001 0001 111 - MDCR_EL3 = 0xf099, // 11 110 0001 0011 001 - TTBR0_EL1 = 0xc100, // 11 000 0010 0000 000 - TTBR0_EL2 = 0xe100, // 11 100 0010 0000 000 - TTBR0_EL3 = 0xf100, // 11 110 0010 0000 000 - TTBR1_EL1 = 0xc101, // 11 000 0010 0000 001 - TCR_EL1 = 0xc102, // 11 000 0010 0000 010 - TCR_EL2 = 0xe102, // 11 100 0010 0000 010 - TCR_EL3 = 0xf102, // 11 110 0010 0000 010 - VTTBR_EL2 = 0xe108, // 11 100 0010 0001 000 - VTCR_EL2 = 0xe10a, // 11 100 0010 0001 010 - DACR32_EL2 = 0xe180, // 11 100 0011 0000 000 - SPSR_EL1 = 0xc200, // 11 000 0100 0000 000 - SPSR_EL2 = 0xe200, // 11 100 0100 0000 000 - SPSR_EL3 = 0xf200, // 11 110 0100 0000 000 - ELR_EL1 = 0xc201, // 11 000 0100 0000 001 - ELR_EL2 = 0xe201, // 11 100 0100 0000 001 - ELR_EL3 = 0xf201, // 11 110 0100 0000 001 - SP_EL0 = 0xc208, // 11 000 0100 0001 000 - SP_EL1 = 0xe208, // 11 100 0100 0001 000 - SP_EL2 = 0xf208, // 11 110 0100 0001 000 - SPSel = 0xc210, // 11 000 0100 0010 000 - NZCV = 0xda10, // 11 011 0100 0010 000 - DAIF = 0xda11, // 11 011 0100 0010 001 - CurrentEL = 0xc212, // 11 000 0100 0010 010 - SPSR_irq = 0xe218, // 11 100 0100 0011 000 - SPSR_abt = 0xe219, // 11 100 0100 0011 001 - SPSR_und = 0xe21a, // 11 100 0100 0011 010 - SPSR_fiq = 0xe21b, // 11 100 0100 0011 011 - FPCR = 0xda20, // 11 011 0100 0100 000 - FPSR = 0xda21, // 11 011 0100 0100 001 - DSPSR_EL0 = 0xda28, // 11 011 0100 0101 000 - DLR_EL0 = 0xda29, // 11 011 0100 0101 001 - IFSR32_EL2 = 0xe281, // 11 100 0101 0000 001 - AFSR0_EL1 = 0xc288, // 11 000 0101 0001 000 - AFSR0_EL2 = 0xe288, // 11 100 0101 0001 000 - AFSR0_EL3 = 0xf288, // 11 110 0101 0001 000 - AFSR1_EL1 = 0xc289, // 11 000 0101 0001 001 - AFSR1_EL2 = 0xe289, // 11 100 0101 0001 001 - AFSR1_EL3 = 0xf289, // 11 110 0101 0001 001 - ESR_EL1 = 0xc290, // 11 000 0101 0010 000 - ESR_EL2 = 0xe290, // 11 100 0101 0010 000 - ESR_EL3 = 0xf290, // 11 110 0101 0010 000 - FPEXC32_EL2 = 0xe298, // 11 100 0101 0011 000 - FAR_EL1 = 0xc300, // 11 000 0110 0000 000 - FAR_EL2 = 0xe300, // 11 100 0110 0000 000 - FAR_EL3 = 0xf300, // 11 110 0110 0000 000 - HPFAR_EL2 = 0xe304, // 11 100 0110 0000 100 - PAR_EL1 = 0xc3a0, // 11 000 0111 0100 000 - PMCR_EL0 = 0xdce0, // 11 011 1001 1100 000 - PMCNTENSET_EL0 = 0xdce1, // 11 011 1001 1100 001 - PMCNTENCLR_EL0 = 0xdce2, // 11 011 1001 1100 010 - PMOVSCLR_EL0 = 0xdce3, // 11 011 1001 1100 011 - PMSELR_EL0 = 0xdce5, // 11 011 1001 1100 101 - PMCCNTR_EL0 = 0xdce8, // 11 011 1001 1101 000 - PMXEVTYPER_EL0 = 0xdce9, // 11 011 1001 1101 001 - PMXEVCNTR_EL0 = 0xdcea, // 11 011 1001 1101 010 - PMUSERENR_EL0 = 0xdcf0, // 11 011 1001 1110 000 - PMINTENSET_EL1 = 0xc4f1, // 11 000 1001 1110 001 - PMINTENCLR_EL1 = 0xc4f2, // 11 000 1001 1110 010 - PMOVSSET_EL0 = 0xdcf3, // 11 011 1001 1110 011 - MAIR_EL1 = 0xc510, // 11 000 1010 0010 000 - MAIR_EL2 = 0xe510, // 11 100 1010 0010 000 - MAIR_EL3 = 0xf510, // 11 110 1010 0010 000 - AMAIR_EL1 = 0xc518, // 11 000 1010 0011 000 - AMAIR_EL2 = 0xe518, // 11 
100 1010 0011 000 - AMAIR_EL3 = 0xf518, // 11 110 1010 0011 000 - VBAR_EL1 = 0xc600, // 11 000 1100 0000 000 - VBAR_EL2 = 0xe600, // 11 100 1100 0000 000 - VBAR_EL3 = 0xf600, // 11 110 1100 0000 000 - RMR_EL1 = 0xc602, // 11 000 1100 0000 010 - RMR_EL2 = 0xe602, // 11 100 1100 0000 010 - RMR_EL3 = 0xf602, // 11 110 1100 0000 010 - CONTEXTIDR_EL1 = 0xc681, // 11 000 1101 0000 001 - TPIDR_EL0 = 0xde82, // 11 011 1101 0000 010 - TPIDR_EL2 = 0xe682, // 11 100 1101 0000 010 - TPIDR_EL3 = 0xf682, // 11 110 1101 0000 010 - TPIDRRO_EL0 = 0xde83, // 11 011 1101 0000 011 - TPIDR_EL1 = 0xc684, // 11 000 1101 0000 100 - CNTFRQ_EL0 = 0xdf00, // 11 011 1110 0000 000 - CNTVOFF_EL2 = 0xe703, // 11 100 1110 0000 011 - CNTKCTL_EL1 = 0xc708, // 11 000 1110 0001 000 - CNTHCTL_EL2 = 0xe708, // 11 100 1110 0001 000 - CNTP_TVAL_EL0 = 0xdf10, // 11 011 1110 0010 000 - CNTHP_TVAL_EL2 = 0xe710, // 11 100 1110 0010 000 - CNTPS_TVAL_EL1 = 0xff10, // 11 111 1110 0010 000 - CNTP_CTL_EL0 = 0xdf11, // 11 011 1110 0010 001 - CNTHP_CTL_EL2 = 0xe711, // 11 100 1110 0010 001 - CNTPS_CTL_EL1 = 0xff11, // 11 111 1110 0010 001 - CNTP_CVAL_EL0 = 0xdf12, // 11 011 1110 0010 010 - CNTHP_CVAL_EL2 = 0xe712, // 11 100 1110 0010 010 - CNTPS_CVAL_EL1 = 0xff12, // 11 111 1110 0010 010 - CNTV_TVAL_EL0 = 0xdf18, // 11 011 1110 0011 000 - CNTV_CTL_EL0 = 0xdf19, // 11 011 1110 0011 001 - CNTV_CVAL_EL0 = 0xdf1a, // 11 011 1110 0011 010 - PMEVCNTR0_EL0 = 0xdf40, // 11 011 1110 1000 000 - PMEVCNTR1_EL0 = 0xdf41, // 11 011 1110 1000 001 - PMEVCNTR2_EL0 = 0xdf42, // 11 011 1110 1000 010 - PMEVCNTR3_EL0 = 0xdf43, // 11 011 1110 1000 011 - PMEVCNTR4_EL0 = 0xdf44, // 11 011 1110 1000 100 - PMEVCNTR5_EL0 = 0xdf45, // 11 011 1110 1000 101 - PMEVCNTR6_EL0 = 0xdf46, // 11 011 1110 1000 110 - PMEVCNTR7_EL0 = 0xdf47, // 11 011 1110 1000 111 - PMEVCNTR8_EL0 = 0xdf48, // 11 011 1110 1001 000 - PMEVCNTR9_EL0 = 0xdf49, // 11 011 1110 1001 001 - PMEVCNTR10_EL0 = 0xdf4a, // 11 011 1110 1001 010 - PMEVCNTR11_EL0 = 0xdf4b, // 11 011 1110 1001 011 - PMEVCNTR12_EL0 = 0xdf4c, // 11 011 1110 1001 100 - PMEVCNTR13_EL0 = 0xdf4d, // 11 011 1110 1001 101 - PMEVCNTR14_EL0 = 0xdf4e, // 11 011 1110 1001 110 - PMEVCNTR15_EL0 = 0xdf4f, // 11 011 1110 1001 111 - PMEVCNTR16_EL0 = 0xdf50, // 11 011 1110 1010 000 - PMEVCNTR17_EL0 = 0xdf51, // 11 011 1110 1010 001 - PMEVCNTR18_EL0 = 0xdf52, // 11 011 1110 1010 010 - PMEVCNTR19_EL0 = 0xdf53, // 11 011 1110 1010 011 - PMEVCNTR20_EL0 = 0xdf54, // 11 011 1110 1010 100 - PMEVCNTR21_EL0 = 0xdf55, // 11 011 1110 1010 101 - PMEVCNTR22_EL0 = 0xdf56, // 11 011 1110 1010 110 - PMEVCNTR23_EL0 = 0xdf57, // 11 011 1110 1010 111 - PMEVCNTR24_EL0 = 0xdf58, // 11 011 1110 1011 000 - PMEVCNTR25_EL0 = 0xdf59, // 11 011 1110 1011 001 - PMEVCNTR26_EL0 = 0xdf5a, // 11 011 1110 1011 010 - PMEVCNTR27_EL0 = 0xdf5b, // 11 011 1110 1011 011 - PMEVCNTR28_EL0 = 0xdf5c, // 11 011 1110 1011 100 - PMEVCNTR29_EL0 = 0xdf5d, // 11 011 1110 1011 101 - PMEVCNTR30_EL0 = 0xdf5e, // 11 011 1110 1011 110 - PMCCFILTR_EL0 = 0xdf7f, // 11 011 1110 1111 111 - PMEVTYPER0_EL0 = 0xdf60, // 11 011 1110 1100 000 - PMEVTYPER1_EL0 = 0xdf61, // 11 011 1110 1100 001 - PMEVTYPER2_EL0 = 0xdf62, // 11 011 1110 1100 010 - PMEVTYPER3_EL0 = 0xdf63, // 11 011 1110 1100 011 - PMEVTYPER4_EL0 = 0xdf64, // 11 011 1110 1100 100 - PMEVTYPER5_EL0 = 0xdf65, // 11 011 1110 1100 101 - PMEVTYPER6_EL0 = 0xdf66, // 11 011 1110 1100 110 - PMEVTYPER7_EL0 = 0xdf67, // 11 011 1110 1100 111 - PMEVTYPER8_EL0 = 0xdf68, // 11 011 1110 1101 000 - PMEVTYPER9_EL0 = 0xdf69, // 11 011 1110 1101 001 - 
PMEVTYPER10_EL0 = 0xdf6a, // 11 011 1110 1101 010 - PMEVTYPER11_EL0 = 0xdf6b, // 11 011 1110 1101 011 - PMEVTYPER12_EL0 = 0xdf6c, // 11 011 1110 1101 100 - PMEVTYPER13_EL0 = 0xdf6d, // 11 011 1110 1101 101 - PMEVTYPER14_EL0 = 0xdf6e, // 11 011 1110 1101 110 - PMEVTYPER15_EL0 = 0xdf6f, // 11 011 1110 1101 111 - PMEVTYPER16_EL0 = 0xdf70, // 11 011 1110 1110 000 - PMEVTYPER17_EL0 = 0xdf71, // 11 011 1110 1110 001 - PMEVTYPER18_EL0 = 0xdf72, // 11 011 1110 1110 010 - PMEVTYPER19_EL0 = 0xdf73, // 11 011 1110 1110 011 - PMEVTYPER20_EL0 = 0xdf74, // 11 011 1110 1110 100 - PMEVTYPER21_EL0 = 0xdf75, // 11 011 1110 1110 101 - PMEVTYPER22_EL0 = 0xdf76, // 11 011 1110 1110 110 - PMEVTYPER23_EL0 = 0xdf77, // 11 011 1110 1110 111 - PMEVTYPER24_EL0 = 0xdf78, // 11 011 1110 1111 000 - PMEVTYPER25_EL0 = 0xdf79, // 11 011 1110 1111 001 - PMEVTYPER26_EL0 = 0xdf7a, // 11 011 1110 1111 010 - PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011 - PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100 - PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101 - PMEVTYPER30_EL0 = 0xdf7e, // 11 011 1110 1111 110 - - // Trace registers - TRCPRGCTLR = 0x8808, // 10 001 0000 0001 000 - TRCPROCSELR = 0x8810, // 10 001 0000 0010 000 - TRCCONFIGR = 0x8820, // 10 001 0000 0100 000 - TRCAUXCTLR = 0x8830, // 10 001 0000 0110 000 - TRCEVENTCTL0R = 0x8840, // 10 001 0000 1000 000 - TRCEVENTCTL1R = 0x8848, // 10 001 0000 1001 000 - TRCSTALLCTLR = 0x8858, // 10 001 0000 1011 000 - TRCTSCTLR = 0x8860, // 10 001 0000 1100 000 - TRCSYNCPR = 0x8868, // 10 001 0000 1101 000 - TRCCCCTLR = 0x8870, // 10 001 0000 1110 000 - TRCBBCTLR = 0x8878, // 10 001 0000 1111 000 - TRCTRACEIDR = 0x8801, // 10 001 0000 0000 001 - TRCQCTLR = 0x8809, // 10 001 0000 0001 001 - TRCVICTLR = 0x8802, // 10 001 0000 0000 010 - TRCVIIECTLR = 0x880a, // 10 001 0000 0001 010 - TRCVISSCTLR = 0x8812, // 10 001 0000 0010 010 - TRCVIPCSSCTLR = 0x881a, // 10 001 0000 0011 010 - TRCVDCTLR = 0x8842, // 10 001 0000 1000 010 - TRCVDSACCTLR = 0x884a, // 10 001 0000 1001 010 - TRCVDARCCTLR = 0x8852, // 10 001 0000 1010 010 - TRCSEQEVR0 = 0x8804, // 10 001 0000 0000 100 - TRCSEQEVR1 = 0x880c, // 10 001 0000 0001 100 - TRCSEQEVR2 = 0x8814, // 10 001 0000 0010 100 - TRCSEQRSTEVR = 0x8834, // 10 001 0000 0110 100 - TRCSEQSTR = 0x883c, // 10 001 0000 0111 100 - TRCEXTINSELR = 0x8844, // 10 001 0000 1000 100 - TRCCNTRLDVR0 = 0x8805, // 10 001 0000 0000 101 - TRCCNTRLDVR1 = 0x880d, // 10 001 0000 0001 101 - TRCCNTRLDVR2 = 0x8815, // 10 001 0000 0010 101 - TRCCNTRLDVR3 = 0x881d, // 10 001 0000 0011 101 - TRCCNTCTLR0 = 0x8825, // 10 001 0000 0100 101 - TRCCNTCTLR1 = 0x882d, // 10 001 0000 0101 101 - TRCCNTCTLR2 = 0x8835, // 10 001 0000 0110 101 - TRCCNTCTLR3 = 0x883d, // 10 001 0000 0111 101 - TRCCNTVR0 = 0x8845, // 10 001 0000 1000 101 - TRCCNTVR1 = 0x884d, // 10 001 0000 1001 101 - TRCCNTVR2 = 0x8855, // 10 001 0000 1010 101 - TRCCNTVR3 = 0x885d, // 10 001 0000 1011 101 - TRCIMSPEC0 = 0x8807, // 10 001 0000 0000 111 - TRCIMSPEC1 = 0x880f, // 10 001 0000 0001 111 - TRCIMSPEC2 = 0x8817, // 10 001 0000 0010 111 - TRCIMSPEC3 = 0x881f, // 10 001 0000 0011 111 - TRCIMSPEC4 = 0x8827, // 10 001 0000 0100 111 - TRCIMSPEC5 = 0x882f, // 10 001 0000 0101 111 - TRCIMSPEC6 = 0x8837, // 10 001 0000 0110 111 - TRCIMSPEC7 = 0x883f, // 10 001 0000 0111 111 - TRCRSCTLR2 = 0x8890, // 10 001 0001 0010 000 - TRCRSCTLR3 = 0x8898, // 10 001 0001 0011 000 - TRCRSCTLR4 = 0x88a0, // 10 001 0001 0100 000 - TRCRSCTLR5 = 0x88a8, // 10 001 0001 0101 000 - TRCRSCTLR6 = 0x88b0, // 10 001 0001 0110 000 - TRCRSCTLR7 = 0x88b8, 
// 10 001 0001 0111 000 - TRCRSCTLR8 = 0x88c0, // 10 001 0001 1000 000 - TRCRSCTLR9 = 0x88c8, // 10 001 0001 1001 000 - TRCRSCTLR10 = 0x88d0, // 10 001 0001 1010 000 - TRCRSCTLR11 = 0x88d8, // 10 001 0001 1011 000 - TRCRSCTLR12 = 0x88e0, // 10 001 0001 1100 000 - TRCRSCTLR13 = 0x88e8, // 10 001 0001 1101 000 - TRCRSCTLR14 = 0x88f0, // 10 001 0001 1110 000 - TRCRSCTLR15 = 0x88f8, // 10 001 0001 1111 000 - TRCRSCTLR16 = 0x8881, // 10 001 0001 0000 001 - TRCRSCTLR17 = 0x8889, // 10 001 0001 0001 001 - TRCRSCTLR18 = 0x8891, // 10 001 0001 0010 001 - TRCRSCTLR19 = 0x8899, // 10 001 0001 0011 001 - TRCRSCTLR20 = 0x88a1, // 10 001 0001 0100 001 - TRCRSCTLR21 = 0x88a9, // 10 001 0001 0101 001 - TRCRSCTLR22 = 0x88b1, // 10 001 0001 0110 001 - TRCRSCTLR23 = 0x88b9, // 10 001 0001 0111 001 - TRCRSCTLR24 = 0x88c1, // 10 001 0001 1000 001 - TRCRSCTLR25 = 0x88c9, // 10 001 0001 1001 001 - TRCRSCTLR26 = 0x88d1, // 10 001 0001 1010 001 - TRCRSCTLR27 = 0x88d9, // 10 001 0001 1011 001 - TRCRSCTLR28 = 0x88e1, // 10 001 0001 1100 001 - TRCRSCTLR29 = 0x88e9, // 10 001 0001 1101 001 - TRCRSCTLR30 = 0x88f1, // 10 001 0001 1110 001 - TRCRSCTLR31 = 0x88f9, // 10 001 0001 1111 001 - TRCSSCCR0 = 0x8882, // 10 001 0001 0000 010 - TRCSSCCR1 = 0x888a, // 10 001 0001 0001 010 - TRCSSCCR2 = 0x8892, // 10 001 0001 0010 010 - TRCSSCCR3 = 0x889a, // 10 001 0001 0011 010 - TRCSSCCR4 = 0x88a2, // 10 001 0001 0100 010 - TRCSSCCR5 = 0x88aa, // 10 001 0001 0101 010 - TRCSSCCR6 = 0x88b2, // 10 001 0001 0110 010 - TRCSSCCR7 = 0x88ba, // 10 001 0001 0111 010 - TRCSSCSR0 = 0x88c2, // 10 001 0001 1000 010 - TRCSSCSR1 = 0x88ca, // 10 001 0001 1001 010 - TRCSSCSR2 = 0x88d2, // 10 001 0001 1010 010 - TRCSSCSR3 = 0x88da, // 10 001 0001 1011 010 - TRCSSCSR4 = 0x88e2, // 10 001 0001 1100 010 - TRCSSCSR5 = 0x88ea, // 10 001 0001 1101 010 - TRCSSCSR6 = 0x88f2, // 10 001 0001 1110 010 - TRCSSCSR7 = 0x88fa, // 10 001 0001 1111 010 - TRCSSPCICR0 = 0x8883, // 10 001 0001 0000 011 - TRCSSPCICR1 = 0x888b, // 10 001 0001 0001 011 - TRCSSPCICR2 = 0x8893, // 10 001 0001 0010 011 - TRCSSPCICR3 = 0x889b, // 10 001 0001 0011 011 - TRCSSPCICR4 = 0x88a3, // 10 001 0001 0100 011 - TRCSSPCICR5 = 0x88ab, // 10 001 0001 0101 011 - TRCSSPCICR6 = 0x88b3, // 10 001 0001 0110 011 - TRCSSPCICR7 = 0x88bb, // 10 001 0001 0111 011 - TRCPDCR = 0x88a4, // 10 001 0001 0100 100 - TRCACVR0 = 0x8900, // 10 001 0010 0000 000 - TRCACVR1 = 0x8910, // 10 001 0010 0010 000 - TRCACVR2 = 0x8920, // 10 001 0010 0100 000 - TRCACVR3 = 0x8930, // 10 001 0010 0110 000 - TRCACVR4 = 0x8940, // 10 001 0010 1000 000 - TRCACVR5 = 0x8950, // 10 001 0010 1010 000 - TRCACVR6 = 0x8960, // 10 001 0010 1100 000 - TRCACVR7 = 0x8970, // 10 001 0010 1110 000 - TRCACVR8 = 0x8901, // 10 001 0010 0000 001 - TRCACVR9 = 0x8911, // 10 001 0010 0010 001 - TRCACVR10 = 0x8921, // 10 001 0010 0100 001 - TRCACVR11 = 0x8931, // 10 001 0010 0110 001 - TRCACVR12 = 0x8941, // 10 001 0010 1000 001 - TRCACVR13 = 0x8951, // 10 001 0010 1010 001 - TRCACVR14 = 0x8961, // 10 001 0010 1100 001 - TRCACVR15 = 0x8971, // 10 001 0010 1110 001 - TRCACATR0 = 0x8902, // 10 001 0010 0000 010 - TRCACATR1 = 0x8912, // 10 001 0010 0010 010 - TRCACATR2 = 0x8922, // 10 001 0010 0100 010 - TRCACATR3 = 0x8932, // 10 001 0010 0110 010 - TRCACATR4 = 0x8942, // 10 001 0010 1000 010 - TRCACATR5 = 0x8952, // 10 001 0010 1010 010 - TRCACATR6 = 0x8962, // 10 001 0010 1100 010 - TRCACATR7 = 0x8972, // 10 001 0010 1110 010 - TRCACATR8 = 0x8903, // 10 001 0010 0000 011 - TRCACATR9 = 0x8913, // 10 001 0010 0010 011 - TRCACATR10 = 0x8923, // 10 
001 0010 0100 011 - TRCACATR11 = 0x8933, // 10 001 0010 0110 011 - TRCACATR12 = 0x8943, // 10 001 0010 1000 011 - TRCACATR13 = 0x8953, // 10 001 0010 1010 011 - TRCACATR14 = 0x8963, // 10 001 0010 1100 011 - TRCACATR15 = 0x8973, // 10 001 0010 1110 011 - TRCDVCVR0 = 0x8904, // 10 001 0010 0000 100 - TRCDVCVR1 = 0x8924, // 10 001 0010 0100 100 - TRCDVCVR2 = 0x8944, // 10 001 0010 1000 100 - TRCDVCVR3 = 0x8964, // 10 001 0010 1100 100 - TRCDVCVR4 = 0x8905, // 10 001 0010 0000 101 - TRCDVCVR5 = 0x8925, // 10 001 0010 0100 101 - TRCDVCVR6 = 0x8945, // 10 001 0010 1000 101 - TRCDVCVR7 = 0x8965, // 10 001 0010 1100 101 - TRCDVCMR0 = 0x8906, // 10 001 0010 0000 110 - TRCDVCMR1 = 0x8926, // 10 001 0010 0100 110 - TRCDVCMR2 = 0x8946, // 10 001 0010 1000 110 - TRCDVCMR3 = 0x8966, // 10 001 0010 1100 110 - TRCDVCMR4 = 0x8907, // 10 001 0010 0000 111 - TRCDVCMR5 = 0x8927, // 10 001 0010 0100 111 - TRCDVCMR6 = 0x8947, // 10 001 0010 1000 111 - TRCDVCMR7 = 0x8967, // 10 001 0010 1100 111 - TRCCIDCVR0 = 0x8980, // 10 001 0011 0000 000 - TRCCIDCVR1 = 0x8990, // 10 001 0011 0010 000 - TRCCIDCVR2 = 0x89a0, // 10 001 0011 0100 000 - TRCCIDCVR3 = 0x89b0, // 10 001 0011 0110 000 - TRCCIDCVR4 = 0x89c0, // 10 001 0011 1000 000 - TRCCIDCVR5 = 0x89d0, // 10 001 0011 1010 000 - TRCCIDCVR6 = 0x89e0, // 10 001 0011 1100 000 - TRCCIDCVR7 = 0x89f0, // 10 001 0011 1110 000 - TRCVMIDCVR0 = 0x8981, // 10 001 0011 0000 001 - TRCVMIDCVR1 = 0x8991, // 10 001 0011 0010 001 - TRCVMIDCVR2 = 0x89a1, // 10 001 0011 0100 001 - TRCVMIDCVR3 = 0x89b1, // 10 001 0011 0110 001 - TRCVMIDCVR4 = 0x89c1, // 10 001 0011 1000 001 - TRCVMIDCVR5 = 0x89d1, // 10 001 0011 1010 001 - TRCVMIDCVR6 = 0x89e1, // 10 001 0011 1100 001 - TRCVMIDCVR7 = 0x89f1, // 10 001 0011 1110 001 - TRCCIDCCTLR0 = 0x8982, // 10 001 0011 0000 010 - TRCCIDCCTLR1 = 0x898a, // 10 001 0011 0001 010 - TRCVMIDCCTLR0 = 0x8992, // 10 001 0011 0010 010 - TRCVMIDCCTLR1 = 0x899a, // 10 001 0011 0011 010 - TRCITCTRL = 0x8b84, // 10 001 0111 0000 100 - TRCCLAIMSET = 0x8bc6, // 10 001 0111 1000 110 - TRCCLAIMCLR = 0x8bce, // 10 001 0111 1001 110 - - // GICv3 registers - ICC_BPR1_EL1 = 0xc663, // 11 000 1100 1100 011 - ICC_BPR0_EL1 = 0xc643, // 11 000 1100 1000 011 - ICC_PMR_EL1 = 0xc230, // 11 000 0100 0110 000 - ICC_CTLR_EL1 = 0xc664, // 11 000 1100 1100 100 - ICC_CTLR_EL3 = 0xf664, // 11 110 1100 1100 100 - ICC_SRE_EL1 = 0xc665, // 11 000 1100 1100 101 - ICC_SRE_EL2 = 0xe64d, // 11 100 1100 1001 101 - ICC_SRE_EL3 = 0xf665, // 11 110 1100 1100 101 - ICC_IGRPEN0_EL1 = 0xc666, // 11 000 1100 1100 110 - ICC_IGRPEN1_EL1 = 0xc667, // 11 000 1100 1100 111 - ICC_IGRPEN1_EL3 = 0xf667, // 11 110 1100 1100 111 - ICC_SEIEN_EL1 = 0xc668, // 11 000 1100 1101 000 - ICC_AP0R0_EL1 = 0xc644, // 11 000 1100 1000 100 - ICC_AP0R1_EL1 = 0xc645, // 11 000 1100 1000 101 - ICC_AP0R2_EL1 = 0xc646, // 11 000 1100 1000 110 - ICC_AP0R3_EL1 = 0xc647, // 11 000 1100 1000 111 - ICC_AP1R0_EL1 = 0xc648, // 11 000 1100 1001 000 - ICC_AP1R1_EL1 = 0xc649, // 11 000 1100 1001 001 - ICC_AP1R2_EL1 = 0xc64a, // 11 000 1100 1001 010 - ICC_AP1R3_EL1 = 0xc64b, // 11 000 1100 1001 011 - ICH_AP0R0_EL2 = 0xe640, // 11 100 1100 1000 000 - ICH_AP0R1_EL2 = 0xe641, // 11 100 1100 1000 001 - ICH_AP0R2_EL2 = 0xe642, // 11 100 1100 1000 010 - ICH_AP0R3_EL2 = 0xe643, // 11 100 1100 1000 011 - ICH_AP1R0_EL2 = 0xe648, // 11 100 1100 1001 000 - ICH_AP1R1_EL2 = 0xe649, // 11 100 1100 1001 001 - ICH_AP1R2_EL2 = 0xe64a, // 11 100 1100 1001 010 - ICH_AP1R3_EL2 = 0xe64b, // 11 100 1100 1001 011 - ICH_HCR_EL2 = 0xe658, // 11 100 1100 1011 000 
- ICH_MISR_EL2 = 0xe65a, // 11 100 1100 1011 010 - ICH_VMCR_EL2 = 0xe65f, // 11 100 1100 1011 111 - ICH_VSEIR_EL2 = 0xe64c, // 11 100 1100 1001 100 - ICH_LR0_EL2 = 0xe660, // 11 100 1100 1100 000 - ICH_LR1_EL2 = 0xe661, // 11 100 1100 1100 001 - ICH_LR2_EL2 = 0xe662, // 11 100 1100 1100 010 - ICH_LR3_EL2 = 0xe663, // 11 100 1100 1100 011 - ICH_LR4_EL2 = 0xe664, // 11 100 1100 1100 100 - ICH_LR5_EL2 = 0xe665, // 11 100 1100 1100 101 - ICH_LR6_EL2 = 0xe666, // 11 100 1100 1100 110 - ICH_LR7_EL2 = 0xe667, // 11 100 1100 1100 111 - ICH_LR8_EL2 = 0xe668, // 11 100 1100 1101 000 - ICH_LR9_EL2 = 0xe669, // 11 100 1100 1101 001 - ICH_LR10_EL2 = 0xe66a, // 11 100 1100 1101 010 - ICH_LR11_EL2 = 0xe66b, // 11 100 1100 1101 011 - ICH_LR12_EL2 = 0xe66c, // 11 100 1100 1101 100 - ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101 - ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110 - ICH_LR15_EL2 = 0xe66f // 11 100 1100 1101 111 - }; - - // Note that these do not inherit from NamedImmMapper. This class is - // sufficiently different in its behaviour that I don't believe it's worth - // burdening the common NamedImmMapper with abstractions only needed in - // this one case. - struct SysRegMapper { - static const NamedImmMapper::Mapping SysRegPairs[]; - - const NamedImmMapper::Mapping *InstPairs; - size_t NumInstPairs; - - SysRegMapper() {} - uint32_t fromString(StringRef Name, bool &Valid) const; - std::string toString(uint32_t Bits, bool &Valid) const; - }; - - struct MSRMapper : SysRegMapper { - static const NamedImmMapper::Mapping MSRPairs[]; - MSRMapper(); - }; - - struct MRSMapper : SysRegMapper { - static const NamedImmMapper::Mapping MRSPairs[]; - MRSMapper(); - }; - - uint32_t ParseGenericRegister(StringRef Name, bool &Valid); -} - -namespace A64TLBI { - enum TLBIValues { - Invalid = -1, // Op0 Op1 CRn CRm Op2 - IPAS2E1IS = 0x6401, // 01 100 1000 0000 001 - IPAS2LE1IS = 0x6405, // 01 100 1000 0000 101 - VMALLE1IS = 0x4418, // 01 000 1000 0011 000 - ALLE2IS = 0x6418, // 01 100 1000 0011 000 - ALLE3IS = 0x7418, // 01 110 1000 0011 000 - VAE1IS = 0x4419, // 01 000 1000 0011 001 - VAE2IS = 0x6419, // 01 100 1000 0011 001 - VAE3IS = 0x7419, // 01 110 1000 0011 001 - ASIDE1IS = 0x441a, // 01 000 1000 0011 010 - VAAE1IS = 0x441b, // 01 000 1000 0011 011 - ALLE1IS = 0x641c, // 01 100 1000 0011 100 - VALE1IS = 0x441d, // 01 000 1000 0011 101 - VALE2IS = 0x641d, // 01 100 1000 0011 101 - VALE3IS = 0x741d, // 01 110 1000 0011 101 - VMALLS12E1IS = 0x641e, // 01 100 1000 0011 110 - VAALE1IS = 0x441f, // 01 000 1000 0011 111 - IPAS2E1 = 0x6421, // 01 100 1000 0100 001 - IPAS2LE1 = 0x6425, // 01 100 1000 0100 101 - VMALLE1 = 0x4438, // 01 000 1000 0111 000 - ALLE2 = 0x6438, // 01 100 1000 0111 000 - ALLE3 = 0x7438, // 01 110 1000 0111 000 - VAE1 = 0x4439, // 01 000 1000 0111 001 - VAE2 = 0x6439, // 01 100 1000 0111 001 - VAE3 = 0x7439, // 01 110 1000 0111 001 - ASIDE1 = 0x443a, // 01 000 1000 0111 010 - VAAE1 = 0x443b, // 01 000 1000 0111 011 - ALLE1 = 0x643c, // 01 100 1000 0111 100 - VALE1 = 0x443d, // 01 000 1000 0111 101 - VALE2 = 0x643d, // 01 100 1000 0111 101 - VALE3 = 0x743d, // 01 110 1000 0111 101 - VMALLS12E1 = 0x643e, // 01 100 1000 0111 110 - VAALE1 = 0x443f // 01 000 1000 0111 111 - }; - - struct TLBIMapper : NamedImmMapper { - const static Mapping TLBIPairs[]; - - TLBIMapper(); - }; - - static inline bool NeedsRegister(TLBIValues Val) { - switch (Val) { - case VMALLE1IS: - case ALLE2IS: - case ALLE3IS: - case ALLE1IS: - case VMALLS12E1IS: - case VMALLE1: - case ALLE2: - case ALLE3: - case ALLE1: - case 
VMALLS12E1: - return false; - default: - return true; - } - } -} - -namespace AArch64II { - - enum TOF { - //===--------------------------------------------------------------===// - // AArch64 Specific MachineOperand flags. - - MO_NO_FLAG, - - // MO_GOT - Represents a relocation referring to the GOT entry of a given - // symbol. Used in adrp. - MO_GOT, - - // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the - // GOT entry of a given symbol. Used in ldr only. - MO_GOT_LO12, - - // MO_DTPREL_* - Represents a relocation referring to the offset from a - // module's dynamic thread pointer. Used in the local-dynamic TLS access - // model. - MO_DTPREL_G1, - MO_DTPREL_G0_NC, - - // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry - // providing the offset of a variable from the thread-pointer. Used in - // initial-exec TLS model where this offset is assigned in the static thread - // block and thus known by the dynamic linker. - MO_GOTTPREL, - MO_GOTTPREL_LO12, - - // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing - // a TLS descriptor chosen by the dynamic linker. Used for the - // general-dynamic and local-dynamic TLS access models where very littls is - // known at link-time. - MO_TLSDESC, - MO_TLSDESC_LO12, - - // MO_TPREL_* - Represents a relocation referring to the offset of a - // variable from the thread pointer itself. Used in the local-exec TLS - // access model. - MO_TPREL_G1, - MO_TPREL_G0_NC, - - // MO_LO12 - On a symbol operand, this represents a relocation containing - // lower 12 bits of the address. Used in add/sub/ldr/str. - MO_LO12, - - // MO_ABS_G* - Represent the 16-bit granules of an absolute reference using - // movz/movk instructions. - MO_ABS_G3, - MO_ABS_G2_NC, - MO_ABS_G1_NC, - MO_ABS_G0_NC - }; -} - -class APFloat; - -namespace A64Imms { - bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits); - - inline bool isFPImm(const APFloat &Val) { - uint32_t Imm8; - return isFPImm(Val, Imm8); - } - - bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits); - bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm); - - bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); - bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); - - // We sometimes want to know whether the immediate is representable with a - // MOVN but *not* with a MOVZ (because that would take priority). - bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); - - uint64_t decodeNeonModImm(unsigned Val, unsigned OpCmode, unsigned &EltBits); - bool decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm, - unsigned &ShiftOnesIn); - } - -} // end namespace llvm; - -#endif diff --git a/lib/Target/AArch64/Utils/CMakeLists.txt b/lib/Target/AArch64/Utils/CMakeLists.txt deleted file mode 100644 index 8ee03a7571b4..000000000000 --- a/lib/Target/AArch64/Utils/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMAArch64Utils - AArch64BaseInfo.cpp - ) diff --git a/lib/Target/AArch64/Utils/LLVMBuild.txt b/lib/Target/AArch64/Utils/LLVMBuild.txt deleted file mode 100644 index 4acecc935e2a..000000000000 --- a/lib/Target/AArch64/Utils/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/AArch646/Utils/LLVMBuild.txt ----------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. 
-; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = AArch64Utils -parent = AArch64 -required_libraries = Support -add_to_library_groups = AArch64 diff --git a/lib/Target/AArch64/Utils/Makefile b/lib/Target/AArch64/Utils/Makefile deleted file mode 100644 index 0f4a64527123..000000000000 --- a/lib/Target/AArch64/Utils/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/AArch64/Utils/Makefile -------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMAArch64Utils - -# Hack: we need to include 'main' AArch64 target directory to grab private headers -#CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/ARM64AsmPrinter.cpp b/lib/Target/ARM64/ARM64AsmPrinter.cpp index 78f9ed12f56d..7e17985bf4a2 100644 --- a/lib/Target/ARM64/ARM64AsmPrinter.cpp +++ b/lib/Target/ARM64/ARM64AsmPrinter.cpp @@ -508,4 +508,7 @@ void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { extern "C" void LLVMInitializeARM64AsmPrinter() { RegisterAsmPrinter X(TheARM64leTarget); RegisterAsmPrinter Y(TheARM64beTarget); + + RegisterAsmPrinter Z(TheAArch64leTarget); + RegisterAsmPrinter W(TheAArch64beTarget); } diff --git a/lib/Target/ARM64/ARM64TargetMachine.cpp b/lib/Target/ARM64/ARM64TargetMachine.cpp index 5a8c5c6015d0..fc73145be3f7 100644 --- a/lib/Target/ARM64/ARM64TargetMachine.cpp +++ b/lib/Target/ARM64/ARM64TargetMachine.cpp @@ -57,6 +57,9 @@ extern "C" void LLVMInitializeARM64Target() { // Register the target. RegisterTargetMachine X(TheARM64leTarget); RegisterTargetMachine Y(TheARM64beTarget); + + RegisterTargetMachine Z(TheAArch64leTarget); + RegisterTargetMachine W(TheAArch64beTarget); } /// TargetMachine ctor - Create an ARM64 architecture model. 
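For reference, the TargetRegistry helpers used in the two hunks above (RegisterAsmPrinter, RegisterTargetMachine) are class templates parameterized by the implementing backend class, so the new AArch64-named targets are simply registered against the existing ARM64 implementations. A minimal sketch of that pattern, assuming the ARM64TargetMachine class named elsewhere in this patch; the template arguments and include path shown here are an assumption, not a quote from the sources:

#include "ARM64TargetMachine.h" // path assumed
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;

extern "C" void LLVMInitializeARM64Target() {
  // Existing arm64 / arm64_be target entries.
  RegisterTargetMachine<ARM64TargetMachine> X(TheARM64leTarget);
  RegisterTargetMachine<ARM64TargetMachine> Y(TheARM64beTarget);
  // The aarch64 / aarch64_be entries reuse the same implementation class.
  RegisterTargetMachine<ARM64TargetMachine> Z(TheAArch64leTarget);
  RegisterTargetMachine<ARM64TargetMachine> W(TheAArch64beTarget);
}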
diff --git a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp b/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp index 0c422c5cece8..4d710db1d93b 100644 --- a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp +++ b/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp @@ -3957,6 +3957,9 @@ ARM64AsmParser::classifySymbolRef(const MCExpr *Expr, extern "C" void LLVMInitializeARM64AsmParser() { RegisterMCAsmParser X(TheARM64leTarget); RegisterMCAsmParser Y(TheARM64beTarget); + + RegisterMCAsmParser Z(TheAArch64leTarget); + RegisterMCAsmParser W(TheAArch64beTarget); } #define GET_REGISTER_MATCHER diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp b/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp index 4fa9339d2b7a..bb47b3a0982a 100644 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp +++ b/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp @@ -242,6 +242,15 @@ extern "C" void LLVMInitializeARM64Disassembler() { createARM64ExternalSymbolizer); TargetRegistry::RegisterMCSymbolizer(TheARM64beTarget, createARM64ExternalSymbolizer); + + TargetRegistry::RegisterMCDisassembler(TheAArch64leTarget, + createARM64Disassembler); + TargetRegistry::RegisterMCDisassembler(TheAArch64beTarget, + createARM64Disassembler); + TargetRegistry::RegisterMCSymbolizer(TheAArch64leTarget, + createARM64ExternalSymbolizer); + TargetRegistry::RegisterMCSymbolizer(TheAArch64beTarget, + createARM64ExternalSymbolizer); } static const unsigned FPR128DecoderTable[] = { diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp index 9775a471f521..079d3588f6ea 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp +++ b/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp @@ -141,44 +141,70 @@ extern "C" void LLVMInitializeARM64TargetMC() { // Register the MC asm info. RegisterMCAsmInfoFn X(TheARM64leTarget, createARM64MCAsmInfo); RegisterMCAsmInfoFn Y(TheARM64beTarget, createARM64MCAsmInfo); + RegisterMCAsmInfoFn Z(TheAArch64leTarget, createARM64MCAsmInfo); + RegisterMCAsmInfoFn W(TheAArch64beTarget, createARM64MCAsmInfo); // Register the MC codegen info. TargetRegistry::RegisterMCCodeGenInfo(TheARM64leTarget, createARM64MCCodeGenInfo); TargetRegistry::RegisterMCCodeGenInfo(TheARM64beTarget, createARM64MCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget, + createARM64MCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget, + createARM64MCCodeGenInfo); // Register the MC instruction info. TargetRegistry::RegisterMCInstrInfo(TheARM64leTarget, createARM64MCInstrInfo); TargetRegistry::RegisterMCInstrInfo(TheARM64beTarget, createARM64MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget, createARM64MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget, createARM64MCInstrInfo); // Register the MC register info. TargetRegistry::RegisterMCRegInfo(TheARM64leTarget, createARM64MCRegisterInfo); TargetRegistry::RegisterMCRegInfo(TheARM64beTarget, createARM64MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget, createARM64MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget, createARM64MCRegisterInfo); // Register the MC subtarget info. 
TargetRegistry::RegisterMCSubtargetInfo(TheARM64leTarget, createARM64MCSubtargetInfo); TargetRegistry::RegisterMCSubtargetInfo(TheARM64beTarget, createARM64MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget, + createARM64MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget, + createARM64MCSubtargetInfo); // Register the asm backend. TargetRegistry::RegisterMCAsmBackend(TheARM64leTarget, createARM64leAsmBackend); TargetRegistry::RegisterMCAsmBackend(TheARM64beTarget, createARM64beAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget, createARM64leAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget, createARM64beAsmBackend); // Register the MC Code Emitter TargetRegistry::RegisterMCCodeEmitter(TheARM64leTarget, createARM64MCCodeEmitter); TargetRegistry::RegisterMCCodeEmitter(TheARM64beTarget, createARM64MCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget, + createARM64MCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget, + createARM64MCCodeEmitter); // Register the object streamer. TargetRegistry::RegisterMCObjectStreamer(TheARM64leTarget, createMCStreamer); TargetRegistry::RegisterMCObjectStreamer(TheARM64beTarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget, createMCStreamer); // Register the MCInstPrinter. TargetRegistry::RegisterMCInstPrinter(TheARM64leTarget, createARM64MCInstPrinter); TargetRegistry::RegisterMCInstPrinter(TheARM64beTarget, createARM64MCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget, + createARM64MCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget, + createARM64MCInstPrinter); } diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h b/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h index 954dcdbb8bc0..f2e9c17a3789 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h +++ b/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h @@ -31,6 +31,8 @@ class raw_ostream; extern Target TheARM64leTarget; extern Target TheARM64beTarget; +extern Target TheAArch64leTarget; +extern Target TheAArch64beTarget; MCCodeEmitter *createARM64MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, diff --git a/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp b/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp index c2b6f5c70456..247566825ab3 100644 --- a/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp +++ b/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp @@ -14,6 +14,8 @@ using namespace llvm; namespace llvm { Target TheARM64leTarget; Target TheARM64beTarget; +Target TheAArch64leTarget; +Target TheAArch64beTarget; } // end namespace llvm extern "C" void LLVMInitializeARM64TargetInfo() { @@ -21,4 +23,9 @@ extern "C" void LLVMInitializeARM64TargetInfo() { "ARM64 (little endian)"); RegisterTarget Y(TheARM64beTarget, "arm64_be", "ARM64 (big endian)"); + + RegisterTarget Z( + TheAArch64leTarget, "aarch64", "ARM64 (little endian)"); + RegisterTarget W( + TheAArch64beTarget, "aarch64_be", "ARM64 (big endian)"); } diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 13abaf8ce7a1..da2309ba0cbe 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AArch64 ARM ARM64 CppBackend Hexagon MSP430 NVPTX Mips PowerPC R600 Sparc SystemZ 
X86 XCore +subdirectories = ARM ARM64 CppBackend Hexagon MSP430 NVPTX Mips PowerPC R600 Sparc SystemZ X86 XCore ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the diff --git a/test/CodeGen/AArch64/128bit_load_store.ll b/test/CodeGen/AArch64/128bit_load_store.ll index 2360e858b574..56f67873f848 100644 --- a/test/CodeGen/AArch64/128bit_load_store.ll +++ b/test/CodeGen/AArch64/128bit_load_store.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon| FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 define void @test_store_f128(fp128* %ptr, fp128 %val) #0 { @@ -21,9 +20,6 @@ define void @test_vstrq_p128(i128* %ptr, i128 %val) #0 { ; CHECK-ARM64-LABEL: test_vstrq_p128 ; CHECK-ARM64: stp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}] -; CHECK-AARCH64-LABEL: test_vstrq_p128 -; CHECK-AARCH64: str {{x[0-9]+}}, [{{x[0-9]+}}, #8] -; CHECK-AARCH64: str {{x[0-9]+}}, [{{x[0-9]+}}] entry: %0 = bitcast i128* %ptr to fp128* %1 = bitcast i128 %val to fp128 @@ -35,9 +31,6 @@ define i128 @test_vldrq_p128(i128* readonly %ptr) #2 { ; CHECK-ARM64-LABEL: test_vldrq_p128 ; CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}] -; CHECK-AARCH64-LABEL: test_vldrq_p128 -; CHECK-AARCH64: ldr {{x[0-9]+}}, [{{x[0-9]+}}] -; CHECK-AARCH64: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8] entry: %0 = bitcast i128* %ptr to fp128* %1 = load fp128* %0, align 16 diff --git a/test/CodeGen/AArch64/adc.ll b/test/CodeGen/AArch64/adc.ll index 8742e450897c..892573ba06b1 100644 --- a/test/CodeGen/AArch64/adc.ll +++ b/test/CodeGen/AArch64/adc.ll @@ -1,5 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s diff --git a/test/CodeGen/AArch64/addsub-shifted.ll b/test/CodeGen/AArch64/addsub-shifted.ll index f3fdbefb47ae..0a93edd8290a 100644 --- a/test/CodeGen/AArch64/addsub-shifted.ll +++ b/test/CodeGen/AArch64/addsub-shifted.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs %s -o - -mtriple=arm64-apple-ios7.0 | FileCheck %s @var32 = global i32 0 diff --git a/test/CodeGen/AArch64/addsub.ll b/test/CodeGen/AArch64/addsub.ll index b64ad2a83d87..3aa427c352c6 100644 --- a/test/CodeGen/AArch64/addsub.ll +++ b/test/CodeGen/AArch64/addsub.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-linux-gnu | FileCheck %s ; Note that this should be refactored (for efficiency if nothing else) diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll index d33933e92232..cd01f594dcde 100644 --- a/test/CodeGen/AArch64/addsub_ext.ll +++ b/test/CodeGen/AArch64/addsub_ext.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc 
-verify-machineinstrs %s -o - -mtriple=arm64-linux-gnu | FileCheck %s @var8 = global i8 0 diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll index f73365b20c24..7cab200b1ea7 100644 --- a/test/CodeGen/AArch64/alloca.ll +++ b/test/CodeGen/AArch64/alloca.ll @@ -1,6 +1,4 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-AARCH64 %s ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s declare void @use_addr(i8*) @@ -54,8 +52,6 @@ define i64 @test_alloca_with_local(i64 %n) { ; CHECK: bl use_addr %val = load i64* %loc -; CHECK-AARCH64: sub x[[TMP:[0-9]+]], x29, #[[LOC_FROM_FP]] -; CHECK-AARCH64: ldr x0, [x[[TMP]]] ; CHECK-ARM64: ldur x0, [x29, #-[[LOC_FROM_FP]]] @@ -68,13 +64,7 @@ define i64 @test_alloca_with_local(i64 %n) { define void @test_variadic_alloca(i64 %n, ...) { ; CHECK-LABEL: test_variadic_alloca: -; CHECK-AARCH64: sub sp, sp, #{{[0-9]+}} -; CHECK-AARCH64: add x29, sp, #192 -; CHECK-AARCH64: sub [[TMP:x[0-9]+]], x29, #192 -; CHECK-AARCH64: add x8, [[TMP]], #0 -; CHECK-AARCH64-FP: str q7, [x8, #112] ; [...] -; CHECK-AARCH64-FP: str q1, [x8, #16] ; CHECK-NOFP-AARCH64: sub sp, sp, #80 @@ -112,9 +102,6 @@ define void @test_variadic_alloca(i64 %n, ...) { ; CHECK: bl use_addr ret void -; CHECK-AARCH64: sub sp, x29, #192 -; CHECK-AARCH64: ldp x29, x30, [sp, #192] -; CHECK-AARCH64: add sp, sp, #208 ; CHECK-NOFP-AARCH64: sub sp, x29, #64 ; CHECK-NOFP-AARCH64: ldp x29, x30, [sp, #64] @@ -127,11 +114,6 @@ define void @test_variadic_alloca(i64 %n, ...) { define void @test_alloca_large_frame(i64 %n) { ; CHECK-LABEL: test_alloca_large_frame: -; CHECK-AARCH64: sub sp, sp, #496 -; CHECK-AARCH64: stp x29, x30, [sp, #480] -; CHECK-AARCH64: add x29, sp, #480 -; CHECK-AARCH64: sub sp, sp, #48 -; CHECK-AARCH64: sub sp, sp, #1953, lsl #12 ; CHECK-ARM64: stp x20, x19, [sp, #-32]! 
; CHECK-ARM64: stp x29, x30, [sp, #16] @@ -145,9 +127,6 @@ define void @test_alloca_large_frame(i64 %n) { call void @use_addr_loc(i8* %addr1, i64* %addr2) ret void -; CHECK-AARCH64: sub sp, x29, #480 -; CHECK-AARCH64: ldp x29, x30, [sp, #480] -; CHECK-AARCH64: add sp, sp, #496 ; CHECK-ARM64: sub sp, x29, #16 ; CHECK-ARM64: ldp x29, x30, [sp, #16] diff --git a/test/CodeGen/AArch64/analyze-branch.ll b/test/CodeGen/AArch64/analyze-branch.ll index b4fbf2edc483..1d4daec5f43d 100644 --- a/test/CodeGen/AArch64/analyze-branch.ll +++ b/test/CodeGen/AArch64/analyze-branch.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s ; This test checks that LLVM can do basic stripping and reapplying of branches diff --git a/test/CodeGen/AArch64/andCmpBrToTBZ.ll b/test/CodeGen/AArch64/andCmpBrToTBZ.ll deleted file mode 100644 index f564a5587f61..000000000000 --- a/test/CodeGen/AArch64/andCmpBrToTBZ.ll +++ /dev/null @@ -1,74 +0,0 @@ -; RUN: llc -O1 -march=aarch64 -enable-andcmp-sinking=true < %s | FileCheck %s -; arm64 has separate copy of this test - -; ModuleID = 'and-cbz-extr-mr.bc' -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" -target triple = "aarch64-none-linux-gnu" - -define zeroext i1 @foo(i1 %IsEditable, i1 %isTextField, i8* %str1, i8* %str2, i8* %str3, i8* %str4, i8* %str5, i8* %str6, i8* %str7, i8* %str8, i8* %str9, i8* %str10, i8* %str11, i8* %str12, i8* %str13, i32 %int1, i8* %str14) unnamed_addr #0 align 2 { -; CHECK: foo: -entry: - %tobool = icmp eq i8* %str14, null - br i1 %tobool, label %return, label %if.end - -; CHECK: %if.end -; CHECK: tbz -if.end: ; preds = %entry - %and.i.i.i = and i32 %int1, 4 - %tobool.i.i.i = icmp eq i32 %and.i.i.i, 0 - br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i - -land.rhs.i: ; preds = %if.end - %cmp.i.i.i = icmp eq i8* %str12, %str13 - br i1 %cmp.i.i.i, label %if.then3, label %lor.rhs.i.i.i - -lor.rhs.i.i.i: ; preds = %land.rhs.i - %cmp.i13.i.i.i = icmp eq i8* %str10, %str11 - br i1 %cmp.i13.i.i.i, label %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, label %if.end5 - -_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit: ; preds = %lor.rhs.i.i.i - %cmp.i.i.i.i = icmp eq i8* %str8, %str9 - br i1 %cmp.i.i.i.i, label %if.then3, label %if.end5 - -if.then3: ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %land.rhs.i - %tmp11 = load i8* %str14, align 8 - %tmp12 = and i8 %tmp11, 2 - %tmp13 = icmp ne i8 %tmp12, 0 - br label %return - -if.end5: ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %lor.rhs.i.i.i -; CHECK: %if.end5 -; CHECK: tbz - br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i19 - -land.rhs.i19: ; preds = %if.end5 - %cmp.i.i.i18 = icmp eq i8* %str6, %str7 - br i1 %cmp.i.i.i18, label %if.then7, label %lor.rhs.i.i.i23 - -lor.rhs.i.i.i23: ; preds = %land.rhs.i19 - %cmp.i13.i.i.i22 = icmp eq i8* %str3, %str4 - br i1 %cmp.i13.i.i.i22, label %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, label %if.end12 - -_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28: ; preds = %lor.rhs.i.i.i23 - %cmp.i.i.i.i26 = icmp eq i8* %str1, %str2 - br i1 %cmp.i.i.i.i26, label %if.then7, label %if.end12 - -if.then7: ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, %land.rhs.i19 - br i1 %isTextField, label %if.then9, label %if.end12 - -if.then9: ; preds = %if.then7 - %tmp23 = load i8* 
%str5, align 8 - %tmp24 = and i8 %tmp23, 2 - %tmp25 = icmp ne i8 %tmp24, 0 - br label %return - -if.end12: ; preds = %if.then7, %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, %lor.rhs.i.i.i23, %if.end5, %if.end - %lnot = xor i1 %IsEditable, true - br label %return - -return: ; preds = %if.end12, %if.then9, %if.then3, %entry - %retval.0 = phi i1 [ %tmp13, %if.then3 ], [ %tmp25, %if.then9 ], [ %lnot, %if.end12 ], [ true, %entry ] - ret i1 %retval.0 -} - -attributes #0 = { nounwind ssp } diff --git a/test/CodeGen/AArch64/assertion-rc-mismatch.ll b/test/CodeGen/AArch64/assertion-rc-mismatch.ll index f09203f2211f..bcf206ec9bed 100644 --- a/test/CodeGen/AArch64/assertion-rc-mismatch.ll +++ b/test/CodeGen/AArch64/assertion-rc-mismatch.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s ; Test case related to . diff --git a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll index fc4db9097aae..162430b9b76b 100644 --- a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll +++ b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s define i32 @foo(i32* %var, i1 %cond) { diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll index f8db05fd416e..58ea735c8093 100644 --- a/test/CodeGen/AArch64/atomic-ops.ll +++ b/test/CodeGen/AArch64/atomic-ops.ll @@ -1,5 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-REG %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG @@ -502,8 +500,6 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { ; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], sxtb -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-ARM64-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] ; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxtb @@ -528,8 +524,6 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { ; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], sxth -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-ARM64-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] ; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxth @@ -555,8 +549,6 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { ; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le @@ -581,8 +573,6 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { ; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp x0, x[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt ; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 ; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, le @@ -607,8 +597,6 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { ; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], sxtb -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt ; CHECK-ARM64-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] ; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxtb @@ -634,8 +622,6 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { ; CHECK: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], sxth -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt ; CHECK-ARM64-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] ; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxth @@ -661,8 +647,6 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { ; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt @@ -687,8 +671,6 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { ; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp x0, x[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt ; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 ; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt @@ -713,8 +695,6 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { ; CHECK: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], uxtb -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxtb ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls @@ -739,8 +719,6 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { ; CHECK: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], uxth -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxth ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls @@ -765,8 +743,6 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { ; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls @@ -791,8 +767,6 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { ; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp x0, x[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi ; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 ; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, ls @@ -817,8 +791,6 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { ; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], uxtb -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxtb ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi @@ -843,8 +815,6 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { ; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], uxth -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxth ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi @@ -869,8 +839,6 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { ; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi @@ -895,8 +863,6 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { ; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-AARCH64-NEXT: cmp x0, x[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo ; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 ; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi diff --git a/test/CodeGen/AArch64/basic-pic.ll b/test/CodeGen/AArch64/basic-pic.ll index c63610bccae5..2c69bee0d1b3 100644 --- a/test/CodeGen/AArch64/basic-pic.ll +++ b/test/CodeGen/AArch64/basic-pic.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s ; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s @var = global i32 0 diff --git a/test/CodeGen/AArch64/bitfield-insert-0.ll b/test/CodeGen/AArch64/bitfield-insert-0.ll index 9272e1edfb94..8959e1b6959e 100644 --- a/test/CodeGen/AArch64/bitfield-insert-0.ll +++ b/test/CodeGen/AArch64/bitfield-insert-0.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -disassemble - | FileCheck %s ; RUN: llc -mtriple=arm64-linux-gnu -filetype=obj -o - %s | llvm-objdump -disassemble - | FileCheck %s ; The encoding of lsb -> immr in the CGed bitfield instructions was wrong at one diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll index b67aa0fa23f0..8b0b4dafe6c0 100644 --- a/test/CodeGen/AArch64/bitfield-insert.ll +++ b/test/CodeGen/AArch64/bitfield-insert.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 ; First, a simple example from Clang. The registers could plausibly be @@ -64,8 +63,6 @@ define void @test_whole64(i64* %existing, i64* %new) { define void @test_whole32_from64(i64* %existing, i64* %new) { ; CHECK-LABEL: test_whole32_from64: -; CHECK-AARCH64: bfi {{w[0-9]+}}, {{w[0-9]+}}, #{{0|16}}, #16 -; CHECK-AARCH64-NOT: and ; CHECK-ARM64: bfxil {{x[0-9]+}}, {{x[0-9]+}}, #0, #16 @@ -88,7 +85,6 @@ define void @test_32bit_masked(i32 *%existing, i32 *%new) { ; CHECK-ARM64: and ; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4 -; CHECK-AARCH64: and {{w[0-9]+}}, [[INSERT]], #0xff %oldval = load volatile i32* %existing %oldval_keep = and i32 %oldval, 135 ; = 0x87 @@ -107,7 +103,6 @@ define void @test_64bit_masked(i64 *%existing, i64 *%new) { ; CHECK-LABEL: test_64bit_masked: ; CHECK-ARM64: and ; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8 -; CHECK-AARCH64: and {{x[0-9]+}}, [[INSERT]], #0xffff00000000 %oldval = load volatile i64* %existing %oldval_keep = and i64 %oldval, 1095216660480 ; = 0xff_0000_0000 @@ -128,7 +123,6 @@ define void @test_32bit_complexmask(i32 *%existing, i32 *%new) { ; CHECK-ARM64: and ; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4 -; CHECK-AARCH64: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} %oldval = load volatile i32* %existing %oldval_keep = and i32 %oldval, 647 ; = 0x287 diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll index 92f6d74908bb..71ffe30c9281 100644 --- a/test/CodeGen/AArch64/bitfield.ll +++ b/test/CodeGen/AArch64/bitfield.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 @var32 = global i32 0 @@ -24,7 +23,6 @@ define void @test_extendb(i8 %var) { %uxt64 = zext 
i8 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK-AARCH64: uxtb {{x[0-9]+}}, {{w[0-9]+}} ; CHECK-ARM64: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff ret void } @@ -49,7 +47,6 @@ define void @test_extendh(i16 %var) { %uxt64 = zext i16 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK-AARCH64: uxth {{x[0-9]+}}, {{w[0-9]+}} ; CHECK-ARM64: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff ret void } @@ -63,7 +60,6 @@ define void @test_extendw(i32 %var) { %uxt64 = zext i32 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK-AARCH64: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #0, #32 ; CHECK-ARM64: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32 ret void } diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll index c22ce1caf4d5..0cbdd3988b72 100644 --- a/test/CodeGen/AArch64/blockaddress.ll +++ b/test/CodeGen/AArch64/blockaddress.ll @@ -1,5 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -code-model=large -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s diff --git a/test/CodeGen/AArch64/bool-loads.ll b/test/CodeGen/AArch64/bool-loads.ll index 37cc8e42f175..5d92ef67d0eb 100644 --- a/test/CodeGen/AArch64/bool-loads.ll +++ b/test/CodeGen/AArch64/bool-loads.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s ; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s @var = global i1 0 diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll index 285c19ddee30..137173bc4f33 100644 --- a/test/CodeGen/AArch64/breg.ll +++ b/test/CodeGen/AArch64/breg.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s @stored_label = global i8* null diff --git a/test/CodeGen/AArch64/callee-save.ll b/test/CodeGen/AArch64/callee-save.ll index 6a2832ceaadf..9b04a8f979b1 100644 --- a/test/CodeGen/AArch64/callee-save.ll +++ b/test/CodeGen/AArch64/callee-save.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK-ARM64 @var = global float 0.0 diff --git a/test/CodeGen/AArch64/code-model-large-abs.ll b/test/CodeGen/AArch64/code-model-large-abs.ll index b2b1fa7a5728..0408e6f4898a 100644 --- a/test/CodeGen/AArch64/code-model-large-abs.ll +++ b/test/CodeGen/AArch64/code-model-large-abs.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large < %s | FileCheck %s ; RUN: llc -mtriple=arm64-linux-gnu -code-model=large -o - %s | FileCheck %s @var8 = global i8 0 diff --git a/test/CodeGen/AArch64/compare-branch.ll b/test/CodeGen/AArch64/compare-branch.ll index 31b9829d8a1f..accbadd4d4eb 100644 --- a/test/CodeGen/AArch64/compare-branch.ll +++ b/test/CodeGen/AArch64/compare-branch.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s @var32 = global i32 0 diff --git a/test/CodeGen/AArch64/complex-copy-noneon.ll b/test/CodeGen/AArch64/complex-copy-noneon.ll index 137ea5f0ff51..f65b11612828 100644 --- 
a/test/CodeGen/AArch64/complex-copy-noneon.ll +++ b/test/CodeGen/AArch64/complex-copy-noneon.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-neon < %s ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-neon < %s ; The DAG combiner decided to use a vector load/store for this struct copy diff --git a/test/CodeGen/AArch64/concatvector-bugs.ll b/test/CodeGen/AArch64/concatvector-bugs.ll deleted file mode 100644 index 8d167e42c72b..000000000000 --- a/test/CodeGen/AArch64/concatvector-bugs.ll +++ /dev/null @@ -1,70 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -; Bug: i8 type in FRP8 register but not registering with register class causes segmentation fault. -; Fix: Removed i8 type from FPR8 register class. - -; Not relevant to arm64. - -define void @test_concatvector_v8i8() { -entry.split: - br i1 undef, label %if.then, label %if.end - -if.then: ; preds = %entry.split - unreachable - -if.end: ; preds = %entry.split - br i1 undef, label %if.then9, label %if.end18 - -if.then9: ; preds = %if.end - unreachable - -if.end18: ; preds = %if.end - br label %for.body - -for.body: ; preds = %for.inc, %if.end18 - br i1 false, label %if.then30, label %for.inc - -if.then30: ; preds = %for.body - unreachable - -for.inc: ; preds = %for.body - br i1 undef, label %for.end, label %for.body - -for.end: ; preds = %for.inc - br label %for.body77 - -for.body77: ; preds = %for.body77, %for.end - br i1 undef, label %for.end106, label %for.body77 - -for.end106: ; preds = %for.body77 - br i1 undef, label %for.body130.us.us, label %stmt.for.body130.us.us - -stmt.for.body130.us.us: ; preds = %stmt.for.body130.us.us, %for.end106 - %_p_splat.us = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer - store <8 x i8> %_p_splat.us, <8 x i8>* undef, align 1 - br label %stmt.for.body130.us.us - -for.body130.us.us: ; preds = %for.body130.us.us, %for.end106 - br label %for.body130.us.us -} - -declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32) - -define <8 x i16> @test_splat(i32 %l) nounwind { -; CHECK-LABEL: test_splat: -; CHECK: ret - %lhs = insertelement <1 x i32> undef, i32 %l, i32 0 - %shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11) - %vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> zeroinitializer - ret <8 x i16> %vec -} - - -define <8 x i16> @test_notsplat(<8 x i16> %a, <8 x i16> %b, i32 %l) nounwind { -; CHECK-LABEL: test_notsplat: -; CHECK: ret -entry: - %lhs = insertelement <1 x i32> undef, i32 %l, i32 0 - %shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11) - %vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> - ret <8 x i16> %vec -} diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll index 2ee49a2f6edb..96e11b12a171 100644 --- a/test/CodeGen/AArch64/cond-sel.ll +++ b/test/CodeGen/AArch64/cond-sel.ll @@ -1,6 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @var32 = global i32 0 @@ -47,7 +45,6 @@ 
define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %r ; CHECK-NOFP-NOT: fcmp %val2 = select i1 %tst2, i64 9, i64 15 store i64 %val2, i64* @var64 -; CHECK-AARCH64: movz x[[CONST15:[0-9]+]], #15 ; CHECK-ARM64: orr w[[CONST15:[0-9]+]], wzr, #0xf ; CHECK: movz {{[wx]}}[[CONST9:[0-9]+]], #{{9|0x9}} ; CHECK: csel [[MAYBETRUE:x[0-9]+]], x[[CONST9]], x[[CONST15]], eq diff --git a/test/CodeGen/AArch64/cpus.ll b/test/CodeGen/AArch64/cpus.ll index 23c06be3a1dc..f0f36bd5cea5 100644 --- a/test/CodeGen/AArch64/cpus.ll +++ b/test/CodeGen/AArch64/cpus.ll @@ -1,9 +1,5 @@ ; This tests that llc accepts all valid AArch64 CPUs -; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s -; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s -; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s -; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll index cd9a863bd658..832a01046b0f 100644 --- a/test/CodeGen/AArch64/directcond.ll +++ b/test/CodeGen/AArch64/directcond.ll @@ -1,13 +1,9 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) { ; CHECK-LABEL: test_select_i32: %val = select i1 %bit, i32 %a, i32 %b -; CHECK-AARCH64: movz [[ONE:w[0-9]+]], #1 -; CHECK-AARCH64: tst w0, [[ONE]] ; CHECK-ARM64: tst w0, #0x1 ; CHECK-NEXT: csel w0, w1, w2, ne @@ -17,8 +13,6 @@ define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) { define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) { ; CHECK-LABEL: test_select_i64: %val = select i1 %bit, i64 %a, i64 %b -; CHECK-AARCH64: movz [[ONE:w[0-9]+]], #1 -; CHECK-AARCH64: tst w0, [[ONE]] ; CHECK-ARM64: tst w0, #0x1 ; CHECK-NEXT: csel x0, x1, x2, ne @@ -28,8 +22,6 @@ define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) { define float @test_select_float(i1 %bit, float %a, float %b) { ; CHECK-LABEL: test_select_float: %val = select i1 %bit, float %a, float %b -; CHECK-AARCH64: movz [[ONE:w[0-9]+]], #1 -; CHECK-AARCH64: tst w0, [[ONE]] ; CHECK-ARM64: tst w0, #0x1 ; CHECK-NEXT: fcsel s0, s0, s1, ne ; CHECK-NOFP-NOT: fcsel @@ -39,8 +31,6 @@ define float @test_select_float(i1 %bit, float %a, float %b) { define double @test_select_double(i1 %bit, double %a, double %b) { ; CHECK-LABEL: test_select_double: %val = select i1 %bit, double %a, double %b -; CHECK-AARCH64: movz [[ONE:w[0-9]+]], #1 -; CHECK-AARCH64: tst w0, [[ONE]] ; CHECK-ARM64: tst w0, #0x1 ; CHECK-NEXT: fcsel d0, d0, d1, ne ; CHECK-NOFP-NOT: fcsel diff --git a/test/CodeGen/AArch64/dp-3source.ll b/test/CodeGen/AArch64/dp-3source.ll index 433ce209a72d..22bd4a844e1a 100644 --- a/test/CodeGen/AArch64/dp-3source.ll +++ b/test/CodeGen/AArch64/dp-3source.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s 
-mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i32 @test_madd32(i32 %val0, i32 %val1, i32 %val2) { diff --git a/test/CodeGen/AArch64/dp1.ll b/test/CodeGen/AArch64/dp1.ll index 41ef1951997f..b09ce3668dc9 100644 --- a/test/CodeGen/AArch64/dp1.ll +++ b/test/CodeGen/AArch64/dp1.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s @var32 = global i32 0 diff --git a/test/CodeGen/AArch64/dp2.ll b/test/CodeGen/AArch64/dp2.ll index 391418d75508..71b31696372a 100644 --- a/test/CodeGen/AArch64/dp2.ll +++ b/test/CodeGen/AArch64/dp2.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64 | FileCheck %s @var32_0 = global i32 0 diff --git a/test/CodeGen/AArch64/eliminate-trunc.ll b/test/CodeGen/AArch64/eliminate-trunc.ll index 314a94dda14e..02a085acf03e 100644 --- a/test/CodeGen/AArch64/eliminate-trunc.ll +++ b/test/CodeGen/AArch64/eliminate-trunc.ll @@ -1,11 +1,7 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK-AARCH64 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-apple-ios7.0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-ARM64 ; Check trunc i64 operation is translated as a subregister access ; eliminating an i32 induction varible. -; CHECK-AARCH64: add {{x[0-9]+}}, {{x[0-9]+}}, #1 -; CHECK-AARCH64-NOT: add {{w[0-9]+}}, {{w[0-9]+}}, #1 -; CHECK-AARCH64-NEXT: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxtw ; CHECK-ARM64-NOT: add {{x[0-9]+}}, {{x[0-9]+}}, #1 ; CHECK-ARM64: add {{w[0-9]+}}, {{w[0-9]+}}, #1 diff --git a/test/CodeGen/AArch64/extern-weak.ll b/test/CodeGen/AArch64/extern-weak.ll index 7c78f9a815e1..8f418455ffa0 100644 --- a/test/CodeGen/AArch64/extern-weak.ll +++ b/test/CodeGen/AArch64/extern-weak.ll @@ -1,5 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -o - < %s | FileCheck %s --check-prefix=CHECK-AARCH64 -; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large -o - < %s | FileCheck --check-prefix=CHECK-LARGE %s ; RUN: llc -mtriple=arm64-none-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK-ARM64 ; RUN: llc -mtriple=arm64-none-linux-gnu -code-model=large -o - %s | FileCheck --check-prefix=CHECK-LARGE %s @@ -9,10 +7,7 @@ define i32()* @foo() { ; The usual ADRP/ADD pair can't be used for a weak reference because it must ; evaluate to 0 if the symbol is undefined. We use a litpool entry. 
ret i32()* @var -; CHECK-AARCH64: .LCPI0_0: -; CHECK-AARCH64-NEXT: .xword var -; CHECK-AARCH64: ldr x0, [{{x[0-9]+}}, #:lo12:.LCPI0_0] ; CHECK-ARM64: adrp x[[ADDRHI:[0-9]+]], :got:var ; CHECK-ARM64: ldr x0, [x[[ADDRHI]], :got_lo12:var] @@ -30,11 +25,7 @@ define i32()* @foo() { define i32* @bar() { %addr = getelementptr [10 x i32]* @arr_var, i32 0, i32 5 -; CHECK-AARCH64: .LCPI1_0: -; CHECK-AARCH64-NEXT: .xword arr_var -; CHECK-AARCH64: ldr [[BASE:x[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI1_0] -; CHECK-AARCH64: add x0, [[BASE]], #20 ; CHECK-ARM64: adrp x[[ADDRHI:[0-9]+]], :got:arr_var ; CHECK-ARM64: ldr [[BASE:x[0-9]+]], [x[[ADDRHI]], :got_lo12:arr_var] @@ -54,8 +45,6 @@ define i32* @bar() { define i32* @wibble() { ret i32* @defined_weak_var -; CHECK-AARCH64: adrp [[BASE:x[0-9]+]], defined_weak_var -; CHECK-AARCH64: add x0, [[BASE]], #:lo12:defined_weak_var ; CHECK-ARM64: adrp [[BASE:x[0-9]+]], defined_weak_var ; CHECK-ARM64: add x0, [[BASE]], :lo12:defined_weak_var diff --git a/test/CodeGen/AArch64/extract.ll b/test/CodeGen/AArch64/extract.ll index f066b59af2fd..1fc9387fecc0 100644 --- a/test/CodeGen/AArch64/extract.ll +++ b/test/CodeGen/AArch64/extract.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i64 @ror_i64(i64 %in) { diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll index 97410aa50265..09a6ae3ccd2a 100644 --- a/test/CodeGen/AArch64/fastcc-reserved.ll +++ b/test/CodeGen/AArch64/fastcc-reserved.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefix=CHECK-ARM64 ; This test is designed to be run in the situation where the diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll index fb9b4ac57edd..b641de0ee290 100644 --- a/test/CodeGen/AArch64/fastcc.ll +++ b/test/CodeGen/AArch64/fastcc.ll @@ -1,6 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-ARM64-TAIL -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK-ARM64 %s ; Without tailcallopt fastcc still means the caller cleans up the diff --git a/test/CodeGen/AArch64/fcmp.ll b/test/CodeGen/AArch64/fcmp.ll index fe2c3260a8b0..c54e3e62941e 100644 --- a/test/CodeGen/AArch64/fcmp.ll +++ b/test/CodeGen/AArch64/fcmp.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s declare void @bar(i32) diff --git a/test/CodeGen/AArch64/fcvt-fixed.ll b/test/CodeGen/AArch64/fcvt-fixed.ll index 5d7c83ebfb36..40800d00e50f 100644 --- a/test/CodeGen/AArch64/fcvt-fixed.ll +++ b/test/CodeGen/AArch64/fcvt-fixed.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 -O0 diff --git a/test/CodeGen/AArch64/fcvt-int.ll 
b/test/CodeGen/AArch64/fcvt-int.ll index a85b02538ef5..d549c7e78421 100644 --- a/test/CodeGen/AArch64/fcvt-int.ll +++ b/test/CodeGen/AArch64/fcvt-int.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i32 @test_floattoi32(float %in) { diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll index cae6856d799e..667c05d1653a 100644 --- a/test/CodeGen/AArch64/flags-multiuse.ll +++ b/test/CodeGen/AArch64/flags-multiuse.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s ; LLVM should be able to cope with multiple uses of the same flag-setting diff --git a/test/CodeGen/AArch64/floatdp_1source.ll b/test/CodeGen/AArch64/floatdp_1source.ll index 5d11d3f0e211..8c02787a2340 100644 --- a/test/CodeGen/AArch64/floatdp_1source.ll +++ b/test/CodeGen/AArch64/floatdp_1source.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s @varhalf = global half 0.0 diff --git a/test/CodeGen/AArch64/floatdp_2source.ll b/test/CodeGen/AArch64/floatdp_2source.ll index 0a0933e0e95e..8e98b784bb9d 100644 --- a/test/CodeGen/AArch64/floatdp_2source.ll +++ b/test/CodeGen/AArch64/floatdp_2source.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu -mcpu=cyclone | FileCheck %s @varfloat = global float 0.0 diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll index ed9f36d948ef..07cbb4919e61 100644 --- a/test/CodeGen/AArch64/fp-cond-sel.ll +++ b/test/CodeGen/AArch64/fp-cond-sel.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 @varfloat = global float 0.0 @@ -13,7 +12,6 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) { %tst1 = icmp ugt i32 %lhs32, %rhs32 %val1 = select i1 %tst1, float 0.0, float 1.0 store float %val1, float* @varfloat -; CHECK-AARCH64: ldr s[[FLT0:[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:.LCPI ; CHECK-ARM64: movi v[[FLT0:[0-9]+]].2d, #0 ; CHECK: fmov s[[FLT1:[0-9]+]], #1.0 ; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi @@ -22,7 +20,6 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) { %tst2 = icmp sle i64 %lhs64, %rhs64 %val2 = select i1 %tst2, double 1.0, double 0.0 store double %val2, double* @vardouble -; CHECK-AARCH64: ldr d[[FLT0:[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:.LCPI ; FLT0 is reused from above on ARM64. 
; CHECK: fmov d[[FLT1:[0-9]+]], #1.0 ; CHECK: fcsel {{d[0-9]+}}, d[[FLT1]], d[[FLT0]], le diff --git a/test/CodeGen/AArch64/fp-dp3.ll b/test/CodeGen/AArch64/fp-dp3.ll index e6da7c8762b3..53113b59127d 100644 --- a/test/CodeGen/AArch64/fp-dp3.ll +++ b/test/CodeGen/AArch64/fp-dp3.ll @@ -1,5 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -check-prefix=CHECK-NOFAST ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -fp-contract=fast | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s -check-prefix=CHECK-NOFAST diff --git a/test/CodeGen/AArch64/fp128-folding.ll b/test/CodeGen/AArch64/fp128-folding.ll index 91445e2c849f..4b19deb976c6 100644 --- a/test/CodeGen/AArch64/fp128-folding.ll +++ b/test/CodeGen/AArch64/fp128-folding.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s declare void @bar(i8*, i8*, i32*) diff --git a/test/CodeGen/AArch64/fp128.ll b/test/CodeGen/AArch64/fp128.ll deleted file mode 100644 index 56089e33e65d..000000000000 --- a/test/CodeGen/AArch64/fp128.ll +++ /dev/null @@ -1,282 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 -; arm64 has a separate copy of this test. -@lhs = global fp128 zeroinitializer -@rhs = global fp128 zeroinitializer - -define fp128 @test_add() { -; CHECK-LABEL: test_add: - - %lhs = load fp128* @lhs - %rhs = load fp128* @rhs -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] -; CHECK: ldr q1, [{{x[0-9]+}}, {{#?}}:lo12:rhs] - - %val = fadd fp128 %lhs, %rhs -; CHECK: bl __addtf3 - ret fp128 %val -} - -define fp128 @test_sub() { -; CHECK-LABEL: test_sub: - - %lhs = load fp128* @lhs - %rhs = load fp128* @rhs -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] -; CHECK: ldr q1, [{{x[0-9]+}}, {{#?}}:lo12:rhs] - - %val = fsub fp128 %lhs, %rhs -; CHECK: bl __subtf3 - ret fp128 %val -} - -define fp128 @test_mul() { -; CHECK-LABEL: test_mul: - - %lhs = load fp128* @lhs - %rhs = load fp128* @rhs -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] -; CHECK: ldr q1, [{{x[0-9]+}}, {{#?}}:lo12:rhs] - - %val = fmul fp128 %lhs, %rhs -; CHECK: bl __multf3 - ret fp128 %val -} - -define fp128 @test_div() { -; CHECK-LABEL: test_div: - - %lhs = load fp128* @lhs - %rhs = load fp128* @rhs -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] -; CHECK: ldr q1, [{{x[0-9]+}}, {{#?}}:lo12:rhs] - - %val = fdiv fp128 %lhs, %rhs -; CHECK: bl __divtf3 - ret fp128 %val -} - -@var32 = global i32 0 -@var64 = global i64 0 - -define void @test_fptosi() { -; CHECK-LABEL: test_fptosi: - %val = load fp128* @lhs - - %val32 = fptosi fp128 %val to i32 - store i32 %val32, i32* @var32 -; CHECK: bl __fixtfsi - - %val64 = fptosi fp128 %val to i64 - store i64 %val64, i64* @var64 -; CHECK: bl __fixtfdi - - ret void -} - -define void @test_fptoui() { -; CHECK-LABEL: test_fptoui: - %val = load fp128* @lhs - - %val32 = fptoui fp128 %val to i32 - store i32 %val32, i32* @var32 -; CHECK: bl __fixunstfsi - - %val64 = fptoui fp128 %val to i64 - store i64 %val64, i64* @var64 -; CHECK: bl __fixunstfdi - - ret void -} - -define void @test_sitofp() { -; CHECK-LABEL: test_sitofp: - - %src32 = load i32* @var32 - %val32 = sitofp i32 %src32 to fp128 - store volatile fp128 %val32, fp128* @lhs 
-; CHECK: bl __floatsitf - - %src64 = load i64* @var64 - %val64 = sitofp i64 %src64 to fp128 - store volatile fp128 %val64, fp128* @lhs -; CHECK: bl __floatditf - - ret void -} - -define void @test_uitofp() { -; CHECK-LABEL: test_uitofp: - - %src32 = load i32* @var32 - %val32 = uitofp i32 %src32 to fp128 - store volatile fp128 %val32, fp128* @lhs -; CHECK: bl __floatunsitf - - %src64 = load i64* @var64 - %val64 = uitofp i64 %src64 to fp128 - store volatile fp128 %val64, fp128* @lhs -; CHECK: bl __floatunditf - - ret void -} - -define i1 @test_setcc1() { -; CHECK-LABEL: test_setcc1: - - %lhs = load fp128* @lhs - %rhs = load fp128* @rhs -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] -; CHECK: ldr q1, [{{x[0-9]+}}, {{#?}}:lo12:rhs] - -; Technically, everything after the call to __letf2 is redundant, but we'll let -; LLVM have its fun for now. - %val = fcmp ole fp128 %lhs, %rhs -; CHECK: bl __letf2 -; CHECK: cmp w0, #0 -; CHECK: cset w0, le - - ret i1 %val -; CHECK: ret -} - -define i1 @test_setcc2() { -; CHECK-LABEL: test_setcc2: - - %lhs = load fp128* @lhs - %rhs = load fp128* @rhs -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] -; CHECK: ldr q1, [{{x[0-9]+}}, {{#?}}:lo12:rhs] - -; Technically, everything after the call to __letf2 is redundant, but we'll let -; LLVM have its fun for now. - %val = fcmp ugt fp128 %lhs, %rhs -; CHECK: bl __gttf2 -; CHECK: cmp w0, #0 -; CHECK: cset [[GT:w[0-9]+]], gt - -; CHECK: bl __unordtf2 -; CHECK: cmp w0, #0 -; CHECK: cset [[UNORDERED:w[0-9]+]], ne - -; CHECK: orr w0, [[UNORDERED]], [[GT]] - - ret i1 %val -; CHECK: ret -} - -define i32 @test_br_cc() { -; CHECK-LABEL: test_br_cc: - - %lhs = load fp128* @lhs - %rhs = load fp128* @rhs -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] -; CHECK: ldr q1, [{{x[0-9]+}}, {{#?}}:lo12:rhs] - - ; olt == !uge, which LLVM unfortunately "optimizes" this to. 
- %cond = fcmp olt fp128 %lhs, %rhs -; CHECK: bl __getf2 -; CHECK: cmp w0, #0 -; CHECK: cset [[OGE:w[0-9]+]], ge - -; CHECK: bl __unordtf2 -; CHECK: cmp w0, #0 -; CHECK: cset [[UNORDERED:w[0-9]+]], ne - -; CHECK: orr [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]] -; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]] - br i1 %cond, label %iftrue, label %iffalse - -iftrue: - ret i32 42 -; CHECK-NEXT: BB# -; CHECK-NEXT: movz {{x0|w0}}, #42 -; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]] - -iffalse: - ret i32 29 -; CHECK: [[RET29]]: -; CHECK-NEXT: movz {{x0|w0}}, #29 -; CHECK-NEXT: [[REALRET]]: -; CHECK: ret -} - -define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) { -; CHECK-LABEL: test_select: - - %val = select i1 %cond, fp128 %lhs, fp128 %rhs - store fp128 %val, fp128* @lhs -; CHECK-AARCH64: cmp {{w[0-9]+}}, #0 -; CHECK-AARCH64: str q1, [sp] -; CHECK-ARM64: tst {{w[0-9]+}}, #0x1 -; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: BB# -; CHECK-AARCH64-NEXT: str q0, [sp] -; CHECK-ARM64-NEXT: orr v[[DEST:[0-9]+]].16b, v0.16b, v0.16b -; CHECK-NEXT: [[IFFALSE]]: -; CHECK-AARCH64-NEXT: ldr q[[DEST:[0-9]+]], [sp] -; CHECK: str q[[DEST]], [{{x[0-9]+}}, {{#?}}:lo12:lhs] - ret void -; CHECK: ret -} - -@varfloat = global float 0.0 -@vardouble = global double 0.0 - -define void @test_round() { -; CHECK-LABEL: test_round: - - %val = load fp128* @lhs - - %float = fptrunc fp128 %val to float - store float %float, float* @varfloat -; CHECK: bl __trunctfsf2 -; CHECK: str s0, [{{x[0-9]+}}, {{#?}}:lo12:varfloat] - - %double = fptrunc fp128 %val to double - store double %double, double* @vardouble -; CHECK: bl __trunctfdf2 -; CHECK: str d0, [{{x[0-9]+}}, {{#?}}:lo12:vardouble] - - ret void -} - -define void @test_extend() { -; CHECK-LABEL: test_extend: - - %val = load fp128* @lhs - - %float = load float* @varfloat - %fromfloat = fpext float %float to fp128 - store volatile fp128 %fromfloat, fp128* @lhs -; CHECK: bl __extendsftf2 -; CHECK: str q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] - - %double = load double* @vardouble - %fromdouble = fpext double %double to fp128 - store volatile fp128 %fromdouble, fp128* @lhs -; CHECK: bl __extenddftf2 -; CHECK: str q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] - - ret void -; CHECK: ret -} - -define fp128 @test_neg(fp128 %in) { -; CHECK: [[MINUS0:.LCPI[0-9]+_0]]: -; Make sure the weird hex constant below *is* -0.0 -; CHECK-NEXT: fp128 -0 - -; CHECK-LABEL: test_neg: - - ; Could in principle be optimized to fneg which we can't select, this makes - ; sure that doesn't happen. 
- %ret = fsub fp128 0xL00000000000000008000000000000000, %in -; CHECK-AARCH64: str q0, [sp, #-16] -; CHECK-AARCH64-NEXT: ldr q1, [sp], #16 -; CHECK-ARM64: orr v1.16b, v0.16b, v0.16b -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:[[MINUS0]]] -; CHECK: bl __subtf3 - - ret fp128 %ret -; CHECK: ret -} diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll index e5aafb545611..e279d5b00969 100644 --- a/test/CodeGen/AArch64/fpimm.ll +++ b/test/CodeGen/AArch64/fpimm.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s @varf32 = global float 0.0 diff --git a/test/CodeGen/AArch64/frameaddr.ll b/test/CodeGen/AArch64/frameaddr.ll index 78fc13b37ead..85d95e21c9b7 100644 --- a/test/CodeGen/AArch64/frameaddr.ll +++ b/test/CodeGen/AArch64/frameaddr.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i8* @t() nounwind { diff --git a/test/CodeGen/AArch64/free-zext.ll b/test/CodeGen/AArch64/free-zext.ll index 584ce2844da1..d69105eec381 100644 --- a/test/CodeGen/AArch64/free-zext.ll +++ b/test/CodeGen/AArch64/free-zext.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i64 @test_free_zext(i8* %a, i16* %b) { diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll index 5b3e6c89db6e..129ab25c8772 100644 --- a/test/CodeGen/AArch64/func-argpassing.ll +++ b/test/CodeGen/AArch64/func-argpassing.ll @@ -1,7 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-AARCH64 --check-prefix=CHECK-LE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE-AARCH64 --check-prefix=CHECK-BE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64 %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @@ -67,8 +63,6 @@ define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %st %val0 = load volatile i32* %addr0 ; Some weird move means x0 is used for one access -; CHECK-AARCH64: add x[[STRUCTVAL_ADDR:[0-9]+]], sp, #16 -; CHECK-AARCH64: ldr [[REG32:w[0-9]+]], [x[[STRUCTVAL_ADDR]], #12] ; CHECK-ARM64: ldr [[REG32:w[0-9]+]], [sp, #28] store i32 %val0, i32* @var32 ; CHECK: str [[REG32]], [{{x[0-9]+}}, {{#?}}:lo12:var32] @@ -166,9 +160,7 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, ; Beware as above: the offset would be different on big-endian ; machines if the first ldr were changed to use s-registers. 
; CHECK-ARM64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp] -; CHECK-AARCH64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp] ; CHECK-ARM64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat] -; CHECK-AARCH64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat] ret void } @@ -196,7 +188,6 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, ; CHECK-BE-AARCH64: ldr {{x[0-9]+}}, [sp, #24] ; Important point is that we address sp+24 for second dword -; CHECK-AARCH64: ldr {{x[0-9]+}}, [sp, #16] ; CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] ret void diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll index 807bffe38ad0..8cb5f97e8888 100644 --- a/test/CodeGen/AArch64/func-calls.ll +++ b/test/CodeGen/AArch64/func-calls.ll @@ -1,7 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-BE --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64-NONEON %s @@ -93,10 +89,6 @@ define void @check_stack_args() { ; Want to check that the final double is passed in registers and ; that varstruct is passed on the stack. Rather dependent on how a ; memcpy gets created, but the following works for now. 
-; CHECK-AARCH64: mov x[[SPREG:[0-9]+]], sp -; CHECK-AARCH64-DAG: str {{w[0-9]+}}, [x[[SPREG]]] -; CHECK-AARCH64-DAG: str {{w[0-9]+}}, [x[[SPREG]], #12] -; CHECK-AARCH64-DAG: fmov d0, ; CHECK-ARM64-DAG: str {{q[0-9]+}}, [sp] ; CHECK-ARM64-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 @@ -112,9 +104,6 @@ define void @check_stack_args() { call void @stacked_fpu(float -1.0, double 1.0, float 4.0, float 2.0, float -2.0, float -8.0, float 16.0, float 1.0, float 64.0) -; CHECK-AARCH64: ldr s[[STACKEDREG:[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:.LCPI -; CHECK-AARCH64: mov x0, sp -; CHECK-AARCH64: str d[[STACKEDREG]], [x0] ; CHECK-ARM64: movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16 ; CHECK-ARM64: str [[SIXTY_FOUR]], [sp] @@ -142,9 +131,6 @@ define void @check_i128_align() { i32 42, i128 %val) ; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:var128] ; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8] -; CHECK-AARCH64: mov x[[SPREG:[0-9]+]], sp -; CHECK-AARCH64: str [[I128HI]], [x[[SPREG]], #24] -; CHECK-AARCH64: str [[I128LO]], [x[[SPREG]], #16] ; CHECK-ARM64: stp [[I128LO]], [[I128HI]], [sp, #16] ; CHECK-ARM64-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16] ; CHECK: bl check_i128_stackalign diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll index 36b74e5a57c8..2bf4a2cbce4a 100644 --- a/test/CodeGen/AArch64/global-alignment.ll +++ b/test/CodeGen/AArch64/global-alignment.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s @var32 = global [3 x i32] zeroinitializer diff --git a/test/CodeGen/AArch64/global_merge_1.ll b/test/CodeGen/AArch64/global_merge_1.ll deleted file mode 100644 index e0587d6b9041..000000000000 --- a/test/CodeGen/AArch64/global_merge_1.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s - -@m = internal global i32 0, align 4 -@n = internal global i32 0, align 4 - -define void @f1(i32 %a1, i32 %a2) { -; CHECK-LABEL: f1: -; CHECK: adrp x{{[0-9]+}}, _MergedGlobals -; CHECK-NOT: adrp - store i32 %a1, i32* @m, align 4 - store i32 %a2, i32* @n, align 4 - ret void -} - -; CHECK: .local _MergedGlobals -; CHECK: .comm _MergedGlobals,8,8 - diff --git a/test/CodeGen/AArch64/got-abuse.ll b/test/CodeGen/AArch64/got-abuse.ll index 216bfef7d5c5..c23edaf4360f 100644 --- a/test/CodeGen/AArch64/got-abuse.ll +++ b/test/CodeGen/AArch64/got-abuse.ll @@ -1,5 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s ; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj -o - %s diff --git a/test/CodeGen/AArch64/i128-align.ll b/test/CodeGen/AArch64/i128-align.ll index fb363a9591b1..a1b4d6f5a446 100644 --- a/test/CodeGen/AArch64/i128-align.ll +++ b/test/CodeGen/AArch64/i128-align.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-apple-ios7.0 -verify-machineinstrs -o - %s | FileCheck %s %struct = type { i32, i128, i8 } diff --git a/test/CodeGen/AArch64/i128-shift.ll b/test/CodeGen/AArch64/i128-shift.ll deleted file mode 100644 index bfc9e3c09369..000000000000 --- a/test/CodeGen/AArch64/i128-shift.ll +++ /dev/null @@ -1,44 +0,0 @@ -; RUN: llc -verify-machineinstrs < %s 
-mtriple=aarch64-none-linux-gnu | FileCheck %s -; arm64 has its own version of this in long-shift.ll. We'll just use that. - -define i128 @test_i128_lsl(i128 %a, i32 %shift) { -; CHECK-LABEL: test_i128_lsl: - - %sh_prom = zext i32 %shift to i128 - %shl = shl i128 %a, %sh_prom - -; CHECK: movz [[SIXTYFOUR:x[0-9]+]], #64 -; CHECK-NEXT: sub [[REVSHAMT:x[0-9]+]], [[SIXTYFOUR]], [[SHAMT_32:w[0-9]+]], uxtw -; CHECK-NEXT: lsr [[TMP1:x[0-9]+]], [[LO:x[0-9]+]], [[REVSHAMT]] -; CHECK: lsl [[TMP2:x[0-9]+]], [[HI:x[0-9]+]], [[SHAMT:x[0-9]+]] -; CHECK-NEXT: orr [[FALSEVAL:x[0-9]+]], [[TMP1]], [[TMP2]] -; CHECK-NEXT: sub [[EXTRASHAMT:x[0-9]+]], [[SHAMT]], #64 -; CHECK-NEXT: lsl [[TMP3:x[0-9]+]], [[LO]], [[EXTRASHAMT]] -; CHECK-NEXT: cmp [[EXTRASHAMT]], #0 -; CHECK-NEXT: csel [[RESULTHI:x[0-9]+]], [[TMP3]], [[FALSEVAL]], ge -; CHECK-NEXT: lsl [[TMP4:x[0-9]+]], [[LO]], [[SHAMT]] -; CHECK-NEXT: csel [[RESULTLO:x[0-9]+]], xzr, [[TMP4]], ge - - ret i128 %shl -} - -define i128 @test_i128_shr(i128 %a, i32 %shift) { -; CHECK-LABEL: test_i128_shr: - - %sh_prom = zext i32 %shift to i128 - %shr = lshr i128 %a, %sh_prom - -; CHECK: movz [[SIXTYFOUR]], #64 -; CHECK-NEXT: sub [[REVSHAMT:x[0-9]+]], [[SIXTYFOUR]], [[SHAMT_32:w[0-9]+]], uxtw -; CHECK-NEXT: lsl [[TMP2:x[0-9]+]], [[HI:x[0-9]+]], [[REVSHAMT]] -; CHECK: lsr [[TMP1:x[0-9]+]], [[LO:x[0-9]+]], [[SHAMT:x[0-9]+]] -; CHECK-NEXT: orr [[FALSEVAL:x[0-9]+]], [[TMP1]], [[TMP2]] -; CHECK-NEXT: sub [[EXTRASHAMT:x[0-9]+]], [[SHAMT]], #64 -; CHECK-NEXT: lsr [[TRUEVAL:x[0-9]+]], [[HI]], [[EXTRASHAMT]] -; CHECK-NEXT: cmp [[EXTRASHAMT]], #0 -; CHECK-NEXT: csel [[RESULTLO:x[0-9]+]], [[TRUEVAL]], [[FALSEVAL]], ge -; CHECK-NEXT: lsr [[TMP3:x[0-9]+]], [[HI]], [[SHAMT]] -; CHECK-NEXT: csel [[RESULTHI:x[0-9]+]], xzr, [[TMP3]], ge - - ret i128 %shr -} diff --git a/test/CodeGen/AArch64/illegal-float-ops.ll b/test/CodeGen/AArch64/illegal-float-ops.ll index 49443d240986..8320f3ab0443 100644 --- a/test/CodeGen/AArch64/illegal-float-ops.ll +++ b/test/CodeGen/AArch64/illegal-float-ops.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s @varfloat = global float 0.0 diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll index 22b7cc5cf954..d3ed363821c3 100644 --- a/test/CodeGen/AArch64/init-array.ll +++ b/test/CodeGen/AArch64/init-array.ll @@ -1,5 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -use-init-array < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -use-init-array -o - %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-none-eabi -verify-machineinstrs -use-init-array -o - %s | FileCheck %s diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll index 91921d5aa3b7..7ca9ade9cc62 100644 --- a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll +++ b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll @@ -1,4 +1,3 @@ -; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s ; RUN: not llc -mtriple=arm64-none-linux-gnu -o - %s define void @foo() { diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll index cc4558fa54eb..6ffc05dcbde1 100644 --- a/test/CodeGen/AArch64/inline-asm-constraints-badK.ll +++ 
b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll @@ -1,4 +1,3 @@ -; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s ; RUN: not llc -mtriple=arm64-apple-ios7.0 -o - %s define void @foo() { diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll index 820063392488..6bc633814c7e 100644 --- a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll +++ b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll @@ -1,4 +1,3 @@ -; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s ; RUN: not llc -mtriple=arm64-none-linux-gnu -o - %s define void @foo() { diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badL.ll b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll index e7b8173f6abe..3c2f60c1f837 100644 --- a/test/CodeGen/AArch64/inline-asm-constraints-badL.ll +++ b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll @@ -1,4 +1,3 @@ -; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s ; RUN: not llc -mtriple=arm64-apple-ios7.0 -o - %s define void @foo() { diff --git a/test/CodeGen/AArch64/inline-asm-constraints.ll b/test/CodeGen/AArch64/inline-asm-constraints.ll deleted file mode 100644 index 365453c5fec4..000000000000 --- a/test/CodeGen/AArch64/inline-asm-constraints.ll +++ /dev/null @@ -1,137 +0,0 @@ -;RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -no-integrated-as < %s | FileCheck %s - -define i64 @test_inline_constraint_r(i64 %base, i32 %offset) { -; CHECK-LABEL: test_inline_constraint_r: - %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 %base, i32 %offset) -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw - ret i64 %val -} - -define i16 @test_small_reg(i16 %lhs, i16 %rhs) { -; CHECK-LABEL: test_small_reg: - %val = call i16 asm sideeffect "add $0, $1, $2, sxth", "=r,r,r"(i16 %lhs, i16 %rhs) -; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth - ret i16 %val -} - -define i64 @test_inline_constraint_r_imm(i64 %base, i32 %offset) { -; CHECK-LABEL: test_inline_constraint_r_imm: - %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 4, i32 12) -; CHECK: movz [[FOUR:x[0-9]+]], #4 -; CHECK: movz [[TWELVE:w[0-9]+]], #12 -; CHECK: add {{x[0-9]+}}, [[FOUR]], [[TWELVE]], sxtw - ret i64 %val -} - -; m is permitted to have a base/offset form. We don't do that -; currently though. -define i32 @test_inline_constraint_m(i32 *%ptr) { -; CHECK-LABEL: test_inline_constraint_m: - %val = call i32 asm "ldr $0, $1", "=r,m"(i32 *%ptr) -; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] - ret i32 %val -} - -@arr = global [8 x i32] zeroinitializer - -; Q should *never* have base/offset form even if given the chance. -define i32 @test_inline_constraint_Q(i32 *%ptr) { -; CHECK-LABEL: test_inline_constraint_Q: - %val = call i32 asm "ldr $0, $1", "=r,Q"(i32* getelementptr([8 x i32]* @arr, i32 0, i32 1)) -; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] - ret i32 %val -} - -@dump = global fp128 zeroinitializer - -define void @test_inline_constraint_w(<8 x i8> %vec64, <4 x float> %vec128, half %hlf, float %flt, double %dbl, fp128 %quad) { -; CHECK: test_inline_constraint_w: - call <8 x i8> asm sideeffect "add $0.8b, $1.8b, $1.8b", "=w,w"(<8 x i8> %vec64) - call <8 x i8> asm sideeffect "fadd $0.4s, $1.4s, $1.4s", "=w,w"(<4 x float> %vec128) -; CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -; CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - - ; Arguably semantically dodgy to output "vN", but it's what GCC does - ; so purely for compatibility we want vector registers to be output. 
- call float asm sideeffect "fcvt ${0:s}, ${1:h}", "=w,w"(half undef) - call float asm sideeffect "fadd $0.2s, $0.2s, $0.2s", "=w,w"(float %flt) - call double asm sideeffect "fadd $0.2d, $0.2d, $0.2d", "=w,w"(double %dbl) - call fp128 asm sideeffect "fadd $0.2d, $0.2d, $0.2d", "=w,w"(fp128 %quad) -; CHECK: fcvt {{s[0-9]+}}, {{h[0-9]+}} -; CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -; CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - ret void -} - -define void @test_inline_constraint_I() { -; CHECK-LABEL: test_inline_constraint_I: - call void asm sideeffect "add x0, x0, $0", "I"(i32 0) - call void asm sideeffect "add x0, x0, $0", "I"(i64 4095) -; CHECK: add x0, x0, #0 -; CHECK: add x0, x0, #4095 - - ret void -} - -; Skip J because it's useless - -define void @test_inline_constraint_K() { -; CHECK-LABEL: test_inline_constraint_K: - call void asm sideeffect "and w0, w0, $0", "K"(i32 2863311530) ; = 0xaaaaaaaa - call void asm sideeffect "and w0, w0, $0", "K"(i32 65535) -; CHECK: and w0, w0, #-1431655766 -; CHECK: and w0, w0, #65535 - - ret void -} - -define void @test_inline_constraint_L() { -; CHECK-LABEL: test_inline_constraint_L: - call void asm sideeffect "and x0, x0, $0", "L"(i64 4294967296) ; = 0xaaaaaaaa - call void asm sideeffect "and x0, x0, $0", "L"(i64 65535) -; CHECK: and x0, x0, #4294967296 -; CHECK: and x0, x0, #65535 - - ret void -} - -; Skip M and N because we don't support MOV pseudo-instructions yet. - -@var = global i32 0 - -define void @test_inline_constraint_S() { -; CHECK-LABEL: test_inline_constraint_S: - call void asm sideeffect "adrp x0, $0", "S"(i32* @var) - call void asm sideeffect "adrp x0, ${0:A}", "S"(i32* @var) - call void asm sideeffect "add x0, x0, ${0:L}", "S"(i32* @var) -; CHECK: adrp x0, var -; CHECK: adrp x0, var -; CHECK: add x0, x0, #:lo12:var - ret void -} - -define i32 @test_inline_constraint_S_label(i1 %in) { -; CHECK-LABEL: test_inline_constraint_S_label: - call void asm sideeffect "adr x0, $0", "S"(i8* blockaddress(@test_inline_constraint_S_label, %loc)) -; CHECK: adr x0, .Ltmp{{[0-9]+}} - br i1 %in, label %loc, label %loc2 -loc: - ret i32 0 -loc2: - ret i32 42 -} - -define void @test_inline_constraint_Y() { -; CHECK-LABEL: test_inline_constraint_Y: - call void asm sideeffect "fcmp s0, $0", "Y"(float 0.0) -; CHECK: fcmp s0, #0.0 - ret void -} - -define void @test_inline_constraint_Z() { -; CHECK-LABEL: test_inline_constraint_Z: - call void asm sideeffect "cmp w0, $0", "Z"(i32 0) -; CHECK: cmp w0, #0 - ret void -} diff --git a/test/CodeGen/AArch64/inline-asm-modifiers.ll b/test/CodeGen/AArch64/inline-asm-modifiers.ll deleted file mode 100644 index cb66335b105b..000000000000 --- a/test/CodeGen/AArch64/inline-asm-modifiers.ll +++ /dev/null @@ -1,147 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -no-integrated-as < %s | FileCheck %s - -@var_simple = hidden global i32 0 -@var_got = global i32 0 -@var_tlsgd = thread_local global i32 0 -@var_tlsld = thread_local(localdynamic) global i32 0 -@var_tlsie = thread_local(initialexec) global i32 0 -@var_tlsle = thread_local(localexec) global i32 0 - -define void @test_inline_modifier_L() nounwind { -; CHECK-LABEL: test_inline_modifier_L: - call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_simple) - call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_got) - call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsgd) - call void asm sideeffect "add x0, 
x0, ${0:L}", "S,~{x0}"(i32* @var_tlsld) - call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_tlsie) - call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsle) -; CHECK: add x0, x0, #:lo12:var_simple -; CHECK: ldr x0, [x0, #:got_lo12:var_got] -; CHECK: add x0, x0, #:tlsdesc_lo12:var_tlsgd -; CHECK: add x0, x0, #:dtprel_lo12:var_tlsld -; CHECK: ldr x0, [x0, #:gottprel_lo12:var_tlsie] -; CHECK: add x0, x0, #:tprel_lo12:var_tlsle - - call void asm sideeffect "add x0, x0, ${0:L}", "Si,~{x0}"(i32 64) - call void asm sideeffect "ldr x0, [x0, ${0:L}]", "Si,~{x0}"(i32 64) -; CHECK: add x0, x0, #64 -; CHECK: ldr x0, [x0, #64] - - ret void -} - -define void @test_inline_modifier_G() nounwind { -; CHECK-LABEL: test_inline_modifier_G: - call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsld) - call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsle) -; CHECK: add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12 -; CHECK: add x0, x0, #:tprel_hi12:var_tlsle, lsl #12 - - call void asm sideeffect "add x0, x0, ${0:G}", "Si,~{x0}"(i32 42) -; CHECK: add x0, x0, #42 - ret void -} - -define void @test_inline_modifier_A() nounwind { -; CHECK-LABEL: test_inline_modifier_A: - call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_simple) - call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_got) - call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsgd) - call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsie) - ; N.b. All tprel and dtprel relocs are modified: lo12 or granules. -; CHECK: adrp x0, var_simple -; CHECK: adrp x0, :got:var_got -; CHECK: adrp x0, :tlsdesc:var_tlsgd -; CHECK: adrp x0, :gottprel:var_tlsie - - call void asm sideeffect "adrp x0, ${0:A}", "Si,~{x0}"(i32 40) -; CHECK: adrp x0, #40 - - ret void -} - -define void @test_inline_modifier_wx(i32 %small, i64 %big) nounwind { -; CHECK-LABEL: test_inline_modifier_wx: - call i32 asm sideeffect "add $0, $0, $0", "=r,0"(i32 %small) - call i32 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i32 %small) - call i32 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i32 %small) -; CHECK: //APP -; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - - call i64 asm sideeffect "add $0, $0, $0", "=r,0"(i64 %big) - call i64 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i64 %big) - call i64 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i64 %big) -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - - call i32 asm sideeffect "add ${0:w}, ${1:w}, ${1:w}", "=r,r"(i32 0) - call i32 asm sideeffect "add ${0:x}, ${1:x}, ${1:x}", "=r,r"(i32 0) -; CHECK: add {{w[0-9]+}}, wzr, wzr -; CHECK: add {{x[0-9]+}}, xzr, xzr - - call i32 asm sideeffect "add ${0:w}, ${0:w}, ${1:w}", "=r,Ir,0"(i32 123, i32 %small) - call i64 asm sideeffect "add ${0:x}, ${0:x}, ${1:x}", "=r,Ir,0"(i32 456, i64 %big) -; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #123 -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #456 - - ret void -} - -define void @test_inline_modifier_bhsdq() nounwind { -; CHECK-LABEL: test_inline_modifier_bhsdq: - call float asm sideeffect "ldr ${0:b}, [sp]", "=w"() - call float asm sideeffect "ldr ${0:h}, [sp]", "=w"() - call float asm sideeffect "ldr ${0:s}, [sp]", "=w"() - call float asm sideeffect "ldr ${0:d}, [sp]", "=w"() - call float asm 
sideeffect "ldr ${0:q}, [sp]", "=w"() -; CHECK: ldr b0, [sp] -; CHECK: ldr h0, [sp] -; CHECK: ldr s0, [sp] -; CHECK: ldr d0, [sp] -; CHECK: ldr q0, [sp] - - call double asm sideeffect "ldr ${0:b}, [sp]", "=w"() - call double asm sideeffect "ldr ${0:h}, [sp]", "=w"() - call double asm sideeffect "ldr ${0:s}, [sp]", "=w"() - call double asm sideeffect "ldr ${0:d}, [sp]", "=w"() - call double asm sideeffect "ldr ${0:q}, [sp]", "=w"() -; CHECK: ldr b0, [sp] -; CHECK: ldr h0, [sp] -; CHECK: ldr s0, [sp] -; CHECK: ldr d0, [sp] -; CHECK: ldr q0, [sp] - - call void asm sideeffect "fcmp b0, ${0:b}", "Yw"(float 0.0) - call void asm sideeffect "fcmp h0, ${0:h}", "Yw"(float 0.0) - call void asm sideeffect "fcmp s0, ${0:s}", "Yw"(float 0.0) - call void asm sideeffect "fcmp d0, ${0:d}", "Yw"(float 0.0) - call void asm sideeffect "fcmp q0, ${0:q}", "Yw"(float 0.0) -; CHECK: fcmp b0, #0 -; CHECK: fcmp h0, #0 -; CHECK: fcmp s0, #0 -; CHECK: fcmp d0, #0 -; CHECK: fcmp q0, #0 - - ret void -} - -define void @test_inline_modifier_c() nounwind { -; CHECK-LABEL: test_inline_modifier_c: - call void asm sideeffect "adr x0, ${0:c}", "i"(i32 3) -; CHECK: adr x0, 3 - - ret void -} - -define void @test_inline_modifier_a() nounwind { -; CHECK-LABEL: test_inline_modifier_a: - call void asm sideeffect "prfm pldl1keep, ${0:a}", "r"(i32* @var_simple) -; CHECK: adrp [[VARHI:x[0-9]+]], var_simple -; CHECK: add x[[VARADDR:[0-9]+]], [[VARHI]], #:lo12:var_simple -; CHECK: prfm pldl1keep, [x[[VARADDR]]] - ret void -} - diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll index 87a42ba60a45..a0fcafa45100 100644 --- a/test/CodeGen/AArch64/jump-table.ll +++ b/test/CodeGen/AArch64/jump-table.ll @@ -1,6 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -code-model=large -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic <%s | FileCheck --check-prefix=CHECK-PIC %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s ; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -o - %s | FileCheck --check-prefix=CHECK-PIC %s diff --git a/test/CodeGen/AArch64/large-consts.ll b/test/CodeGen/AArch64/large-consts.ll index b1f98b9cf9ed..b5f6c32eef4e 100644 --- a/test/CodeGen/AArch64/large-consts.ll +++ b/test/CodeGen/AArch64/large-consts.ll @@ -1,14 +1,9 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -o - %s -code-model=large -show-mc-encoding | FileCheck %s --check-prefix=CHECK-AARCH64 ; RUN: llc -mtriple=arm64-linux-gnu -o - %s -code-model=large -show-mc-encoding | FileCheck %s --check-prefix=CHECK-ARM64 ; Make sure the shift amount is encoded into the instructions by LLVM because ; it's not the linker's job to put it there. 
define double @foo() { -; CHECK-AARCH64: movz [[CPADDR:x[0-9]+]], #:abs_g3:.LCPI0_0 // encoding: [A,A,0xe0'A',0xd2'A'] -; CHECK-AARCH64: movk [[CPADDR]], #:abs_g2_nc:.LCPI0_0 // encoding: [A,A,0xc0'A',0xf2'A'] -; CHECK-AARCH64: movk [[CPADDR]], #:abs_g1_nc:.LCPI0_0 // encoding: [A,A,0xa0'A',0xf2'A'] -; CHECK-AARCH64: movk [[CPADDR]], #:abs_g0_nc:.LCPI0_0 // encoding: [A,A,0x80'A',0xf2'A'] ; CHECK-ARM64: movz [[CPADDR:x[0-9]+]], #:abs_g3:.LCPI0_0 // encoding: [0bAAA01000,A,0b111AAAAA,0xd2] ; CHECK-ARM64: movk [[CPADDR]], #:abs_g2_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b110AAAAA,0xf2] diff --git a/test/CodeGen/AArch64/large-frame.ll b/test/CodeGen/AArch64/large-frame.ll deleted file mode 100644 index 79dc6487f1fc..000000000000 --- a/test/CodeGen/AArch64/large-frame.ll +++ /dev/null @@ -1,120 +0,0 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s -; arm64 has a separate copy: aarch64-large-frame.ll (codegen was too different). -declare void @use_addr(i8*) - -@addr = global i8* null - -define void @test_bigframe() { -; CHECK-LABEL: test_bigframe: -; CHECK: .cfi_startproc - - %var1 = alloca i8, i32 20000000 - %var2 = alloca i8, i32 16 - %var3 = alloca i8, i32 20000000 -; CHECK: sub sp, sp, #496 -; CHECK: .cfi_def_cfa sp, 496 -; CHECK: str x30, [sp, #488] - ; Total adjust is 39999536 -; CHECK: movz [[SUBCONST:x[0-9]+]], #22576 -; CHECK: movk [[SUBCONST]], #610, lsl #16 -; CHECK: sub sp, sp, [[SUBCONST]] -; CHECK: .cfi_def_cfa sp, 40000032 -; CHECK: .cfi_offset x30, -8 - - ; Total offset is 20000024 -; CHECK: movz [[VAR1OFFSET:x[0-9]+]], #11544 -; CHECK: movk [[VAR1OFFSET]], #305, lsl #16 -; CHECK: add {{x[0-9]+}}, sp, [[VAR1OFFSET]] - store volatile i8* %var1, i8** @addr - - %var1plus2 = getelementptr i8* %var1, i32 2 - store volatile i8* %var1plus2, i8** @addr - -; CHECK: movz [[VAR2OFFSET:x[0-9]+]], #11528 -; CHECK: movk [[VAR2OFFSET]], #305, lsl #16 -; CHECK: add {{x[0-9]+}}, sp, [[VAR2OFFSET]] - store volatile i8* %var2, i8** @addr - - %var2plus2 = getelementptr i8* %var2, i32 2 - store volatile i8* %var2plus2, i8** @addr - - store volatile i8* %var3, i8** @addr - - %var3plus2 = getelementptr i8* %var3, i32 2 - store volatile i8* %var3plus2, i8** @addr - -; CHECK: movz [[ADDCONST:x[0-9]+]], #22576 -; CHECK: movk [[ADDCONST]], #610, lsl #16 -; CHECK: add sp, sp, [[ADDCONST]] -; CHECK: .cfi_endproc - ret void -} - -define void @test_mediumframe() { -; CHECK-LABEL: test_mediumframe: - %var1 = alloca i8, i32 1000000 - %var2 = alloca i8, i32 16 - %var3 = alloca i8, i32 1000000 -; CHECK: sub sp, sp, #496 -; CHECK: str x30, [sp, #488] -; CHECK: sub sp, sp, #688 -; CHECK-NEXT: sub sp, sp, #488, lsl #12 - - store volatile i8* %var1, i8** @addr -; CHECK: add [[VAR1ADDR:x[0-9]+]], sp, #600 -; CHECK: add [[VAR1ADDR]], [[VAR1ADDR]], #244, lsl #12 - - %var1plus2 = getelementptr i8* %var1, i32 2 - store volatile i8* %var1plus2, i8** @addr -; CHECK: add [[VAR1PLUS2:x[0-9]+]], {{x[0-9]+}}, #2 - - store volatile i8* %var2, i8** @addr -; CHECK: add [[VAR2ADDR:x[0-9]+]], sp, #584 -; CHECK: add [[VAR2ADDR]], [[VAR2ADDR]], #244, lsl #12 - - %var2plus2 = getelementptr i8* %var2, i32 2 - store volatile i8* %var2plus2, i8** @addr -; CHECK: add [[VAR2PLUS2:x[0-9]+]], {{x[0-9]+}}, #2 - - store volatile i8* %var3, i8** @addr - - %var3plus2 = getelementptr i8* %var3, i32 2 - store volatile i8* %var3plus2, i8** @addr - -; CHECK: add sp, sp, #688 -; CHECK: add sp, sp, #488, lsl #12 -; CHECK: ldr x30, [sp, #488] -; CHECK: add sp, sp, #496 - ret void -} - - -@bigspace = global [8 x 
i64] zeroinitializer - -; If temporary registers are allocated for adjustment, they should *not* clobber -; argument registers. -define void @test_tempallocation([8 x i64] %val) nounwind { -; CHECK-LABEL: test_tempallocation: - %var = alloca i8, i32 1000000 -; CHECK: sub sp, sp, - -; Make sure the prologue is reasonably efficient -; CHECK-NEXT: stp x29, x30, [sp, -; CHECK-NEXT: stp x25, x26, [sp, -; CHECK-NEXT: stp x23, x24, [sp, -; CHECK-NEXT: stp x21, x22, [sp, -; CHECK-NEXT: stp x19, x20, [sp, - -; Make sure we don't trash an argument register -; CHECK-NOT: movz {{x[0-7],}} -; CHECK: sub sp, sp, - -; CHECK-NOT: movz {{x[0-7],}} - -; CHECK: bl use_addr - call void @use_addr(i8* %var) - - store [8 x i64] %val, [8 x i64]* @bigspace - ret void -; CHECK: ret -} diff --git a/test/CodeGen/AArch64/ldst-regoffset.ll b/test/CodeGen/AArch64/ldst-regoffset.ll index 2b42d8ec0830..b13634ca706a 100644 --- a/test/CodeGen/AArch64/ldst-regoffset.ll +++ b/test/CodeGen/AArch64/ldst-regoffset.ll @@ -1,5 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s diff --git a/test/CodeGen/AArch64/ldst-unscaledimm.ll b/test/CodeGen/AArch64/ldst-unscaledimm.ll index 36944ba9a8a8..d738cfdaa26a 100644 --- a/test/CodeGen/AArch64/ldst-unscaledimm.ll +++ b/test/CodeGen/AArch64/ldst-unscaledimm.ll @@ -1,5 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s diff --git a/test/CodeGen/AArch64/ldst-unsignedimm.ll b/test/CodeGen/AArch64/ldst-unsignedimm.ll index b3359b34f06a..d6475f904294 100644 --- a/test/CodeGen/AArch64/ldst-unsignedimm.ll +++ b/test/CodeGen/AArch64/ldst-unsignedimm.ll @@ -1,5 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg deleted file mode 100644 index c6f83453ac20..000000000000 --- a/test/CodeGen/AArch64/lit.local.cfg +++ /dev/null @@ -1,4 +0,0 @@ -targets = set(config.root.targets_to_build.split()) -if 'AArch64' not in targets or 'ARM64' not in targets: - config.unsupported = True - diff --git a/test/CodeGen/AArch64/literal_pools_float.ll b/test/CodeGen/AArch64/literal_pools_float.ll index 769a68bebc9c..6f9f3fc37722 100644 --- a/test/CodeGen/AArch64/literal_pools_float.ll +++ b/test/CodeGen/AArch64/literal_pools_float.ll @@ -1,7 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu 
-code-model=large | FileCheck --check-prefix=CHECK-LARGE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP-LARGE %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -code-model=large -mcpu=cyclone | FileCheck --check-prefix=CHECK-LARGE %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s diff --git a/test/CodeGen/AArch64/literal_pools_int.ll b/test/CodeGen/AArch64/literal_pools_int.ll deleted file mode 100644 index 33a73d58bb85..000000000000 --- a/test/CodeGen/AArch64/literal_pools_int.ll +++ /dev/null @@ -1,58 +0,0 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK-LARGE %s -; arm64 does not use literal pools for integers so there is nothing to check. - -@var32 = global i32 0 -@var64 = global i64 0 - -define void @foo() { -; CHECK-LABEL: foo: - %val32 = load i32* @var32 - %val64 = load i64* @var64 - - %val32_lit32 = and i32 %val32, 123456785 - store volatile i32 %val32_lit32, i32* @var32 -; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] -; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]] - -; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]] -; CHECK-LARGE: ldr {{w[0-9]+}}, [x[[LITADDR]]] - - %val64_lit32 = and i64 %val64, 305402420 - store volatile i64 %val64_lit32, i64* @var64 -; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] -; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]] - -; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]] -; CHECK-LARGE: ldr {{w[0-9]+}}, [x[[LITADDR]]] - - %val64_lit32signed = and i64 %val64, -12345678 - store volatile i64 %val64_lit32signed, i64* @var64 -; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] -; CHECK: ldrsw {{x[0-9]+}}, [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]] - -; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]] -; CHECK-LARGE: ldrsw {{x[0-9]+}}, [x[[LITADDR]]] - - %val64_lit64 = and i64 %val64, 1234567898765432 - store volatile i64 %val64_lit64, i64* @var64 -; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] -; CHECK: ldr {{x[0-9]+}}, [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]] - -; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]] -; CHECK-LARGE: ldr {{x[0-9]+}}, [x[[LITADDR]]] - - ret void -} diff --git 
a/test/CodeGen/AArch64/local_vars.ll b/test/CodeGen/AArch64/local_vars.ll index 1a76d5bcc1fe..4518fa210233 100644 --- a/test/CodeGen/AArch64/local_vars.ll +++ b/test/CodeGen/AArch64/local_vars.ll @@ -1,5 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP-AARCH64 %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP-ARM64 %s diff --git a/test/CodeGen/AArch64/logical-imm.ll b/test/CodeGen/AArch64/logical-imm.ll index 3ae63ad16f67..a5e4a9956de7 100644 --- a/test/CodeGen/AArch64/logical-imm.ll +++ b/test/CodeGen/AArch64/logical-imm.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s @var32 = global i32 0 diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll index 49b253bcfde6..608d44fc9d7b 100644 --- a/test/CodeGen/AArch64/logical_shifted_reg.ll +++ b/test/CodeGen/AArch64/logical_shifted_reg.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s @var1_32 = global i32 0 diff --git a/test/CodeGen/AArch64/mature-mc-support.ll b/test/CodeGen/AArch64/mature-mc-support.ll index 3359616fa8d2..2948da9f2007 100644 --- a/test/CodeGen/AArch64/mature-mc-support.ll +++ b/test/CodeGen/AArch64/mature-mc-support.ll @@ -1,10 +1,8 @@ ; Test that inline assembly is parsed by the MC layer when MC support is mature ; (even when the output is assembly). -; RUN: not llc -mtriple=aarch64-pc-linux < %s > /dev/null 2> %t1 ; RUN: FileCheck %s < %t1 -; RUN: not llc -mtriple=aarch64-pc-linux -filetype=obj < %s > /dev/null 2> %t2 ; RUN: FileCheck %s < %t2 ; RUN: not llc -mtriple=arm64-pc-linux < %s > /dev/null 2> %t3 diff --git a/test/CodeGen/AArch64/misched-basic-A53.ll b/test/CodeGen/AArch64/misched-basic-A53.ll deleted file mode 100644 index f80956e60fa2..000000000000 --- a/test/CodeGen/AArch64/misched-basic-A53.ll +++ /dev/null @@ -1,113 +0,0 @@ -; REQUIRES: asserts -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s -; arm64 now has a separate copy of this test. -; -; The Cortex-A53 machine model will cause the MADD instruction to be scheduled -; much higher than the ADD instructions in order to hide latency. When not -; specifying a subtarget, the MADD will remain near the end of the block. 
-; -; CHECK: ********** MI Scheduling ********** -; CHECK: main -; CHECK: *** Final schedule for BB#2 *** -; CHECK: SU(13) -; CHECK: MADDwwww -; CHECK: SU(4) -; CHECK: ADDwwi_lsl0_s -; CHECK: ********** INTERVALS ********** -@main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4 -@main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4 - -; Function Attrs: nounwind -define i32 @main() #0 { -entry: - %retval = alloca i32, align 4 - %x = alloca [8 x i32], align 4 - %y = alloca [8 x i32], align 4 - %i = alloca i32, align 4 - %xx = alloca i32, align 4 - %yy = alloca i32, align 4 - store i32 0, i32* %retval - %0 = bitcast [8 x i32]* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false) - %1 = bitcast [8 x i32]* %y to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false) - store i32 0, i32* %xx, align 4 - store i32 0, i32* %yy, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %2 = load i32* %i, align 4 - %cmp = icmp slt i32 %2, 8 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %3 = load i32* %i, align 4 - %idxprom = sext i32 %3 to i64 - %arrayidx = getelementptr inbounds [8 x i32]* %x, i32 0, i64 %idxprom - %4 = load i32* %arrayidx, align 4 - %add = add nsw i32 %4, 1 - store i32 %add, i32* %xx, align 4 - %5 = load i32* %xx, align 4 - %add1 = add nsw i32 %5, 12 - store i32 %add1, i32* %xx, align 4 - %6 = load i32* %xx, align 4 - %add2 = add nsw i32 %6, 23 - store i32 %add2, i32* %xx, align 4 - %7 = load i32* %xx, align 4 - %add3 = add nsw i32 %7, 34 - store i32 %add3, i32* %xx, align 4 - %8 = load i32* %i, align 4 - %idxprom4 = sext i32 %8 to i64 - %arrayidx5 = getelementptr inbounds [8 x i32]* %y, i32 0, i64 %idxprom4 - %9 = load i32* %arrayidx5, align 4 - %10 = load i32* %yy, align 4 - %mul = mul nsw i32 %10, %9 - store i32 %mul, i32* %yy, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %11 = load i32* %i, align 4 - %inc = add nsw i32 %11, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %12 = load i32* %xx, align 4 - %13 = load i32* %yy, align 4 - %add6 = add nsw i32 %12, %13 - ret i32 %add6 -} - - -; The Cortex-A53 machine model will cause the FDIVvvv_42 to be raised to -; hide latency. Whereas normally there would only be a single FADDvvv_4s -; after it, this test checks to make sure there are more than one. 
-; -; CHECK: ********** MI Scheduling ********** -; CHECK: neon4xfloat:BB#0 -; CHECK: *** Final schedule for BB#0 *** -; CHECK: FDIVvvv_4S -; CHECK: FADDvvv_4S -; CHECK: FADDvvv_4S -; CHECK: ********** INTERVALS ********** -define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) { - %tmp1 = fadd <4 x float> %A, %B; - %tmp2 = fadd <4 x float> %A, %tmp1; - %tmp3 = fadd <4 x float> %A, %tmp2; - %tmp4 = fadd <4 x float> %A, %tmp3; - %tmp5 = fadd <4 x float> %A, %tmp4; - %tmp6 = fadd <4 x float> %A, %tmp5; - %tmp7 = fadd <4 x float> %A, %tmp6; - %tmp8 = fadd <4 x float> %A, %tmp7; - %tmp9 = fdiv <4 x float> %A, %B; - %tmp10 = fadd <4 x float> %tmp8, %tmp9; - - ret <4 x float> %tmp10 -} - -; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 - -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind } diff --git a/test/CodeGen/AArch64/movw-consts.ll b/test/CodeGen/AArch64/movw-consts.ll index 876eb52df62b..6fe000974d5f 100644 --- a/test/CodeGen/AArch64/movw-consts.ll +++ b/test/CodeGen/AArch64/movw-consts.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 define i64 @test0() { @@ -10,49 +9,42 @@ define i64 @test0() { define i64 @test1() { ; CHECK-LABEL: test1: -; CHECK-AARCH64: movz x0, #1 ; CHECK-ARM64: orr w0, wzr, #0x1 ret i64 1 } define i64 @test2() { ; CHECK-LABEL: test2: -; CHECK-AARCH64: movz x0, #65535 ; CHECK-ARM64: orr w0, wzr, #0xffff ret i64 65535 } define i64 @test3() { ; CHECK-LABEL: test3: -; CHECK-AARCH64: movz x0, #1, lsl #16 ; CHECK-ARM64: orr w0, wzr, #0x10000 ret i64 65536 } define i64 @test4() { ; CHECK-LABEL: test4: -; CHECK-AARCH64: movz x0, #65535, lsl #16 ; CHECK-ARM64: orr w0, wzr, #0xffff0000 ret i64 4294901760 } define i64 @test5() { ; CHECK-LABEL: test5: -; CHECK-AARCH64: movz x0, #1, lsl #32 ; CHECK-ARM64: orr x0, xzr, #0x100000000 ret i64 4294967296 } define i64 @test6() { ; CHECK-LABEL: test6: -; CHECK-AARCH64: movz x0, #65535, lsl #32 ; CHECK-ARM64: orr x0, xzr, #0xffff00000000 ret i64 281470681743360 } define i64 @test7() { ; CHECK-LABEL: test7: -; CHECK-AARCH64: movz x0, #1, lsl #48 ; CHECK-ARM64: orr x0, xzr, #0x1000000000000 ret i64 281474976710656 } @@ -83,7 +75,6 @@ define i64 @test10() { define void @test11() { ; CHECK-LABEL: test11: -; CHECK-AARCH64: mov {{w[0-9]+}}, wzr ; CHECK-ARM64: str wzr store i32 0, i32* @var32 ret void @@ -91,7 +82,6 @@ define void @test11() { define void @test12() { ; CHECK-LABEL: test12: -; CHECK-AARCH64: movz {{w[0-9]+}}, #1 ; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0x1 store i32 1, i32* @var32 ret void @@ -99,7 +89,6 @@ define void @test12() { define void @test13() { ; CHECK-LABEL: test13: -; CHECK-AARCH64: movz {{w[0-9]+}}, #65535 ; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0xffff store i32 65535, i32* @var32 ret void @@ -107,7 +96,6 @@ define void @test13() { define void @test14() { ; CHECK-LABEL: test14: -; CHECK-AARCH64: movz {{w[0-9]+}}, #1, lsl #16 ; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0x10000 store i32 65536, i32* @var32 ret void @@ -115,7 +103,6 @@ define void @test14() { define void 
@test15() { ; CHECK-LABEL: test15: -; CHECK-AARCH64: movz {{w[0-9]+}}, #65535, lsl #16 ; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0xffff0000 store i32 4294901760, i32* @var32 ret void @@ -132,7 +119,6 @@ define i64 @test17() { ; CHECK-LABEL: test17: ; Mustn't MOVN w0 here. -; CHECK-AARCH64: movn x0, #2 ; CHECK-ARM64: orr x0, xzr, #0xfffffffffffffffd ret i64 -3 } diff --git a/test/CodeGen/AArch64/movw-shift-encoding.ll b/test/CodeGen/AArch64/movw-shift-encoding.ll index 8a0da4cb9324..2fe9dd4516e5 100644 --- a/test/CodeGen/AArch64/movw-shift-encoding.ll +++ b/test/CodeGen/AArch64/movw-shift-encoding.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s --check-prefix=CHECK-AARCH64 ; RUN: llc -mtriple=arm64-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s --check-prefix=CHECK-ARM64 @var = global i32 0 @@ -8,10 +7,6 @@ define i32* @get_var() { ret i32* @var -; CHECK-AARCH64: movz x0, #:abs_g3:var // encoding: [A,A,0xe0'A',0xd2'A'] -; CHECK-AARCH64: movk x0, #:abs_g2_nc:var // encoding: [A,A,0xc0'A',0xf2'A'] -; CHECK-AARCH64: movk x0, #:abs_g1_nc:var // encoding: [A,A,0xa0'A',0xf2'A'] -; CHECK-AARCH64: movk x0, #:abs_g0_nc:var // encoding: [A,A,0x80'A',0xf2'A'] ; CHECK-ARM64: movz x0, #:abs_g3:var // encoding: [0bAAA00000,A,0b111AAAAA,0xd2] ; CHECK-ARM64: movk x0, #:abs_g2_nc:var // encoding: [0bAAA00000,A,0b110AAAAA,0xf2] diff --git a/test/CodeGen/AArch64/mul-lohi.ll b/test/CodeGen/AArch64/mul-lohi.ll index 3b027f2d4f10..0689fbdcc078 100644 --- a/test/CodeGen/AArch64/mul-lohi.ll +++ b/test/CodeGen/AArch64/mul-lohi.ll @@ -1,5 +1,3 @@ -; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s -; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s ; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s ; RUN: llc -mtriple=arm64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s diff --git a/test/CodeGen/AArch64/named-reg-alloc.ll b/test/CodeGen/AArch64/named-reg-alloc.ll deleted file mode 100644 index 31d72f6be0ef..000000000000 --- a/test/CodeGen/AArch64/named-reg-alloc.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: not llc < %s -mtriple=aarch64-linux-gnueabi 2>&1 | FileCheck %s -; arm64 has separate copy of this test - -define i32 @get_stack() nounwind { -entry: -; FIXME: Include an allocatable-specific error message -; CHECK: Invalid register name global variable - %sp = call i32 @llvm.read_register.i32(metadata !0) - ret i32 %sp -} - -declare i32 @llvm.read_register.i32(metadata) nounwind - -!0 = metadata !{metadata !"x5\00"} diff --git a/test/CodeGen/AArch64/named-reg-notareg.ll b/test/CodeGen/AArch64/named-reg-notareg.ll deleted file mode 100644 index 66d013137cf4..000000000000 --- a/test/CodeGen/AArch64/named-reg-notareg.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: not llc < %s -mtriple=aarch64-linux-gnueabi 2>&1 | FileCheck %s -; arm64 has separate copy of this test - -define i32 @get_stack() nounwind { -entry: -; CHECK: Invalid register name global variable - %sp = call i32 @llvm.read_register.i32(metadata !0) - ret i32 %sp -} - -declare i32 @llvm.read_register.i32(metadata) nounwind - -!0 = metadata !{metadata !"notareg\00"} diff --git a/test/CodeGen/AArch64/neon-2velem-high.ll b/test/CodeGen/AArch64/neon-2velem-high.ll deleted file mode 100644 index ebdb5b7132d8..000000000000 --- a/test/CodeGen/AArch64/neon-2velem-high.ll +++ /dev/null @@ -1,331 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; 
arm64 has copied test in its directory due to differing intrinsics. -declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) - -declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) - -declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) - -declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) - -declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) - -declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) - -define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) { -; CHECK: test_vmull_high_n_s16: -; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - ret <4 x i32> %vmull15.i.i -} - -define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) { -; CHECK: test_vmull_high_n_s32: -; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - ret <2 x i64> %vmull9.i.i -} - -define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) { -; CHECK: test_vmull_high_n_u16: -; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - ret <4 x i32> %vmull15.i.i -} - -define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) { -; CHECK: test_vmull_high_n_u32: -; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - ret <2 x i64> %vmull9.i.i -} - -define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) { -; CHECK: test_vqdmull_high_n_s16: -; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 
x i32> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vqdmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - ret <4 x i32> %vqdmull15.i.i -} - -define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) { -; CHECK: test_vqdmull_high_n_s32: -; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vqdmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - ret <2 x i64> %vqdmull9.i.i -} - -define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { -; CHECK: test_vmlal_high_n_s16: -; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %add.i.i = add <4 x i32> %vmull2.i.i.i, %a - ret <4 x i32> %add.i.i -} - -define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { -; CHECK: test_vmlal_high_n_s32: -; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %add.i.i = add <2 x i64> %vmull2.i.i.i, %a - ret <2 x i64> %add.i.i -} - -define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) { -; CHECK: test_vmlal_high_n_u16: -; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %add.i.i = add <4 x i32> %vmull2.i.i.i, %a - ret <4 x i32> %add.i.i -} - -define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) { -; CHECK: test_vmlal_high_n_u32: -; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %add.i.i = add <2 x 
i64> %vmull2.i.i.i, %a - ret <2 x i64> %add.i.i -} - -define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { -; CHECK: test_vqdmlal_high_n_s16: -; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vqdmlal15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %vqdmlal17.i.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) - ret <4 x i32> %vqdmlal17.i.i -} - -define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { -; CHECK: test_vqdmlal_high_n_s32: -; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vqdmlal9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %vqdmlal11.i.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) - ret <2 x i64> %vqdmlal11.i.i -} - -define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { -; CHECK: test_vmlsl_high_n_s16: -; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i - ret <4 x i32> %sub.i.i -} - -define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { -; CHECK: test_vmlsl_high_n_s32: -; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i - ret <2 x i64> %sub.i.i -} - -define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) { -; CHECK: test_vmlsl_high_n_u16: -; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i - ret <4 x 
i32> %sub.i.i -} - -define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) { -; CHECK: test_vmlsl_high_n_u32: -; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i - ret <2 x i64> %sub.i.i -} - -define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { -; CHECK: test_vqdmlsl_high_n_s16: -; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vqdmlsl15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %vqdmlsl17.i.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) - ret <4 x i32> %vqdmlsl17.i.i -} - -define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { -; CHECK: test_vqdmlsl_high_n_s32: -; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vqdmlsl9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %vqdmlsl11.i.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) - ret <2 x i64> %vqdmlsl11.i.i -} - -define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) { -; CHECK: test_vmul_n_f32: -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -entry: - %vecinit.i = insertelement <2 x float> undef, float %b, i32 0 - %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1 - %mul.i = fmul <2 x float> %vecinit1.i, %a - ret <2 x float> %mul.i -} - -define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) { -; CHECK: test_vmulq_n_f32: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -entry: - %vecinit.i = insertelement <4 x float> undef, float %b, i32 0 - %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1 - %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2 - %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3 - %mul.i = fmul <4 x float> %vecinit3.i, %a - ret <4 x float> %mul.i -} - -define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) { -; CHECK: test_vmulq_n_f64: -; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -entry: - %vecinit.i = insertelement <2 x double> undef, double %b, i32 0 - %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1 - %mul.i = fmul <2 x double> %vecinit1.i, %a - ret <2 x double> %mul.i -} - -define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) { -; CHECK: test_vfma_n_f32: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %vecinit.i = 
insertelement <2 x float> undef, float %n, i32 0 - %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a) - ret <2 x float> %0 -} - -define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) { -; CHECK: test_vfmaq_n_f32: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 - %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 - %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 - %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a) - ret <4 x float> %0 -} - -define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) { -; CHECK: test_vfms_n_f32: -; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %vecinit.i = insertelement <2 x float> undef, float %n, i32 0 - %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 - %0 = fsub <2 x float> , %b - %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a) - ret <2 x float> %1 -} - -define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) { -; CHECK: test_vfmsq_n_f32: -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 - %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 - %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 - %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 - %0 = fsub <4 x float> , %b - %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a) - ret <4 x float> %1 -} diff --git a/test/CodeGen/AArch64/neon-2velem.ll b/test/CodeGen/AArch64/neon-2velem.ll deleted file mode 100644 index b9d0e84f16c2..000000000000 --- a/test/CodeGen/AArch64/neon-2velem.ll +++ /dev/null @@ -1,2854 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; arm64 has copied test in its directory due to differing intrinsics. 
- -declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>) - -declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>) - -declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>) - -declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) - -declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) - -declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) - -declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) - -declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) - -declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) - -declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) - -declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) - -declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmla_lane_s16: -; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %mul = mul <4 x i16> %shuffle, %b - %add = add <4 x i16> %mul, %a - ret <4 x i16> %add -} - -define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlaq_lane_s16: -; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> - %mul = mul <8 x i16> %shuffle, %b - %add = add <8 x i16> %mul, %a - ret <8 x i16> %add -} - -define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmla_lane_s32: -; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %mul = mul <2 x i32> %shuffle, %b - %add = add <2 x i32> %mul, %a - ret <2 x i32> %add -} - -define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlaq_lane_s32: -; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> - %mul = mul <4 x i32> %shuffle, %b - %add = add <4 x i32> %mul, %a - ret <4 x i32> %add -} - -define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmla_laneq_s16: -; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %mul = mul <4 x i16> %shuffle, %b - %add = add <4 x i16> %mul, %a - ret <4 x i16> %add -} - -define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlaq_laneq_s16: -; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; 
CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> - %mul = mul <8 x i16> %shuffle, %b - %add = add <8 x i16> %mul, %a - ret <8 x i16> %add -} - -define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmla_laneq_s32: -; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %mul = mul <2 x i32> %shuffle, %b - %add = add <2 x i32> %mul, %a - ret <2 x i32> %add -} - -define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlaq_laneq_s32: -; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> - %mul = mul <4 x i32> %shuffle, %b - %add = add <4 x i32> %mul, %a - ret <4 x i32> %add -} - -define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmls_lane_s16: -; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %mul = mul <4 x i16> %shuffle, %b - %sub = sub <4 x i16> %a, %mul - ret <4 x i16> %sub -} - -define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsq_lane_s16: -; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> - %mul = mul <8 x i16> %shuffle, %b - %sub = sub <8 x i16> %a, %mul - ret <8 x i16> %sub -} - -define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmls_lane_s32: -; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %mul = mul <2 x i32> %shuffle, %b - %sub = sub <2 x i32> %a, %mul - ret <2 x i32> %sub -} - -define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsq_lane_s32: -; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> - %mul = mul <4 x i32> %shuffle, %b - %sub = sub <4 x i32> %a, %mul - ret <4 x i32> %sub -} - -define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmls_laneq_s16: -; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %mul = mul <4 x i16> %shuffle, %b - %sub = sub <4 x i16> %a, %mul - ret <4 x i16> %sub -} - -define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsq_laneq_s16: -; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> - %mul = mul <8 x i16> %shuffle, %b - %sub = sub <8 x i16> %a, %mul - ret <8 x i16> %sub -} - -define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmls_laneq_s32: -; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %mul = mul <2 x i32> %shuffle, %b - %sub = sub <2 x i32> %a, %mul - ret <2 x i32> %sub -} - -define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x 
i32> %v) { -; CHECK: test_vmlsq_laneq_s32: -; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> - %mul = mul <4 x i32> %shuffle, %b - %sub = sub <4 x i32> %a, %mul - ret <4 x i32> %sub -} - -define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmul_lane_s16: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmulq_lane_s16: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmul_lane_s32: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmulq_lane_s32: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmul_lane_u16: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmulq_lane_u16: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmul_lane_u32: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmulq_lane_u32: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmul_laneq_s16: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmulq_laneq_s16: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> 
%v) { -; CHECK: test_vmul_laneq_s32: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmulq_laneq_s32: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmul_laneq_u16: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmulq_laneq_u16: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmul_laneq_u32: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmulq_laneq_u32: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { -; CHECK: test_vfma_lane_f32: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) - ret <2 x float> %0 -} - -declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) - -define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { -; CHECK: test_vfmaq_lane_f32: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) - ret <4 x float> %0 -} - -declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) - -define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) { -; CHECK: test_vfma_laneq_f32: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) - ret <2 x float> %0 -} - -define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { -; CHECK: test_vfmaq_laneq_f32: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> - %0 = tail call <4 x float> 
@llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) - ret <4 x float> %0 -} - -define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { -; CHECK: test_vfms_lane_f32: -; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %sub = fsub <2 x float> , %v - %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) - ret <2 x float> %0 -} - -define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { -; CHECK: test_vfmsq_lane_f32: -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %sub = fsub <2 x float> , %v - %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) - ret <4 x float> %0 -} - -define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) { -; CHECK: test_vfms_laneq_f32: -; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %sub = fsub <4 x float> , %v - %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) - ret <2 x float> %0 -} - -define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { -; CHECK: test_vfmsq_laneq_f32: -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %sub = fsub <4 x float> , %v - %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) - ret <4 x float> %0 -} - -define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { -; CHECK: test_vfmaq_lane_f64: -; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer - %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) - ret <2 x double> %0 -} - -declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) - -define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { -; CHECK: test_vfmaq_laneq_f64: -; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> - %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) - ret <2 x double> %0 -} - -define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { -; CHECK: test_vfmsq_lane_f64: -; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %sub = fsub <1 x double> , %v - %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer - %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) - ret <2 x double> %0 -} - -define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { -; CHECK: test_vfmsq_laneq_f64: -; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -entry: - %sub = fsub <2 x double> , %v - %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x 
i32> - %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) - ret <2 x double> %0 -} - -define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) { -; CHECK-LABEL: test_vfmas_laneq_f32 -; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %extract = extractelement <4 x float> %v, i32 3 - %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) - ret float %0 -} - -declare float @llvm.fma.f32(float, float, float) - -define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) { -; CHECK-LABEL: test_vfmsd_lane_f64 -; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %extract.rhs = extractelement <1 x double> %v, i32 0 - %extract = fsub double -0.000000e+00, %extract.rhs - %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a) - ret double %0 -} - -declare double @llvm.fma.f64(double, double, double) - -define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) { -; CHECK: test_vfmss_laneq_f32 -; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %extract.rhs = extractelement <4 x float> %v, i32 3 - %extract = fsub float -0.000000e+00, %extract.rhs - %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) - ret float %0 -} - -define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) { -; CHECK-LABEL: test_vfmsd_laneq_f64 -; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -entry: - %extract.rhs = extractelement <2 x double> %v, i32 1 - %extract = fsub double -0.000000e+00, %extract.rhs - %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a) - ret double %0 -} - -define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlal_lane_s16: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlal_lane_s32: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlal_laneq_s16: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlal_laneq_s32: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> 
@test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlal_high_lane_s16: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlal_high_lane_s32: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlal_high_laneq_s16: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlal_high_laneq_s32: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_lane_s16: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_lane_s32: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_laneq_s16: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_laneq_s32: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; 
CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_high_lane_s16: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_high_lane_s32: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_high_laneq_s16: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_high_laneq_s32: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlal_lane_u16: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlal_lane_u32: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlal_laneq_u16: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, 
<4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlal_laneq_u32: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlal_high_lane_u16: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlal_high_lane_u32: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlal_high_laneq_u16: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlal_high_laneq_u32: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_lane_u16: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_lane_u32: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 
-; CHECK: test_vmlsl_laneq_u16: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_laneq_u32: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_high_lane_u16: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_high_lane_u32: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_high_laneq_u16: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_high_laneq_u32: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_lane_s16: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_lane_s32: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> 
@llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_lane_u16: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_lane_u32: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_high_lane_s16: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_high_lane_s32: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_high_lane_u16: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_high_lane_u32: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_laneq_s16: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_laneq_s32: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x 
i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_laneq_u16: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_laneq_u32: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_high_laneq_s16: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_high_laneq_s32: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_high_laneq_u16: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_high_laneq_u32: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlal_lane_s16: -; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) - ret <4 x i32> %vqdmlal4.i -} - -define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlal_lane_s32: -; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> 
%shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) - ret <2 x i64> %vqdmlal4.i -} - -define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlal_high_lane_s16: -; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) - ret <4 x i32> %vqdmlal4.i -} - -define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlal_high_lane_s32: -; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) - ret <2 x i64> %vqdmlal4.i -} - -define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlsl_lane_s16: -; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) - ret <4 x i32> %vqdmlsl4.i -} - -define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlsl_lane_s32: -; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) - ret <2 x i64> %vqdmlsl4.i -} - -define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlsl_high_lane_s16: -; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) - ret <4 x i32> %vqdmlsl4.i -} - -define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlsl_high_lane_s32: -; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) - ret <2 x i64> 
%vqdmlsl4.i -} - -define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmull_lane_s16: -; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmull_lane_s32: -; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vqdmull_laneq_s16: -; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vqdmull_laneq_s32: -; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmull_high_lane_s16: -; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmull_high_lane_s32: -; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vqdmull_high_laneq_s16: -; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vqdmull_high_laneq_s32: -; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} 
- -define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmulh_lane_s16: -; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i16> %vqdmulh2.i -} - -define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmulhq_lane_s16: -; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> - %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) - ret <8 x i16> %vqdmulh2.i -} - -define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmulh_lane_s32: -; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i32> %vqdmulh2.i -} - -define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmulhq_lane_s32: -; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> - %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) - ret <4 x i32> %vqdmulh2.i -} - -define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqrdmulh_lane_s16: -; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i16> %vqrdmulh2.i -} - -define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqrdmulhq_lane_s16: -; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> - %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) - ret <8 x i16> %vqrdmulh2.i -} - -define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqrdmulh_lane_s32: -; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i32> %vqrdmulh2.i -} - -define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqrdmulhq_lane_s32: -; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> - %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) - ret <4 x i32> %vqrdmulh2.i -} - -define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) { -; CHECK: test_vmul_lane_f32: -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> - %mul = fmul <2 x float> %shuffle, %a - ret <2 x float> %mul -} - -define <1 
x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) { -; CHECK: test_vmul_lane_f64: -; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %0 = bitcast <1 x double> %a to <8 x i8> - %1 = bitcast <8 x i8> %0 to double - %extract = extractelement <1 x double> %v, i32 0 - %2 = fmul double %1, %extract - %3 = insertelement <1 x double> undef, double %2, i32 0 - ret <1 x double> %3 -} - -define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) { -; CHECK: test_vmulq_lane_f32: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> - %mul = fmul <4 x float> %shuffle, %a - ret <4 x float> %mul -} - -define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) { -; CHECK: test_vmulq_lane_f64: -; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer - %mul = fmul <2 x double> %shuffle, %a - ret <2 x double> %mul -} - -define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) { -; CHECK: test_vmul_laneq_f32: -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> - %mul = fmul <2 x float> %shuffle, %a - ret <2 x float> %mul -} - -define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) { -; CHECK: test_vmul_laneq_f64: -; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -entry: - %0 = bitcast <1 x double> %a to <8 x i8> - %1 = bitcast <8 x i8> %0 to double - %extract = extractelement <2 x double> %v, i32 1 - %2 = fmul double %1, %extract - %3 = insertelement <1 x double> undef, double %2, i32 0 - ret <1 x double> %3 -} - -define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) { -; CHECK: test_vmulq_laneq_f32: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> - %mul = fmul <4 x float> %shuffle, %a - ret <4 x float> %mul -} - -define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) { -; CHECK: test_vmulq_laneq_f64: -; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> - %mul = fmul <2 x double> %shuffle, %a - ret <2 x double> %mul -} - -define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) { -; CHECK: test_vmulx_lane_f32: -; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> - %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) - ret <2 x float> %vmulx2.i -} - -define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) { -; CHECK: test_vmulxq_lane_f32: -; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> - %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) - ret <4 x float> %vmulx2.i -} - -define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) { -; CHECK: test_vmulxq_lane_f64: -; CHECK: mulx {{v[0-9]+}}.2d, 
{{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) - ret <2 x double> %vmulx2.i -} - -define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) { -; CHECK: test_vmulx_laneq_f32: -; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> - %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) - ret <2 x float> %vmulx2.i -} - -define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) { -; CHECK: test_vmulxq_laneq_f32: -; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> - %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) - ret <4 x float> %vmulx2.i -} - -define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) { -; CHECK: test_vmulxq_laneq_f64: -; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> - %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) - ret <2 x double> %vmulx2.i -} - -define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmla_lane_s16_0: -; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %b - %add = add <4 x i16> %mul, %a - ret <4 x i16> %add -} - -define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlaq_lane_s16_0: -; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %b - %add = add <8 x i16> %mul, %a - ret <8 x i16> %add -} - -define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmla_lane_s32_0: -; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %b - %add = add <2 x i32> %mul, %a - ret <2 x i32> %add -} - -define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlaq_lane_s32_0: -; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %b - %add = add <4 x i32> %mul, %a - ret <4 x i32> %add -} - -define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmla_laneq_s16_0: -; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %b - %add = add <4 x i16> %mul, %a - ret <4 x i16> %add -} - -define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: 
test_vmlaq_laneq_s16_0: -; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %b - %add = add <8 x i16> %mul, %a - ret <8 x i16> %add -} - -define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmla_laneq_s32_0: -; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %b - %add = add <2 x i32> %mul, %a - ret <2 x i32> %add -} - -define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlaq_laneq_s32_0: -; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %b - %add = add <4 x i32> %mul, %a - ret <4 x i32> %add -} - -define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmls_lane_s16_0: -; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %b - %sub = sub <4 x i16> %a, %mul - ret <4 x i16> %sub -} - -define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsq_lane_s16_0: -; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %b - %sub = sub <8 x i16> %a, %mul - ret <8 x i16> %sub -} - -define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmls_lane_s32_0: -; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %b - %sub = sub <2 x i32> %a, %mul - ret <2 x i32> %sub -} - -define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsq_lane_s32_0: -; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %b - %sub = sub <4 x i32> %a, %mul - ret <4 x i32> %sub -} - -define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmls_laneq_s16_0: -; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %b - %sub = sub <4 x i16> %a, %mul - ret <4 x i16> %sub -} - -define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsq_laneq_s16_0: -; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %b - %sub = sub <8 x i16> %a, %mul - ret <8 x i16> %sub -} - -define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmls_laneq_s32_0: -; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, 
{{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %b - %sub = sub <2 x i32> %a, %mul - ret <2 x i32> %sub -} - -define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsq_laneq_s32_0: -; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %b - %sub = sub <4 x i32> %a, %mul - ret <4 x i32> %sub -} - -define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmul_lane_s16_0: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmulq_lane_s16_0: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmul_lane_s32_0: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmulq_lane_s32_0: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmul_lane_u16_0: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmulq_lane_u16_0: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmul_lane_u32_0: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmulq_lane_u32_0: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmul_laneq_s16_0: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> 
%v, <8 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmulq_laneq_s16_0: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmul_laneq_s32_0: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmulq_laneq_s32_0: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmul_laneq_u16_0: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmulq_laneq_u16_0: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmul_laneq_u32_0: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmulq_laneq_u32_0: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { -; CHECK: test_vfma_lane_f32_0: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) - ret <2 x float> %0 -} - -define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { -; CHECK: test_vfmaq_lane_f32_0: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) - ret <4 x float> %0 -} - -define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { -; CHECK: test_vfma_laneq_f32_0: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret 
-entry:
-  %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
-; CHECK: test_vfmaq_laneq_f32_0:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
-; CHECK: test_vfms_lane_f32_0:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
-; CHECK: test_vfmsq_lane_f32_0:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK: test_vfms_laneq_f32_0:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
-  ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
-; CHECK: test_vfmsq_laneq_f32_0:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
-  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer
-  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
-  ret <4 x float> %0
-}
-
-define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
-; CHECK: test_vfmaq_laneq_f64_0:
-; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
-  ret <2 x double> %0
-}
-
-define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
-; CHECK: test_vfmsq_laneq_f64_0:
-; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
-  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
-  %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer
-  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
-  ret <2 x double> %0
-}
-
-define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_lane_s16_0:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_lane_s32_0:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_laneq_s16_0:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_laneq_s32_0:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_high_lane_s16_0:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_high_lane_s32_0:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
-  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
-  %add = add <2 x i64> %vmull2.i, %a
-  ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_high_laneq_s16_0:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
-  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
-  %add = add <4 x i32> %vmull2.i, %a
-  ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_high_laneq_s32_0:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
-  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef,
<2 x i32> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_lane_s16_0: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_lane_s32_0: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_laneq_s16_0: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_laneq_s32_0: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_high_lane_s16_0: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_high_lane_s32_0: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_high_laneq_s16_0: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call 
<4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_high_laneq_s32_0: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlal_lane_u16_0: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlal_lane_u32_0: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlal_laneq_u16_0: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlal_laneq_u32_0: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlal_high_lane_u16_0: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlal_high_lane_u32_0: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a 
- ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlal_high_laneq_u16_0: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlal_high_laneq_u32_0: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_lane_u16_0: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_lane_u32_0: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_laneq_u16_0: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_laneq_u32_0: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_high_lane_u16_0: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 
-; CHECK: test_vmlsl_high_lane_u32_0: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_high_laneq_u16_0: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_high_laneq_u32_0: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_lane_s16_0: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_lane_s32_0: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_lane_u16_0: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_lane_u32_0: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_high_lane_s16_0: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> 
@llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_high_lane_s32_0: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_high_lane_u16_0: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_high_lane_u32_0: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_laneq_s16_0: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_laneq_s32_0: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_laneq_u16_0: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_laneq_u32_0: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_high_laneq_s16_0: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, 
<4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_high_laneq_s32_0: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_high_laneq_u16_0: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_high_laneq_u32_0: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlal_lane_s16_0: -; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) - ret <4 x i32> %vqdmlal4.i -} - -define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlal_lane_s32_0: -; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) - ret <2 x i64> %vqdmlal4.i -} - -define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlal_high_lane_s16_0: -; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) - ret <4 x i32> %vqdmlal4.i -} - -define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlal_high_lane_s32_0: -; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret 
-entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) - ret <2 x i64> %vqdmlal4.i -} - -define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlsl_lane_s16_0: -; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) - ret <4 x i32> %vqdmlsl4.i -} - -define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlsl_lane_s32_0: -; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) - ret <2 x i64> %vqdmlsl4.i -} - -define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlsl_high_lane_s16_0: -; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) - ret <4 x i32> %vqdmlsl4.i -} - -define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlsl_high_lane_s32_0: -; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) - ret <2 x i64> %vqdmlsl4.i -} - -define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmull_lane_s16_0: -; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmull_lane_s32_0: -; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 
x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vqdmull_laneq_s16_0: -; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vqdmull_laneq_s32_0: -; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmull_high_lane_s16_0: -; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmull_high_lane_s32_0: -; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vqdmull_high_laneq_s16_0: -; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vqdmull_high_laneq_s32_0: -; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmulh_lane_s16_0: -; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i16> %vqdmulh2.i -} - -define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmulhq_lane_s16_0: -; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> 
zeroinitializer - %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) - ret <8 x i16> %vqdmulh2.i -} - -define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmulh_lane_s32_0: -; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i32> %vqdmulh2.i -} - -define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmulhq_lane_s32_0: -; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) - ret <4 x i32> %vqdmulh2.i -} - -define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqrdmulh_lane_s16_0: -; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i16> %vqrdmulh2.i -} - -define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqrdmulhq_lane_s16_0: -; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) - ret <8 x i16> %vqrdmulh2.i -} - -define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqrdmulh_lane_s32_0: -; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i32> %vqrdmulh2.i -} - -define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqrdmulhq_lane_s32_0: -; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) - ret <4 x i32> %vqrdmulh2.i -} - -define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) { -; CHECK: test_vmul_lane_f32_0: -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer - %mul = fmul <2 x float> %shuffle, %a - ret <2 x float> %mul -} - -define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) { -; CHECK: test_vmulq_lane_f32_0: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer - %mul = fmul <4 x float> %shuffle, %a - ret <4 x float> %mul -} - -define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) { -; CHECK: test_vmul_laneq_f32_0: -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, 
{{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer - %mul = fmul <2 x float> %shuffle, %a - ret <2 x float> %mul -} - -define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) { -; CHECK: test_vmul_laneq_f64_0: -; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %0 = bitcast <1 x double> %a to <8 x i8> - %1 = bitcast <8 x i8> %0 to double - %extract = extractelement <2 x double> %v, i32 0 - %2 = fmul double %1, %extract - %3 = insertelement <1 x double> undef, double %2, i32 0 - ret <1 x double> %3 -} - -define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { -; CHECK: test_vmulq_laneq_f32_0: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer - %mul = fmul <4 x float> %shuffle, %a - ret <4 x float> %mul -} - -define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { -; CHECK: test_vmulq_laneq_f64_0: -; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer - %mul = fmul <2 x double> %shuffle, %a - ret <2 x double> %mul -} - -define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) { -; CHECK: test_vmulx_lane_f32_0: -; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) - ret <2 x float> %vmulx2.i -} - -define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) { -; CHECK: test_vmulxq_lane_f32_0: -; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer - %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) - ret <4 x float> %vmulx2.i -} - -define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) { -; CHECK: test_vmulxq_lane_f64_0: -; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) - ret <2 x double> %vmulx2.i -} - -define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) { -; CHECK: test_vmulx_laneq_f32_0: -; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) - ret <2 x float> %vmulx2.i -} - -define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { -; CHECK: test_vmulxq_laneq_f32_0: -; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer - %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) - ret <4 x float> %vmulx2.i -} - -define <2 x double> 
@test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { -; CHECK: test_vmulxq_laneq_f64_0: -; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) - ret <2 x double> %vmulx2.i -} - diff --git a/test/CodeGen/AArch64/neon-3vdiff.ll b/test/CodeGen/AArch64/neon-3vdiff.ll deleted file mode 100644 index dbe2a726b902..000000000000 --- a/test/CodeGen/AArch64/neon-3vdiff.ll +++ /dev/null @@ -1,1834 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has its own copy of this test in its directory. - -declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) - -declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) - -declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) - -declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) - -declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) - -declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) - -declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) - -declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) - -declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) - -declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) - -declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) - -declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) - -declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) - -declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) - -declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) - -declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) - -declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) - -declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) - -declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vaddl_s8: -; CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = sext <8 x i8> %a to <8 x i16> - %vmovl.i2.i = sext <8 x i8> %b to <8 x i16> - %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vaddl_s16: -; CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = sext <4 x i16> %a to <4 x i32> - %vmovl.i2.i = sext <4 x i16> %b to <4 x i32> - %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vaddl_s32: -; CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = sext <2 x i32> %a to <2 x i64> - %vmovl.i2.i = sext <2 x i32> %b to <2 x i64> - %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i - ret <2 x i64> %add.i -} - -define <8 
x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vaddl_u8: -; CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = zext <8 x i8> %a to <8 x i16> - %vmovl.i2.i = zext <8 x i8> %b to <8 x i16> - %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vaddl_u16: -; CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = zext <4 x i16> %a to <4 x i32> - %vmovl.i2.i = zext <4 x i16> %b to <4 x i32> - %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vaddl_u32: -; CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = zext <2 x i32> %a to <2 x i64> - %vmovl.i2.i = zext <2 x i32> %b to <2 x i64> - %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vaddl_high_s8: -; CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16> - %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16> - %add.i = add <8 x i16> %0, %1 - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vaddl_high_s16: -; CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32> - %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32> - %add.i = add <4 x i32> %0, %1 - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vaddl_high_s32: -; CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64> - %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64> - %add.i = add <2 x i64> %0, %1 - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vaddl_high_u8: -; CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> - %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16> - %add.i = add <8 x i16> %0, %1 - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vaddl_high_u16: -; CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> - %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32> - %add.i = add <4 x i32> %0, %1 - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vaddl_high_u32: -; CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: 
- %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> - %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64> - %add.i = add <2 x i64> %0, %1 - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) { -; CHECK: test_vaddw_s8: -; CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = sext <8 x i8> %b to <8 x i16> - %add.i = add <8 x i16> %vmovl.i.i, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) { -; CHECK: test_vaddw_s16: -; CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = sext <4 x i16> %b to <4 x i32> - %add.i = add <4 x i32> %vmovl.i.i, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) { -; CHECK: test_vaddw_s32: -; CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = sext <2 x i32> %b to <2 x i64> - %add.i = add <2 x i64> %vmovl.i.i, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) { -; CHECK: test_vaddw_u8: -; CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = zext <8 x i8> %b to <8 x i16> - %add.i = add <8 x i16> %vmovl.i.i, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) { -; CHECK: test_vaddw_u16: -; CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = zext <4 x i16> %b to <4 x i32> - %add.i = add <4 x i32> %vmovl.i.i, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) { -; CHECK: test_vaddw_u32: -; CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = zext <2 x i32> %b to <2 x i64> - %add.i = add <2 x i64> %vmovl.i.i, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) { -; CHECK: test_vaddw_high_s8: -; CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16> - %add.i = add <8 x i16> %0, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) { -; CHECK: test_vaddw_high_s16: -; CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32> - %add.i = add <4 x i32> %0, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) { -; CHECK: test_vaddw_high_s32: -; CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64> - %add.i = add <2 x i64> %0, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) { -; CHECK: test_vaddw_high_u8: -; CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> - %add.i = add <8 x i16> %0, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) { -; CHECK: test_vaddw_high_u16: -; CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h -entry: - 
%shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> - %add.i = add <4 x i32> %0, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) { -; CHECK: test_vaddw_high_u32: -; CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> - %add.i = add <2 x i64> %0, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsubl_s8: -; CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = sext <8 x i8> %a to <8 x i16> - %vmovl.i2.i = sext <8 x i8> %b to <8 x i16> - %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsubl_s16: -; CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = sext <4 x i16> %a to <4 x i32> - %vmovl.i2.i = sext <4 x i16> %b to <4 x i32> - %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vsubl_s32: -; CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = sext <2 x i32> %a to <2 x i64> - %vmovl.i2.i = sext <2 x i32> %b to <2 x i64> - %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsubl_u8: -; CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = zext <8 x i8> %a to <8 x i16> - %vmovl.i2.i = zext <8 x i8> %b to <8 x i16> - %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsubl_u16: -; CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = zext <4 x i16> %a to <4 x i32> - %vmovl.i2.i = zext <4 x i16> %b to <4 x i32> - %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vsubl_u32: -; CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = zext <2 x i32> %a to <2 x i64> - %vmovl.i2.i = zext <2 x i32> %b to <2 x i64> - %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsubl_high_s8: -; CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16> - %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16> - %sub.i = sub <8 x i16> %0, %1 - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsubl_high_s16: -; CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32> - %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32> - %sub.i = sub <4 x i32> %0, %1 - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: 
test_vsubl_high_s32: -; CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64> - %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64> - %sub.i = sub <2 x i64> %0, %1 - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsubl_high_u8: -; CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> - %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16> - %sub.i = sub <8 x i16> %0, %1 - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsubl_high_u16: -; CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> - %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32> - %sub.i = sub <4 x i32> %0, %1 - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsubl_high_u32: -; CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> - %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64> - %sub.i = sub <2 x i64> %0, %1 - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) { -; CHECK: test_vsubw_s8: -; CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = sext <8 x i8> %b to <8 x i16> - %sub.i = sub <8 x i16> %a, %vmovl.i.i - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) { -; CHECK: test_vsubw_s16: -; CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = sext <4 x i16> %b to <4 x i32> - %sub.i = sub <4 x i32> %a, %vmovl.i.i - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) { -; CHECK: test_vsubw_s32: -; CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = sext <2 x i32> %b to <2 x i64> - %sub.i = sub <2 x i64> %a, %vmovl.i.i - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) { -; CHECK: test_vsubw_u8: -; CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = zext <8 x i8> %b to <8 x i16> - %sub.i = sub <8 x i16> %a, %vmovl.i.i - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) { -; CHECK: test_vsubw_u16: -; CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = zext <4 x i16> %b to <4 x i32> - %sub.i = sub <4 x i32> %a, %vmovl.i.i - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) { -; CHECK: test_vsubw_u32: -; CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = zext <2 x i32> %b to <2 x i64> - %sub.i = sub <2 x i64> %a, %vmovl.i.i - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) 
{ -; CHECK: test_vsubw_high_s8: -; CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16> - %sub.i = sub <8 x i16> %a, %0 - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) { -; CHECK: test_vsubw_high_s16: -; CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32> - %sub.i = sub <4 x i32> %a, %0 - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) { -; CHECK: test_vsubw_high_s32: -; CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64> - %sub.i = sub <2 x i64> %a, %0 - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) { -; CHECK: test_vsubw_high_u8: -; CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> - %sub.i = sub <8 x i16> %a, %0 - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) { -; CHECK: test_vsubw_high_u16: -; CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> - %sub.i = sub <4 x i32> %a, %0 - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) { -; CHECK: test_vsubw_high_u32: -; CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> - %sub.i = sub <2 x i64> %a, %0 - ret <2 x i64> %sub.i -} - -define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vaddhn_s16: -; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vaddhn.i = add <8 x i16> %a, %b - %vaddhn1.i = lshr <8 x i16> %vaddhn.i, - %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8> - ret <8 x i8> %vaddhn2.i -} - -define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vaddhn_s32: -; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vaddhn.i = add <4 x i32> %a, %b - %vaddhn1.i = lshr <4 x i32> %vaddhn.i, - %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16> - ret <4 x i16> %vaddhn2.i -} - -define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vaddhn_s64: -; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vaddhn.i = add <2 x i64> %a, %b - %vaddhn1.i = lshr <2 x i64> %vaddhn.i, - %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32> - ret <2 x i32> %vaddhn2.i -} - -define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vaddhn_u16: -; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vaddhn.i = add <8 x i16> %a, %b - %vaddhn1.i = lshr <8 x i16> %vaddhn.i, - %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8> - ret <8 x i8> %vaddhn2.i -} - -define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vaddhn_u32: -; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vaddhn.i = add <4 x i32> 
%a, %b - %vaddhn1.i = lshr <4 x i32> %vaddhn.i, - %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16> - ret <4 x i16> %vaddhn2.i -} - -define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vaddhn_u64: -; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vaddhn.i = add <2 x i64> %a, %b - %vaddhn1.i = lshr <2 x i64> %vaddhn.i, - %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32> - ret <2 x i32> %vaddhn2.i -} - -define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vaddhn_high_s16: -; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vaddhn.i.i = add <8 x i16> %a, %b - %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, - %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8> - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vaddhn_high_s32: -; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vaddhn.i.i = add <4 x i32> %a, %b - %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, - %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16> - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vaddhn_high_s64: -; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vaddhn.i.i = add <2 x i64> %a, %b - %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, - %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32> - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vaddhn_high_u16: -; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vaddhn.i.i = add <8 x i16> %a, %b - %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, - %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8> - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vaddhn_high_u32: -; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vaddhn.i.i = add <4 x i32> %a, %b - %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, - %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16> - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vaddhn_high_u64: -; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vaddhn.i.i = add <2 x i64> %a, %b - %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, - 
%vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32> - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vraddhn_s16: -; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vraddhn2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) - ret <8 x i8> %vraddhn2.i -} - -define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vraddhn_s32: -; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vraddhn2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) - ret <4 x i16> %vraddhn2.i -} - -define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vraddhn_s64: -; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vraddhn2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) - ret <2 x i32> %vraddhn2.i -} - -define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vraddhn_u16: -; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vraddhn2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) - ret <8 x i8> %vraddhn2.i -} - -define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vraddhn_u32: -; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vraddhn2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) - ret <4 x i16> %vraddhn2.i -} - -define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vraddhn_u64: -; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vraddhn2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) - ret <2 x i32> %vraddhn2.i -} - -define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vraddhn_high_s16: -; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vraddhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vraddhn_high_s32: -; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vraddhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vraddhn_high_s64: -; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vraddhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> 
%shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vraddhn_high_u16: -; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vraddhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vraddhn_high_u32: -; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vraddhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vraddhn_high_u64: -; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vraddhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsubhn_s16: -; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vsubhn.i = sub <8 x i16> %a, %b - %vsubhn1.i = lshr <8 x i16> %vsubhn.i, - %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8> - ret <8 x i8> %vsubhn2.i -} - -define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsubhn_s32: -; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vsubhn.i = sub <4 x i32> %a, %b - %vsubhn1.i = lshr <4 x i32> %vsubhn.i, - %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16> - ret <4 x i16> %vsubhn2.i -} - -define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsubhn_s64: -; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vsubhn.i = sub <2 x i64> %a, %b - %vsubhn1.i = lshr <2 x i64> %vsubhn.i, - %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32> - ret <2 x i32> %vsubhn2.i -} - -define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsubhn_u16: -; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vsubhn.i = sub <8 x i16> %a, %b - %vsubhn1.i = lshr <8 x i16> %vsubhn.i, - %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8> - ret <8 x i8> %vsubhn2.i -} - -define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsubhn_u32: -; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vsubhn.i = sub <4 x i32> %a, %b - %vsubhn1.i = lshr <4 x i32> %vsubhn.i, - %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16> - ret <4 x i16> %vsubhn2.i -} - -define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsubhn_u64: -; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vsubhn.i = sub <2 x i64> %a, %b - %vsubhn1.i = lshr <2 x i64> %vsubhn.i, - %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32> - ret <2 x i32> 
%vsubhn2.i -} - -define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsubhn_high_s16: -; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vsubhn.i.i = sub <8 x i16> %a, %b - %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, - %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8> - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsubhn_high_s32: -; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vsubhn.i.i = sub <4 x i32> %a, %b - %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, - %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16> - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsubhn_high_s64: -; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vsubhn.i.i = sub <2 x i64> %a, %b - %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, - %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32> - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsubhn_high_u16: -; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vsubhn.i.i = sub <8 x i16> %a, %b - %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, - %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8> - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsubhn_high_u32: -; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vsubhn.i.i = sub <4 x i32> %a, %b - %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, - %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16> - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsubhn_high_u64: -; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vsubhn.i.i = sub <2 x i64> %a, %b - %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, - %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32> - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vrsubhn_s16: -; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, 
{{v[0-9]+}}.8h -entry: - %vrsubhn2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) - ret <8 x i8> %vrsubhn2.i -} - -define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vrsubhn_s32: -; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vrsubhn2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) - ret <4 x i16> %vrsubhn2.i -} - -define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vrsubhn_s64: -; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vrsubhn2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) - ret <2 x i32> %vrsubhn2.i -} - -define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vrsubhn_u16: -; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vrsubhn2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) - ret <8 x i8> %vrsubhn2.i -} - -define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vrsubhn_u32: -; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vrsubhn2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) - ret <4 x i16> %vrsubhn2.i -} - -define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vrsubhn_u64: -; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vrsubhn2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) - ret <2 x i32> %vrsubhn2.i -} - -define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vrsubhn_high_s16: -; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vrsubhn_high_s32: -; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vrsubhn_high_s64: -; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vrsubhn_high_u16: -; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> - 
%shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vrsubhn_high_u32: -; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vrsubhn_high_u64: -; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vabdl_s8: -; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vabd.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) - %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> - ret <8 x i16> %vmovl.i.i -} - -define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vabdl_s16: -; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vabd2.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b) - %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> - ret <4 x i32> %vmovl.i.i -} - -define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vabdl_s32: -; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vabd2.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b) - %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64> - ret <2 x i64> %vmovl.i.i -} - -define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vabdl_u8: -; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vabd.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) - %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> - ret <8 x i16> %vmovl.i.i -} - -define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vabdl_u16: -; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vabd2.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b) - %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> - ret <4 x i32> %vmovl.i.i -} - -define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vabdl_u32: -; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vabd2.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b) - %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64> - ret <2 x i64> %vmovl.i.i -} - -define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vabal_s8: -; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) - %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> - %add.i = add <8 x i16> %vmovl.i.i.i, %a - ret <8 x 
i16> %add.i -} - -define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vabal_s16: -; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c) - %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> - %add.i = add <4 x i32> %vmovl.i.i.i, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vabal_s32: -; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c) - %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> - %add.i = add <2 x i64> %vmovl.i.i.i, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vabal_u8: -; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) - %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> - %add.i = add <8 x i16> %vmovl.i.i.i, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vabal_u16: -; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c) - %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> - %add.i = add <4 x i32> %vmovl.i.i.i, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vabal_u32: -; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c) - %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> - %add.i = add <2 x i64> %vmovl.i.i.i, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vabdl_high_s8: -; CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> - ret <8 x i16> %vmovl.i.i.i -} - -define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vabdl_high_s16: -; CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> - ret <4 x i32> %vmovl.i.i.i -} - -define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vabdl_high_s32: -; CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> - ret <2 x i64> %vmovl.i.i.i -} - -define <8 x 
i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vabdl_high_u8: -; CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> - ret <8 x i16> %vmovl.i.i.i -} - -define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vabdl_high_u16: -; CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> - ret <4 x i32> %vmovl.i.i.i -} - -define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vabdl_high_u32: -; CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> - ret <2 x i64> %vmovl.i.i.i -} - -define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vabal_high_s8: -; CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16> - %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a - ret <8 x i16> %add.i.i -} - -define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vabal_high_s16: -; CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32> - %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a - ret <4 x i32> %add.i.i -} - -define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vabal_high_s32: -; CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64> - %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a - ret <2 x i64> %add.i.i -} - -define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vabal_high_u8: -; CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - 
%shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16> - %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a - ret <8 x i16> %add.i.i -} - -define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vabal_high_u16: -; CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32> - %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a - ret <4 x i32> %add.i.i -} - -define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vabal_high_u32: -; CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64> - %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a - ret <2 x i64> %add.i.i -} - -define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vmull_s8: -; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b) - ret <8 x i16> %vmull.i -} - -define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vmull_s16: -; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vmull_s32: -; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b) - ret <2 x i64> %vmull2.i -} - -define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vmull_u8: -; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b) - ret <8 x i16> %vmull.i -} - -define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vmull_u16: -; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vmull_u32: -; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b) - ret <2 x i64> %vmull2.i -} - -define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vmull_high_s8: -; CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> 
%shuffle.i.i, <8 x i8> %shuffle.i3.i) - ret <8 x i16> %vmull.i.i -} - -define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vmull_high_s16: -; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - ret <4 x i32> %vmull2.i.i -} - -define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vmull_high_s32: -; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - ret <2 x i64> %vmull2.i.i -} - -define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vmull_high_u8: -; CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - ret <8 x i16> %vmull.i.i -} - -define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vmull_high_u16: -; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - ret <4 x i32> %vmull2.i.i -} - -define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vmull_high_u32: -; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - ret <2 x i64> %vmull2.i.i -} - -define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vmlal_s8: -; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) - %add.i = add <8 x i16> %vmull.i.i, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vmlal_s16: -; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c) - %add.i = add <4 x i32> %vmull2.i.i, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vmlal_s32: -; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c) - %add.i = add <2 x i64> %vmull2.i.i, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vmlal_u8: -; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - 
%vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) - %add.i = add <8 x i16> %vmull.i.i, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vmlal_u16: -; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c) - %add.i = add <4 x i32> %vmull2.i.i, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vmlal_u32: -; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c) - %add.i = add <2 x i64> %vmull2.i.i, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vmlal_high_s8: -; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %add.i.i = add <8 x i16> %vmull.i.i.i, %a - ret <8 x i16> %add.i.i -} - -define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vmlal_high_s16: -; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %add.i.i = add <4 x i32> %vmull2.i.i.i, %a - ret <4 x i32> %add.i.i -} - -define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vmlal_high_s32: -; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %add.i.i = add <2 x i64> %vmull2.i.i.i, %a - ret <2 x i64> %add.i.i -} - -define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vmlal_high_u8: -; CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %add.i.i = add <8 x i16> %vmull.i.i.i, %a - ret <8 x i16> %add.i.i -} - -define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vmlal_high_u16: -; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %add.i.i = add <4 x i32> %vmull2.i.i.i, %a - ret <4 x i32> %add.i.i -} - -define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vmlal_high_u32: -; CHECK: umlal2 
{{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %add.i.i = add <2 x i64> %vmull2.i.i.i, %a - ret <2 x i64> %add.i.i -} - -define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vmlsl_s8: -; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) - %sub.i = sub <8 x i16> %a, %vmull.i.i - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vmlsl_s16: -; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c) - %sub.i = sub <4 x i32> %a, %vmull2.i.i - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vmlsl_s32: -; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c) - %sub.i = sub <2 x i64> %a, %vmull2.i.i - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vmlsl_u8: -; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) - %sub.i = sub <8 x i16> %a, %vmull.i.i - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vmlsl_u16: -; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c) - %sub.i = sub <4 x i32> %a, %vmull2.i.i - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vmlsl_u32: -; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c) - %sub.i = sub <2 x i64> %a, %vmull2.i.i - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vmlsl_high_s8: -; CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i - ret <8 x i16> %sub.i.i -} - -define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vmlsl_high_s16: -; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i - ret <4 x i32> %sub.i.i -} - -define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vmlsl_high_s32: -; CHECK: 
smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i - ret <2 x i64> %sub.i.i -} - -define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vmlsl_high_u8: -; CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i - ret <8 x i16> %sub.i.i -} - -define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vmlsl_high_u16: -; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i - ret <4 x i32> %sub.i.i -} - -define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vmlsl_high_u32: -; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i - ret <2 x i64> %sub.i.i -} - -define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vqdmull_s16: -; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vqdmull_s32: -; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vqdmlal_s16: -; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) - ret <4 x i32> %vqdmlal4.i -} - -define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vqdmlal_s32: -; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) - ret <2 x i64> %vqdmlal4.i -} - -define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vqdmlsl_s16: -; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vqdmlsl2.i = 
tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) - ret <4 x i32> %vqdmlsl4.i -} - -define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vqdmlsl_s32: -; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) - ret <2 x i64> %vqdmlsl4.i -} - -define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vqdmull_high_s16: -; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vqdmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - ret <4 x i32> %vqdmull2.i.i -} - -define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vqdmull_high_s32: -; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vqdmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - ret <2 x i64> %vqdmull2.i.i -} - -define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vqdmlal_high_s16: -; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vqdmlal2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vqdmlal4.i.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i) - ret <4 x i32> %vqdmlal4.i.i -} - -define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vqdmlal_high_s32: -; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vqdmlal2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vqdmlal4.i.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i) - ret <2 x i64> %vqdmlal4.i.i -} - -define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vqdmlsl_high_s16: -; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vqdmlsl2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vqdmlsl4.i.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i) - ret <4 x i32> %vqdmlsl4.i.i -} - -define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vqdmlsl_high_s32: -; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = 
shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32>
- %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32>
- %vqdmlsl2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
- %vqdmlsl4.i.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i)
- ret <2 x i64> %vqdmlsl4.i.i
-}
-
-define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vmull_p8:
-; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b)
- ret <8 x i16> %vmull.i
-}
-
-define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vmull_high_p8:
-; CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32>
- %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32>
- %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
- ret <8 x i16> %vmull.i.i
-}
-
-define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
-; CHECK: test_vmull_p64
-; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d
-entry:
- %vmull.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vmull1.i = insertelement <1 x i64> undef, i64 %b, i32 0
- %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64> %vmull.i, <1 x i64> %vmull1.i) #1
- %vmull3.i = bitcast <16 x i8> %vmull2.i to i128
- ret i128 %vmull3.i
-}
-
-define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
-; CHECK: test_vmull_high_p64
-; CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %0 = extractelement <2 x i64> %a, i32 1
- %1 = extractelement <2 x i64> %b, i32 1
- %vmull.i.i = insertelement <1 x i64> undef, i64 %0, i32 0
- %vmull1.i.i = insertelement <1 x i64> undef, i64 %1, i32 0
- %vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64> %vmull.i.i, <1 x i64> %vmull1.i.i) #1
- %vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128
- ret i128 %vmull3.i.i
-}
-
-declare <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64>, <1 x i64>) #5
-
-
diff --git a/test/CodeGen/AArch64/neon-aba-abd.ll b/test/CodeGen/AArch64/neon-aba-abd.ll
deleted file mode 100644
index 1fe52565afe1..000000000000
--- a/test/CodeGen/AArch64/neon-aba-abd.ll
+++ /dev/null
@@ -1,237 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; arm64 has copied test in its own directory (different intrinsic names).
-
-declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uabd_v8i8:
- %abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uabd v0.8b, v0.8b, v1.8b
- ret <8 x i8> %abd
-}
-
-define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uaba_v8i8:
- %abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
- %aba = add <8 x i8> %lhs, %abd
-; CHECK: uaba v0.8b, v0.8b, v1.8b
- ret <8 x i8> %aba
-}
-
-define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sabd_v8i8:
- %abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sabd v0.8b, v0.8b, v1.8b
- ret <8 x i8> %abd
-}
-
-define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_saba_v8i8:
- %abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
- %aba = add <8 x i8> %lhs, %abd
-; CHECK: saba v0.8b, v0.8b, v1.8b
- ret <8 x i8> %aba
-}
-
-declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uabd_v16i8:
- %abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uabd v0.16b, v0.16b, v1.16b
- ret <16 x i8> %abd
-}
-
-define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uaba_v16i8:
- %abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
- %aba = add <16 x i8> %lhs, %abd
-; CHECK: uaba v0.16b, v0.16b, v1.16b
- ret <16 x i8> %aba
-}
-
-define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sabd_v16i8:
- %abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sabd v0.16b, v0.16b, v1.16b
- ret <16 x i8> %abd
-}
-
-define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_saba_v16i8:
- %abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
- %aba = add <16 x i8> %lhs, %abd
-; CHECK: saba v0.16b, v0.16b, v1.16b
- ret <16 x i8> %aba
-}
-
-declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uabd_v4i16:
- %abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uabd v0.4h, v0.4h, v1.4h
- ret <4 x i16> %abd
-}
-
-define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uaba_v4i16:
- %abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
- %aba = add <4 x i16> %lhs, %abd
-; CHECK: uaba v0.4h, v0.4h, v1.4h
- ret <4 x i16> %aba
-}
-
-define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sabd_v4i16:
- %abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sabd v0.4h, v0.4h, v1.4h
- ret <4 x i16> %abd
-}
-
-define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_saba_v4i16:
- %abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
- %aba = add <4 x i16> %lhs, %abd
-; CHECK: saba v0.4h, v0.4h, v1.4h
- ret <4 x i16> %aba
-}
-
-declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uabd_v8i16:
- %abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uabd v0.8h, v0.8h, v1.8h
- ret <8 x i16> %abd
-}
-
-define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uaba_v8i16:
- %abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
- %aba = add <8 x i16> %lhs, %abd
-; CHECK: uaba v0.8h, v0.8h, v1.8h
- ret <8 x i16> %aba
-}
-
-define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sabd_v8i16:
- %abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sabd v0.8h, v0.8h, v1.8h
- ret <8 x i16> %abd
-}
-
-define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_saba_v8i16:
- %abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
- %aba = add <8 x i16> %lhs, %abd
-; CHECK: saba v0.8h, v0.8h, v1.8h
- ret <8 x i16> %aba
-}
-
-declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uabd_v2i32:
- %abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uabd v0.2s, v0.2s, v1.2s
- ret <2 x i32> %abd
-}
-
-define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uaba_v2i32:
- %abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
- %aba = add <2 x i32> %lhs, %abd
-; CHECK: uaba v0.2s, v0.2s, v1.2s
- ret <2 x i32> %aba
-}
-
-define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sabd_v2i32:
- %abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sabd v0.2s, v0.2s, v1.2s
- ret <2 x i32> %abd
-}
-
-define <2 x i32> @test_sabd_v2i32_const() {
-; CHECK: test_sabd_v2i32_const:
-; CHECK: movi d1, #0xffffffff0000
-; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
- %1 = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(
- <2 x i32> ,
- <2 x i32> )
- ret <2 x i32> %1
-}
-
-define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_saba_v2i32:
- %abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
- %aba = add <2 x i32> %lhs, %abd
-; CHECK: saba v0.2s, v0.2s, v1.2s
- ret <2 x i32> %aba
-}
-
-declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uabd_v4i32:
- %abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uabd v0.4s, v0.4s, v1.4s
- ret <4 x i32> %abd
-}
-
-define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uaba_v4i32:
- %abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
- %aba = add <4 x i32> %lhs, %abd
-; CHECK: uaba v0.4s, v0.4s, v1.4s
- ret <4 x i32> %aba
-}
-
-define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sabd_v4i32:
- %abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sabd v0.4s, v0.4s, v1.4s
- ret <4 x i32> %abd
-}
-
-define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_saba_v4i32:
- %abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>
%lhs, <4 x i32> %rhs) - %aba = add <4 x i32> %lhs, %abd -; CHECK: saba v0.4s, v0.4s, v1.4s - ret <4 x i32> %aba -} - -declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>) - -define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fabd_v2f32: - %abd = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fabd v0.2s, v0.2s, v1.2s - ret <2 x float> %abd -} - -declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) - -define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fabd_v4f32: - %abd = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fabd v0.4s, v0.4s, v1.4s - ret <4 x float> %abd -} - -declare <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double>, <2 x double>) - -define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fabd_v2f64: - %abd = call <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fabd v0.2d, v0.2d, v1.2d - ret <2 x double> %abd -} diff --git a/test/CodeGen/AArch64/neon-across.ll b/test/CodeGen/AArch64/neon-across.ll deleted file mode 100644 index 98444d29a01a..000000000000 --- a/test/CodeGen/AArch64/neon-across.ll +++ /dev/null @@ -1,473 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has copied test in its own directory. - -declare float @llvm.aarch64.neon.vminnmv(<4 x float>) - -declare float @llvm.aarch64.neon.vmaxnmv(<4 x float>) - -declare float @llvm.aarch64.neon.vminv(<4 x float>) - -declare float @llvm.aarch64.neon.vmaxv(<4 x float>) - -declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32>) - -declare <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8>) - -declare <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8>) - -declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v4i32(<4 x i32>) - -declare <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v8i16(<8 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v16i8(<16 x i8>) - -declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v4i32(<4 x i32>) - -declare <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v8i16(<8 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v16i8(<16 x i8>) - -declare <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v4i16(<4 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v8i8(<8 x i8>) - -declare <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v4i16(<4 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v8i8(<8 x i8>) - -declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v4i32(<4 x i32>) - -declare <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v8i16(<8 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v16i8(<16 x i8>) - -declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v4i32(<4 x i32>) - -declare <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v8i16(<8 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v16i8(<16 x i8>) - -declare <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v4i16(<4 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v8i8(<8 x i8>) - -declare <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v4i16(<4 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v8i8(<8 x i8>) - -declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v4i32(<4 x i32>) - -declare <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v8i16(<8 x i16>) - -declare <1 x 
i16> @llvm.aarch64.neon.uaddlv.v1i16.v16i8(<16 x i8>) - -declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v4i32(<4 x i32>) - -declare <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v8i16(<8 x i16>) - -declare <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v16i8(<16 x i8>) - -declare <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v4i16(<4 x i16>) - -declare <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v8i8(<8 x i8>) - -declare <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v4i16(<4 x i16>) - -declare <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v8i8(<8 x i8>) - -define i16 @test_vaddlv_s8(<8 x i8> %a) { -; CHECK: test_vaddlv_s8: -; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %saddlv.i = tail call <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i16> %saddlv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vaddlv_s16(<4 x i16> %a) { -; CHECK: test_vaddlv_s16: -; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %saddlv.i = tail call <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i32> %saddlv.i, i32 0 - ret i32 %0 -} - -define i16 @test_vaddlv_u8(<8 x i8> %a) { -; CHECK: test_vaddlv_u8: -; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %uaddlv.i = tail call <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i16> %uaddlv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vaddlv_u16(<4 x i16> %a) { -; CHECK: test_vaddlv_u16: -; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %uaddlv.i = tail call <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i32> %uaddlv.i, i32 0 - ret i32 %0 -} - -define i16 @test_vaddlvq_s8(<16 x i8> %a) { -; CHECK: test_vaddlvq_s8: -; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %saddlv.i = tail call <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i16> %saddlv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vaddlvq_s16(<8 x i16> %a) { -; CHECK: test_vaddlvq_s16: -; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %saddlv.i = tail call <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i32> %saddlv.i, i32 0 - ret i32 %0 -} - -define i64 @test_vaddlvq_s32(<4 x i32> %a) { -; CHECK: test_vaddlvq_s32: -; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %saddlv.i = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i64> %saddlv.i, i32 0 - ret i64 %0 -} - -define i16 @test_vaddlvq_u8(<16 x i8> %a) { -; CHECK: test_vaddlvq_u8: -; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %uaddlv.i = tail call <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i16> %uaddlv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vaddlvq_u16(<8 x i16> %a) { -; CHECK: test_vaddlvq_u16: -; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %uaddlv.i = tail call <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i32> %uaddlv.i, i32 0 - ret i32 %0 -} - -define i64 @test_vaddlvq_u32(<4 x i32> %a) { -; CHECK: test_vaddlvq_u32: -; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %uaddlv.i = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i64> %uaddlv.i, i32 0 - ret i64 %0 -} - -define i8 @test_vmaxv_s8(<8 x i8> %a) { -; CHECK: test_vmaxv_s8: -; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %smaxv.i = tail call <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i8> 
%smaxv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vmaxv_s16(<4 x i16> %a) { -; CHECK: test_vmaxv_s16: -; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %smaxv.i = tail call <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i16> %smaxv.i, i32 0 - ret i16 %0 -} - -define i8 @test_vmaxv_u8(<8 x i8> %a) { -; CHECK: test_vmaxv_u8: -; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %umaxv.i = tail call <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i8> %umaxv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vmaxv_u16(<4 x i16> %a) { -; CHECK: test_vmaxv_u16: -; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %umaxv.i = tail call <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i16> %umaxv.i, i32 0 - ret i16 %0 -} - -define i8 @test_vmaxvq_s8(<16 x i8> %a) { -; CHECK: test_vmaxvq_s8: -; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %smaxv.i = tail call <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i8> %smaxv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vmaxvq_s16(<8 x i16> %a) { -; CHECK: test_vmaxvq_s16: -; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %smaxv.i = tail call <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i16> %smaxv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vmaxvq_s32(<4 x i32> %a) { -; CHECK: test_vmaxvq_s32: -; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %smaxv.i = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i32> %smaxv.i, i32 0 - ret i32 %0 -} - -define i8 @test_vmaxvq_u8(<16 x i8> %a) { -; CHECK: test_vmaxvq_u8: -; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %umaxv.i = tail call <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i8> %umaxv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vmaxvq_u16(<8 x i16> %a) { -; CHECK: test_vmaxvq_u16: -; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %umaxv.i = tail call <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i16> %umaxv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vmaxvq_u32(<4 x i32> %a) { -; CHECK: test_vmaxvq_u32: -; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %umaxv.i = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i32> %umaxv.i, i32 0 - ret i32 %0 -} - -define i8 @test_vminv_s8(<8 x i8> %a) { -; CHECK: test_vminv_s8: -; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %sminv.i = tail call <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i8> %sminv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vminv_s16(<4 x i16> %a) { -; CHECK: test_vminv_s16: -; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %sminv.i = tail call <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i16> %sminv.i, i32 0 - ret i16 %0 -} - -define i8 @test_vminv_u8(<8 x i8> %a) { -; CHECK: test_vminv_u8: -; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %uminv.i = tail call <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i8> %uminv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vminv_u16(<4 x i16> %a) { -; CHECK: test_vminv_u16: -; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %uminv.i = tail call <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i16> %uminv.i, i32 0 - ret i16 %0 -} - -define i8 
@test_vminvq_s8(<16 x i8> %a) { -; CHECK: test_vminvq_s8: -; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %sminv.i = tail call <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i8> %sminv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vminvq_s16(<8 x i16> %a) { -; CHECK: test_vminvq_s16: -; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %sminv.i = tail call <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i16> %sminv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vminvq_s32(<4 x i32> %a) { -; CHECK: test_vminvq_s32: -; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %sminv.i = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i32> %sminv.i, i32 0 - ret i32 %0 -} - -define i8 @test_vminvq_u8(<16 x i8> %a) { -; CHECK: test_vminvq_u8: -; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %uminv.i = tail call <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i8> %uminv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vminvq_u16(<8 x i16> %a) { -; CHECK: test_vminvq_u16: -; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %uminv.i = tail call <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i16> %uminv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vminvq_u32(<4 x i32> %a) { -; CHECK: test_vminvq_u32: -; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %uminv.i = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i32> %uminv.i, i32 0 - ret i32 %0 -} - -define i8 @test_vaddv_s8(<8 x i8> %a) { -; CHECK: test_vaddv_s8: -; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i8> %vaddv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vaddv_s16(<4 x i16> %a) { -; CHECK: test_vaddv_s16: -; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i16> %vaddv.i, i32 0 - ret i16 %0 -} - -define i8 @test_vaddv_u8(<8 x i8> %a) { -; CHECK: test_vaddv_u8: -; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i8> %vaddv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vaddv_u16(<4 x i16> %a) { -; CHECK: test_vaddv_u16: -; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i16> %vaddv.i, i32 0 - ret i16 %0 -} - -define i8 @test_vaddvq_s8(<16 x i8> %a) { -; CHECK: test_vaddvq_s8: -; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i8> %vaddv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vaddvq_s16(<8 x i16> %a) { -; CHECK: test_vaddvq_s16: -; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i16> %vaddv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vaddvq_s32(<4 x i32> %a) { -; CHECK: test_vaddvq_s32: -; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %vaddv.i = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i32> %vaddv.i, i32 0 - ret i32 %0 -} - -define i8 @test_vaddvq_u8(<16 x i8> %a) { -; CHECK: 
test_vaddvq_u8: -; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i8> %vaddv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vaddvq_u16(<8 x i16> %a) { -; CHECK: test_vaddvq_u16: -; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i16> %vaddv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vaddvq_u32(<4 x i32> %a) { -; CHECK: test_vaddvq_u32: -; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %vaddv.i = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i32> %vaddv.i, i32 0 - ret i32 %0 -} - -define float @test_vmaxvq_f32(<4 x float> %a) { -; CHECK: test_vmaxvq_f32: -; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %0 = call float @llvm.aarch64.neon.vmaxv(<4 x float> %a) - ret float %0 -} - -define float @test_vminvq_f32(<4 x float> %a) { -; CHECK: test_vminvq_f32: -; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %0 = call float @llvm.aarch64.neon.vminv(<4 x float> %a) - ret float %0 -} - -define float @test_vmaxnmvq_f32(<4 x float> %a) { -; CHECK: test_vmaxnmvq_f32: -; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %0 = call float @llvm.aarch64.neon.vmaxnmv(<4 x float> %a) - ret float %0 -} - -define float @test_vminnmvq_f32(<4 x float> %a) { -; CHECK: test_vminnmvq_f32: -; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %0 = call float @llvm.aarch64.neon.vminnmv(<4 x float> %a) - ret float %0 -} - diff --git a/test/CodeGen/AArch64/neon-add-pairwise.ll b/test/CodeGen/AArch64/neon-add-pairwise.ll deleted file mode 100644 index d304094adb40..000000000000 --- a/test/CodeGen/AArch64/neon-add-pairwise.ll +++ /dev/null @@ -1,102 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 has a copy of this test in its own directory. - -declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_addp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. 
-; CHECK: test_addp_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: addp v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vpadd.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_addp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_addp_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vpadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: addp v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_addp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_addp_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: addp v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vpadd.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_addp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_addp_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vpadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: addp v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_addp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_addp_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: addp v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_addp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_addp_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: addp v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - - -declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_addp_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_addp_v2i64: - %val = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: addp v0.2d, v0.2d, v1.2d - ret <2 x i64> %val -} - -declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm.neon.vpadd.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.arm.neon.vpadd.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_faddp_v2f32: - %val = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: faddp v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_faddp_v4f32: - %val = call <4 x float> @llvm.arm.neon.vpadd.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: faddp v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_faddp_v2f64: - %val = call <2 x double> @llvm.arm.neon.vpadd.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: faddp v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - -define i32 @test_vaddv.v2i32(<2 x i32> %a) { -; CHECK-LABEL: test_vaddv.v2i32 -; CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %1 = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32> %a) - %2 = extractelement <1 x i32> %1, i32 0 - ret i32 %2 -} - -declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32>) diff --git a/test/CodeGen/AArch64/neon-add-sub.ll b/test/CodeGen/AArch64/neon-add-sub.ll deleted file mode 100644 index eebad4df106e..000000000000 --- 
a/test/CodeGen/AArch64/neon-add-sub.ll +++ /dev/null @@ -1,280 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has its own copy of this test - -define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %tmp3 = add <8 x i8> %A, %B; - ret <8 x i8> %tmp3 -} - -define <16 x i8> @add16xi8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %tmp3 = add <16 x i8> %A, %B; - ret <16 x i8> %tmp3 -} - -define <4 x i16> @add4xi16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %tmp3 = add <4 x i16> %A, %B; - ret <4 x i16> %tmp3 -} - -define <8 x i16> @add8xi16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h - %tmp3 = add <8 x i16> %A, %B; - ret <8 x i16> %tmp3 -} - -define <2 x i32> @add2xi32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = add <2 x i32> %A, %B; - ret <2 x i32> %tmp3 -} - -define <4 x i32> @add4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = add <4 x i32> %A, %B; - ret <4 x i32> %tmp3 -} - -define <2 x i64> @add2xi64(<2 x i64> %A, <2 x i64> %B) { -;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %tmp3 = add <2 x i64> %A, %B; - ret <2 x i64> %tmp3 -} - -define <2 x float> @add2xfloat(<2 x float> %A, <2 x float> %B) { -;CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = fadd <2 x float> %A, %B; - ret <2 x float> %tmp3 -} - -define <4 x float> @add4xfloat(<4 x float> %A, <4 x float> %B) { -;CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = fadd <4 x float> %A, %B; - ret <4 x float> %tmp3 -} -define <2 x double> @add2xdouble(<2 x double> %A, <2 x double> %B) { -;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %tmp3 = fadd <2 x double> %A, %B; - ret <2 x double> %tmp3 -} - -define <8 x i8> @sub8xi8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %tmp3 = sub <8 x i8> %A, %B; - ret <8 x i8> %tmp3 -} - -define <16 x i8> @sub16xi8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %tmp3 = sub <16 x i8> %A, %B; - ret <16 x i8> %tmp3 -} - -define <4 x i16> @sub4xi16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %tmp3 = sub <4 x i16> %A, %B; - ret <4 x i16> %tmp3 -} - -define <8 x i16> @sub8xi16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h - %tmp3 = sub <8 x i16> %A, %B; - ret <8 x i16> %tmp3 -} - -define <2 x i32> @sub2xi32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = sub <2 x i32> %A, %B; - ret <2 x i32> %tmp3 -} - -define <4 x i32> @sub4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = sub <4 x i32> %A, %B; - ret <4 x i32> %tmp3 -} - -define <2 x i64> @sub2xi64(<2 x i64> %A, <2 x i64> %B) { -;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %tmp3 = sub <2 x i64> %A, %B; - ret <2 x i64> %tmp3 -} - -define <2 x float> @sub2xfloat(<2 x float> %A, <2 x float> %B) { -;CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = fsub <2 x float> %A, %B; - ret <2 x float> %tmp3 -} - -define <4 x float> @sub4xfloat(<4 x float> %A, <4 x float> %B) { -;CHECK: fsub {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = fsub <4 x float> %A, %B; - ret <4 x float> %tmp3 -} -define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) { -;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %tmp3 = fsub <2 x double> %A, %B; - ret <2 x double> %tmp3 -} - -define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vadd_f64 -; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fadd <1 x double> %a, %b - ret <1 x double> %1 -} - -define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vmul_f64 -; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fmul <1 x double> %a, %b - ret <1 x double> %1 -} - -define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vdiv_f64 -; CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fdiv <1 x double> %a, %b - ret <1 x double> %1 -} - -define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) { -; CHECK-LABEL: test_vmla_f64 -; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} -; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fmul <1 x double> %b, %c - %2 = fadd <1 x double> %1, %a - ret <1 x double> %2 -} - -define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) { -; CHECK-LABEL: test_vmls_f64 -; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} -; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fmul <1 x double> %b, %c - %2 = fsub <1 x double> %a, %1 - ret <1 x double> %2 -} - -define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) { -; CHECK-LABEL: test_vfms_f64 -; CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fsub <1 x double> , %b - %2 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %1, <1 x double> %c, <1 x double> %a) - ret <1 x double> %2 -} - -define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) { -; CHECK-LABEL: test_vfma_f64 -; CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vsub_f64 -; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fsub <1 x double> %a, %b - ret <1 x double> %1 -} - -define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vabd_f64 -; CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vmax_f64 -; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vmin_f64 -; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vmaxnm_f64 -; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -define <1 x double> 
@test_vminnm_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vminnm_f64 -; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -define <1 x double> @test_vabs_f64(<1 x double> %a) { -; CHECK-LABEL: test_vabs_f64 -; CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.fabs.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vneg_f64(<1 x double> %a) { -; CHECK-LABEL: test_vneg_f64 -; CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}} - %1 = fsub <1 x double> , %a - ret <1 x double> %1 -} - -declare <1 x double> @llvm.fabs.v1f64(<1 x double>) -declare <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>) - -define <1 x i8> @test_add_v1i8(<1 x i8> %a, <1 x i8> %b) { -;CHECK-LABEL: test_add_v1i8: -;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %c = add <1 x i8> %a, %b - ret <1 x i8> %c -} - -define <1 x i16> @test_add_v1i16(<1 x i16> %a, <1 x i16> %b) { -;CHECK-LABEL: test_add_v1i16: -;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %c = add <1 x i16> %a, %b - ret <1 x i16> %c -} - -define <1 x i32> @test_add_v1i32(<1 x i32> %a, <1 x i32> %b) { -;CHECK-LABEL: test_add_v1i32: -;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %c = add <1 x i32> %a, %b - ret <1 x i32> %c -} - -define <1 x i8> @test_sub_v1i8(<1 x i8> %a, <1 x i8> %b) { -;CHECK-LABEL: test_sub_v1i8: -;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %c = sub <1 x i8> %a, %b - ret <1 x i8> %c -} - -define <1 x i16> @test_sub_v1i16(<1 x i16> %a, <1 x i16> %b) { -;CHECK-LABEL: test_sub_v1i16: -;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %c = sub <1 x i16> %a, %b - ret <1 x i16> %c -} - -define <1 x i32> @test_sub_v1i32(<1 x i32> %a, <1 x i32> %b) { -;CHECK-LABEL: test_sub_v1i32: -;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %c = sub <1 x i32> %a, %b - ret <1 x i32> %c -} diff --git a/test/CodeGen/AArch64/neon-bitcast.ll b/test/CodeGen/AArch64/neon-bitcast.ll index 25819b379322..b70cda3175ad 100644 --- a/test/CodeGen/AArch64/neon-bitcast.ll +++ b/test/CodeGen/AArch64/neon-bitcast.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s ; From <8 x i8> diff --git a/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/test/CodeGen/AArch64/neon-bitwise-instructions.ll index 228a6bfdf5dd..dfaf1f251792 100644 --- a/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @and8xi8(<8 x i8> %a, <8 x i8> %b) { diff --git a/test/CodeGen/AArch64/neon-bsl.ll b/test/CodeGen/AArch64/neon-bsl.ll deleted file mode 100644 index 3182b700d8f7..000000000000 --- 
a/test/CodeGen/AArch64/neon-bsl.ll +++ /dev/null @@ -1,237 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has no equivalent vbsl intrinsic, always using the and/or IR. The final -; two tests are duplicated by ARM64's vselect.ll test. - -declare <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double>, <2 x double>, <2 x double>) - -declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) - -declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) - -declare <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float>, <4 x float>, <4 x float>) - -declare <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) - -declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) - -declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) - -declare <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double>, <1 x double>, <1 x double>) - -declare <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float>, <2 x float>, <2 x float>) - -declare <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>) - -declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) - -define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { -; CHECK-LABEL: test_vbsl_s8: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) - ret <8 x i8> %vbsl.i -} - -define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { -; CHECK-LABEL: test_vbsl_s16: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) - %0 = bitcast <4 x i16> %vbsl3.i to <8 x i8> - ret <8 x i8> %0 -} - -define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { -; CHECK-LABEL: test_vbsl_s32: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) - ret <2 x i32> %vbsl3.i -} - -define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { -; CHECK-LABEL: test_vbsl_s64: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) - ret <1 x i64> %vbsl3.i -} - -define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { -; CHECK-LABEL: test_vbsl_u8: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) - ret <8 x i8> %vbsl.i -} - -define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { -; CHECK-LABEL: test_vbsl_u16: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) - ret <4 x i16> %vbsl3.i -} - -define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { -; CHECK-LABEL: test_vbsl_u32: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) - ret <2 x i32> %vbsl3.i -} - -define <1 x i64> @test_vbsl_u64(<1 x i64> 
%v1, <1 x i64> %v2, <1 x i64> %v3) { -; CHECK-LABEL: test_vbsl_u64: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) - ret <1 x i64> %vbsl3.i -} - -define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) { -; CHECK-LABEL: test_vbsl_f32: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) - ret <2 x float> %vbsl3.i -} - -define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) { -; CHECK-LABEL: test_vbsl_f64: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl.i = bitcast <1 x i64> %v1 to <1 x double> - %vbsl3.i = tail call <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double> %vbsl.i, <1 x double> %v2, <1 x double> %v3) - ret <1 x double> %vbsl3.i -} - -define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { -; CHECK-LABEL: test_vbsl_p8: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) - ret <8 x i8> %vbsl.i -} - -define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { -; CHECK-LABEL: test_vbsl_p16: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) - ret <4 x i16> %vbsl3.i -} - -define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { -; CHECK-LABEL: test_vbslq_s8: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) - ret <16 x i8> %vbsl.i -} - -define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { -; CHECK-LABEL: test_vbslq_s16: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) - ret <8 x i16> %vbsl3.i -} - -define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { -; CHECK-LABEL: test_vbslq_s32: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) - ret <4 x i32> %vbsl3.i -} - -define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { -; CHECK-LABEL: test_vbslq_s64: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) - ret <2 x i64> %vbsl3.i -} - -define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { -; CHECK-LABEL: test_vbslq_u8: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) - ret <16 x i8> %vbsl.i -} - -define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { -; CHECK-LABEL: test_vbslq_u16: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) - ret <8 x i16> %vbsl3.i -} - -define <4 x i32> @test_vbslq_u32(<4 x 
i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { -; CHECK-LABEL: test_vbslq_u32: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) - ret <4 x i32> %vbsl3.i -} - -define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { -; CHECK-LABEL: test_vbslq_u64: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) - ret <2 x i64> %vbsl3.i -} - -define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) { -; CHECK-LABEL: test_vbslq_f32: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl.i = bitcast <4 x i32> %v1 to <4 x float> - %vbsl3.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %vbsl.i, <4 x float> %v2, <4 x float> %v3) - ret <4 x float> %vbsl3.i -} - -define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { -; CHECK-LABEL: test_vbslq_p8: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) - ret <16 x i8> %vbsl.i -} - -define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { -; CHECK-LABEL: test_vbslq_p16: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) - ret <8 x i16> %vbsl3.i -} - -define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) { -; CHECK-LABEL: test_vbslq_f64: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl.i = bitcast <2 x i64> %v1 to <2 x double> - %vbsl3.i = tail call <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double> %vbsl.i, <2 x double> %v2, <2 x double> %v3) - ret <2 x double> %vbsl3.i -} - -define <2 x double> @test_bsl_v2f64(<2 x i1> %v1, <2 x double> %v2, <2 x double> %v3) { -; CHECK-LABEL: test_bsl_v2f64: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %1 = select <2 x i1> %v1, <2 x double> %v2, <2 x double> %v3 - ret <2 x double> %1 -} - -define <4 x float> @test_bsl_v4f32(<4 x i1> %v1, <4 x float> %v2, <4 x float> %v3) { -; CHECK-LABEL: test_bsl_v4f32: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %1 = select <4 x i1> %v1, <4 x float> %v2, <4 x float> %v3 - ret <4 x float> %1 -} diff --git a/test/CodeGen/AArch64/neon-compare-instructions.ll b/test/CodeGen/AArch64/neon-compare-instructions.ll index e029cfcf3394..b99057ebf2bc 100644 --- a/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s define <8 x i8> @cmeq8xi8(<8 x i8> %A, <8 x i8> %B) { diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll deleted file mode 100644 index 096018ab886a..000000000000 --- a/test/CodeGen/AArch64/neon-copy.ll +++ /dev/null @@ -1,1402 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; arm64 has copied equivalent test due to intrinsics. 
- -define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) { -;CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}} - %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15 - ret <16 x i8> %tmp3 -} - -define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) { -;CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}} - %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6 - ret <8 x i16> %tmp3 -} - -define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}} - %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2 - ret <4 x i32> %tmp3 -} - -define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}} - %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1 - ret <2 x i64> %tmp3 -} - -define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) { -;CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}} - %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5 - ret <8 x i8> %tmp3 -} - -define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) { -;CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}} - %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3 - ret <4 x i16> %tmp3 -} - -define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} - %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 - ret <2 x i32> %tmp3 -} - -define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) { -;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2] - %tmp3 = extractelement <16 x i8> %tmp1, i32 2 - %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15 - ret <16 x i8> %tmp4 -} - -define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) { -;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2] - %tmp3 = extractelement <8 x i16> %tmp1, i32 2 - %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7 - ret <8 x i16> %tmp4 -} - -define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] - %tmp3 = extractelement <4 x i32> %tmp1, i32 2 - %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1 - ret <4 x i32> %tmp4 -} - -define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <2 x i64> %tmp1, i32 0 - %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1 - ret <2 x i64> %tmp4 -} - -define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] - %tmp3 = extractelement <4 x float> %tmp1, i32 2 - %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1 - ret <4 x float> %tmp4 -} - -define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <2 x double> %tmp1, i32 0 - %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 - ret <2 x double> %tmp4 -} - -define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) { -;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2] - %tmp3 = extractelement <8 x i8> %tmp1, i32 2 - %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15 - ret <16 x i8> %tmp4 -} - -define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) { -;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2] - %tmp3 = extractelement <4 x i16> %tmp1, i32 2 - %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7 - ret <8 x i16> %tmp4 -} - -define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1] - %tmp3 = extractelement <2 x i32> %tmp1, i32 1 - %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1 - ret <4 x i32> %tmp4 -} - -define <2 x i64> @ins1d2(<1 x i64> %tmp1, 
<2 x i64> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <1 x i64> %tmp1, i32 0 - %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1 - ret <2 x i64> %tmp4 -} - -define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1] - %tmp3 = extractelement <2 x float> %tmp1, i32 1 - %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1 - ret <4 x float> %tmp4 -} - -define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <1 x double> %tmp1, i32 0 - %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 - ret <2 x double> %tmp4 -} - -define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) { -;CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2] - %tmp3 = extractelement <16 x i8> %tmp1, i32 2 - %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7 - ret <8 x i8> %tmp4 -} - -define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) { -;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2] - %tmp3 = extractelement <8 x i16> %tmp1, i32 2 - %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3 - ret <4 x i16> %tmp4 -} - -define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] - %tmp3 = extractelement <4 x i32> %tmp1, i32 2 - %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1 - ret <2 x i32> %tmp4 -} - -define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <2 x i64> %tmp1, i32 0 - %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0 - ret <1 x i64> %tmp4 -} - -define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] - %tmp3 = extractelement <4 x float> %tmp1, i32 2 - %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1 - ret <2 x float> %tmp4 -} - -define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <2 x double> %tmp1, i32 0 - %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0 - ret <1 x double> %tmp4 -} - -define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) { -;CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2] - %tmp3 = extractelement <8 x i8> %tmp1, i32 2 - %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4 - ret <8 x i8> %tmp4 -} - -define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) { -;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2] - %tmp3 = extractelement <4 x i16> %tmp1, i32 2 - %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3 - ret <4 x i16> %tmp4 -} - -define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] - %tmp3 = extractelement <2 x i32> %tmp1, i32 0 - %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1 - ret <2 x i32> %tmp4 -} - -define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <1 x i64> %tmp1, i32 0 - %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0 - ret <1 x i64> %tmp4 -} - -define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] - %tmp3 = extractelement <2 x float> %tmp1, i32 0 - %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1 - ret <2 x float> %tmp4 -} - -define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] - 
%tmp3 = extractelement <1 x double> %tmp1, i32 0 - %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0 - ret <1 x double> %tmp4 -} - -define i32 @umovw16b(<16 x i8> %tmp1) { -;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8] - %tmp3 = extractelement <16 x i8> %tmp1, i32 8 - %tmp4 = zext i8 %tmp3 to i32 - ret i32 %tmp4 -} - -define i32 @umovw8h(<8 x i16> %tmp1) { -;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2] - %tmp3 = extractelement <8 x i16> %tmp1, i32 2 - %tmp4 = zext i16 %tmp3 to i32 - ret i32 %tmp4 -} - -define i32 @umovw4s(<4 x i32> %tmp1) { -;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[2] - %tmp3 = extractelement <4 x i32> %tmp1, i32 2 - ret i32 %tmp3 -} - -define i64 @umovx2d(<2 x i64> %tmp1) { -;CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[0] - %tmp3 = extractelement <2 x i64> %tmp1, i32 0 - ret i64 %tmp3 -} - -define i32 @umovw8b(<8 x i8> %tmp1) { -;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7] - %tmp3 = extractelement <8 x i8> %tmp1, i32 7 - %tmp4 = zext i8 %tmp3 to i32 - ret i32 %tmp4 -} - -define i32 @umovw4h(<4 x i16> %tmp1) { -;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2] - %tmp3 = extractelement <4 x i16> %tmp1, i32 2 - %tmp4 = zext i16 %tmp3 to i32 - ret i32 %tmp4 -} - -define i32 @umovw2s(<2 x i32> %tmp1) { -;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1] - %tmp3 = extractelement <2 x i32> %tmp1, i32 1 - ret i32 %tmp3 -} - -define i64 @umovx1d(<1 x i64> %tmp1) { -;CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} - %tmp3 = extractelement <1 x i64> %tmp1, i32 0 - ret i64 %tmp3 -} - -define i32 @smovw16b(<16 x i8> %tmp1) { -;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8] - %tmp3 = extractelement <16 x i8> %tmp1, i32 8 - %tmp4 = sext i8 %tmp3 to i32 - %tmp5 = add i32 5, %tmp4 - ret i32 %tmp5 -} - -define i32 @smovw8h(<8 x i16> %tmp1) { -;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2] - %tmp3 = extractelement <8 x i16> %tmp1, i32 2 - %tmp4 = sext i16 %tmp3 to i32 - %tmp5 = add i32 5, %tmp4 - ret i32 %tmp5 -} - -define i32 @smovx16b(<16 x i8> %tmp1) { -;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8] - %tmp3 = extractelement <16 x i8> %tmp1, i32 8 - %tmp4 = sext i8 %tmp3 to i32 - ret i32 %tmp4 -} - -define i32 @smovx8h(<8 x i16> %tmp1) { -;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2] - %tmp3 = extractelement <8 x i16> %tmp1, i32 2 - %tmp4 = sext i16 %tmp3 to i32 - ret i32 %tmp4 -} - -define i64 @smovx4s(<4 x i32> %tmp1) { -;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2] - %tmp3 = extractelement <4 x i32> %tmp1, i32 2 - %tmp4 = sext i32 %tmp3 to i64 - ret i64 %tmp4 -} - -define i32 @smovw8b(<8 x i8> %tmp1) { -;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4] - %tmp3 = extractelement <8 x i8> %tmp1, i32 4 - %tmp4 = sext i8 %tmp3 to i32 - %tmp5 = add i32 5, %tmp4 - ret i32 %tmp5 -} - -define i32 @smovw4h(<4 x i16> %tmp1) { -;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2] - %tmp3 = extractelement <4 x i16> %tmp1, i32 2 - %tmp4 = sext i16 %tmp3 to i32 - %tmp5 = add i32 5, %tmp4 - ret i32 %tmp5 -} - -define i32 @smovx8b(<8 x i8> %tmp1) { -;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[6] - %tmp3 = extractelement <8 x i8> %tmp1, i32 6 - %tmp4 = sext i8 %tmp3 to i32 - ret i32 %tmp4 -} - -define i32 @smovx4h(<4 x i16> %tmp1) { -;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2] - %tmp3 = extractelement <4 x i16> %tmp1, i32 2 - %tmp4 = sext i16 %tmp3 to i32 - ret i32 %tmp4 -} - -define i64 @smovx2s(<2 x i32> %tmp1) { -;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1] - %tmp3 = extractelement <2 x i32> %tmp1, i32 1 - %tmp4 = sext i32 %tmp3 to i64 - ret i64 %tmp4 -} - -define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) { -;CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] 
- %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> - ret <8 x i8> %vset_lane -} - -define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) { -;CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6] - %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> - ret <16 x i8> %vset_lane -} - -define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) { -;CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0] - %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> - ret <8 x i8> %vset_lane -} - -define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) { -;CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15] - %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> - ret <16 x i8> %vset_lane -} - -define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 { -;CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} - %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0 - %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1 - %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2 - %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3 - %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4 - %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5 - %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6 - %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7 - ret <8 x i8> %vecinit7.i -} - -define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 { -;CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} - %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0 - %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1 - %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2 - %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3 - ret <4 x i16> %vecinit3.i -} - -define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 { -;CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}} - %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0 - %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1 - ret <2 x i32> %vecinit1.i -} - -define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 { -;CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} - %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0 - ret <1 x i64> %vecinit.i -} - -define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 { -;CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} - %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0 - %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1 - %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2 - %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3 - %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4 - %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5 - %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6 - %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7 - %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8 - %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9 - %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10 - %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11 - %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12 - %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13 - %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14 - %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15 - ret <16 x i8> %vecinit15.i -} - -define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 { -;CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} - %vecinit.i = insertelement <8 x 
i16> undef, i16 %v1, i32 0 - %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1 - %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2 - %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3 - %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4 - %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5 - %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6 - %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7 - ret <8 x i16> %vecinit7.i -} - -define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 { -;CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} - %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0 - %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1 - %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2 - %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3 - ret <4 x i32> %vecinit3.i -} - -define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 { -;CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} - %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0 - %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1 - ret <2 x i64> %vecinit1.i -} - -define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] - %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> - ret <8 x i8> %shuffle -} - -define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] - %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> - ret <4 x i16> %shuffle -} - -define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] - %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> - ret <2 x i32> %shuffle -} - -define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 { -;CHECK: {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] - %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> - ret <16 x i8> %shuffle -} - -define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 { -;CHECK: {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] - %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> - ret <8 x i16> %shuffle -} - -define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 { -;CHECK: {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] - %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> - ret <4 x i32> %shuffle -} - -define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 { -;CHECK: {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] - %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer - ret <2 x i64> %shuffle -} - -define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] - %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> - ret <8 x i8> %shuffle -} - -define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] - %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> - ret <4 x i16> %shuffle -} - -define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] - %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> - ret <2 x i32> %shuffle -} - -define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] - %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> - ret <16 x i8> %shuffle -} - -define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 { -;CHECK: {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] - %shuffle = shufflevector <8 x i16> %v1, <8 
x i16> undef, <8 x i32> - ret <8 x i16> %shuffle -} - -define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] - %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> - ret <4 x i32> %shuffle -} - -define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] - %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer - ret <2 x i64> %shuffle -} - -define i64 @test_bitcastv8i8toi64(<8 x i8> %in) { -; CHECK-LABEL: test_bitcastv8i8toi64: - %res = bitcast <8 x i8> %in to i64 -; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} - ret i64 %res -} - -define i64 @test_bitcastv4i16toi64(<4 x i16> %in) { -; CHECK-LABEL: test_bitcastv4i16toi64: - %res = bitcast <4 x i16> %in to i64 -; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} - ret i64 %res -} - -define i64 @test_bitcastv2i32toi64(<2 x i32> %in) { -; CHECK-LABEL: test_bitcastv2i32toi64: - %res = bitcast <2 x i32> %in to i64 -; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} - ret i64 %res -} - -define i64 @test_bitcastv2f32toi64(<2 x float> %in) { -; CHECK-LABEL: test_bitcastv2f32toi64: - %res = bitcast <2 x float> %in to i64 -; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} - ret i64 %res -} - -define i64 @test_bitcastv1i64toi64(<1 x i64> %in) { -; CHECK-LABEL: test_bitcastv1i64toi64: - %res = bitcast <1 x i64> %in to i64 -; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} - ret i64 %res -} - -define i64 @test_bitcastv1f64toi64(<1 x double> %in) { -; CHECK-LABEL: test_bitcastv1f64toi64: - %res = bitcast <1 x double> %in to i64 -; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} - ret i64 %res -} - -define <8 x i8> @test_bitcasti64tov8i8(i64 %in) { -; CHECK-LABEL: test_bitcasti64tov8i8: - %res = bitcast i64 %in to <8 x i8> -; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} - ret <8 x i8> %res -} - -define <4 x i16> @test_bitcasti64tov4i16(i64 %in) { -; CHECK-LABEL: test_bitcasti64tov4i16: - %res = bitcast i64 %in to <4 x i16> -; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} - ret <4 x i16> %res -} - -define <2 x i32> @test_bitcasti64tov2i32(i64 %in) { -; CHECK-LABEL: test_bitcasti64tov2i32: - %res = bitcast i64 %in to <2 x i32> -; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} - ret <2 x i32> %res -} - -define <2 x float> @test_bitcasti64tov2f32(i64 %in) { -; CHECK-LABEL: test_bitcasti64tov2f32: - %res = bitcast i64 %in to <2 x float> -; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} - ret <2 x float> %res -} - -define <1 x i64> @test_bitcasti64tov1i64(i64 %in) { -; CHECK-LABEL: test_bitcasti64tov1i64: - %res = bitcast i64 %in to <1 x i64> -; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} - ret <1 x i64> %res -} - -define <1 x double> @test_bitcasti64tov1f64(i64 %in) { -; CHECK-LABEL: test_bitcasti64tov1f64: - %res = bitcast i64 %in to <1 x double> -; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} - ret <1 x double> %res -} - -define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 { -; CHECK-LABEL: test_bitcastv8i8tov1f64: -; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} - %sub.i = sub <8 x i8> zeroinitializer, %a - %1 = bitcast <8 x i8> %sub.i to <1 x double> - %vcvt.i = fptosi <1 x double> %1 to <1 x i64> - ret <1 x i64> %vcvt.i -} - -define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 { -; CHECK-LABEL: test_bitcastv4i16tov1f64: -; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} - %sub.i = sub <4 x i16> zeroinitializer, %a - %1 = bitcast <4 x i16> %sub.i to <1 x double> - %vcvt.i = fptosi <1 x double> %1 to <1 x i64> - ret <1 x i64> %vcvt.i -} - 
-define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 { -; CHECK-LABEL: test_bitcastv2i32tov1f64: -; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} - %sub.i = sub <2 x i32> zeroinitializer, %a - %1 = bitcast <2 x i32> %sub.i to <1 x double> - %vcvt.i = fptosi <1 x double> %1 to <1 x i64> - ret <1 x i64> %vcvt.i -} - -define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 { -; CHECK-LABEL: test_bitcastv1i64tov1f64: -; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} - %sub.i = sub <1 x i64> zeroinitializer, %a - %1 = bitcast <1 x i64> %sub.i to <1 x double> - %vcvt.i = fptosi <1 x double> %1 to <1 x i64> - ret <1 x i64> %vcvt.i -} - -define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 { -; CHECK-LABEL: test_bitcastv2f32tov1f64: -; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} - %sub.i = fsub <2 x float> , %a - %1 = bitcast <2 x float> %sub.i to <1 x double> - %vcvt.i = fptosi <1 x double> %1 to <1 x i64> - ret <1 x i64> %vcvt.i -} - -define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 { -; CHECK-LABEL: test_bitcastv1f64tov8i8: -; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %vcvt.i = sitofp <1 x i64> %a to <1 x double> - %1 = bitcast <1 x double> %vcvt.i to <8 x i8> - %sub.i = sub <8 x i8> zeroinitializer, %1 - ret <8 x i8> %sub.i -} - -define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 { -; CHECK-LABEL: test_bitcastv1f64tov4i16: -; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %vcvt.i = sitofp <1 x i64> %a to <1 x double> - %1 = bitcast <1 x double> %vcvt.i to <4 x i16> - %sub.i = sub <4 x i16> zeroinitializer, %1 - ret <4 x i16> %sub.i -} - -define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 { -; CHECK-LABEL: test_bitcastv1f64tov2i32: -; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %vcvt.i = sitofp <1 x i64> %a to <1 x double> - %1 = bitcast <1 x double> %vcvt.i to <2 x i32> - %sub.i = sub <2 x i32> zeroinitializer, %1 - ret <2 x i32> %sub.i -} - -define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 { -; CHECK-LABEL: test_bitcastv1f64tov1i64: -; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}} - %vcvt.i = sitofp <1 x i64> %a to <1 x double> - %1 = bitcast <1 x double> %vcvt.i to <1 x i64> - %sub.i = sub <1 x i64> zeroinitializer, %1 - ret <1 x i64> %sub.i -} - -define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 { -; CHECK-LABEL: test_bitcastv1f64tov2f32: -; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %vcvt.i = sitofp <1 x i64> %a to <1 x double> - %1 = bitcast <1 x double> %vcvt.i to <2 x float> - %sub.i = fsub <2 x float> , %1 - ret <2 x float> %sub.i -} - -; Test insert element into an undef vector -define <8 x i8> @scalar_to_vector.v8i8(i8 %a) { -; CHECK-LABEL: scalar_to_vector.v8i8: -; CHECK: ins {{v[0-9]+}}.b[0], {{w[0-9]+}} - %b = insertelement <8 x i8> undef, i8 %a, i32 0 - ret <8 x i8> %b -} - -define <16 x i8> @scalar_to_vector.v16i8(i8 %a) { -; CHECK-LABEL: scalar_to_vector.v16i8: -; CHECK: ins {{v[0-9]+}}.b[0], {{w[0-9]+}} - %b = insertelement <16 x i8> undef, i8 %a, i32 0 - ret <16 x i8> %b -} - -define <4 x i16> @scalar_to_vector.v4i16(i16 %a) { -; CHECK-LABEL: scalar_to_vector.v4i16: -; CHECK: ins {{v[0-9]+}}.h[0], {{w[0-9]+}} - %b = insertelement <4 x i16> undef, i16 %a, i32 0 - ret 
<4 x i16> %b -} - -define <8 x i16> @scalar_to_vector.v8i16(i16 %a) { -; CHECK-LABEL: scalar_to_vector.v8i16: -; CHECK: ins {{v[0-9]+}}.h[0], {{w[0-9]+}} - %b = insertelement <8 x i16> undef, i16 %a, i32 0 - ret <8 x i16> %b -} - -define <2 x i32> @scalar_to_vector.v2i32(i32 %a) { -; CHECK-LABEL: scalar_to_vector.v2i32: -; CHECK: ins {{v[0-9]+}}.s[0], {{w[0-9]+}} - %b = insertelement <2 x i32> undef, i32 %a, i32 0 - ret <2 x i32> %b -} - -define <4 x i32> @scalar_to_vector.v4i32(i32 %a) { -; CHECK-LABEL: scalar_to_vector.v4i32: -; CHECK: ins {{v[0-9]+}}.s[0], {{w[0-9]+}} - %b = insertelement <4 x i32> undef, i32 %a, i32 0 - ret <4 x i32> %b -} - -define <2 x i64> @scalar_to_vector.v2i64(i64 %a) { -; CHECK-LABEL: scalar_to_vector.v2i64: -; CHECK: ins {{v[0-9]+}}.d[0], {{x[0-9]+}} - %b = insertelement <2 x i64> undef, i64 %a, i32 0 - ret <2 x i64> %b -} - -define <8 x i8> @testDUP.v1i8(<1 x i8> %a) { -; CHECK-LABEL: testDUP.v1i8: -; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} - %b = extractelement <1 x i8> %a, i32 0 - %c = insertelement <8 x i8> undef, i8 %b, i32 0 - %d = insertelement <8 x i8> %c, i8 %b, i32 1 - %e = insertelement <8 x i8> %d, i8 %b, i32 2 - %f = insertelement <8 x i8> %e, i8 %b, i32 3 - %g = insertelement <8 x i8> %f, i8 %b, i32 4 - %h = insertelement <8 x i8> %g, i8 %b, i32 5 - %i = insertelement <8 x i8> %h, i8 %b, i32 6 - %j = insertelement <8 x i8> %i, i8 %b, i32 7 - ret <8 x i8> %j -} - -define <8 x i16> @testDUP.v1i16(<1 x i16> %a) { -; CHECK-LABEL: testDUP.v1i16: -; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} - %b = extractelement <1 x i16> %a, i32 0 - %c = insertelement <8 x i16> undef, i16 %b, i32 0 - %d = insertelement <8 x i16> %c, i16 %b, i32 1 - %e = insertelement <8 x i16> %d, i16 %b, i32 2 - %f = insertelement <8 x i16> %e, i16 %b, i32 3 - %g = insertelement <8 x i16> %f, i16 %b, i32 4 - %h = insertelement <8 x i16> %g, i16 %b, i32 5 - %i = insertelement <8 x i16> %h, i16 %b, i32 6 - %j = insertelement <8 x i16> %i, i16 %b, i32 7 - ret <8 x i16> %j -} - -define <4 x i32> @testDUP.v1i32(<1 x i32> %a) { -; CHECK-LABEL: testDUP.v1i32: -; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} - %b = extractelement <1 x i32> %a, i32 0 - %c = insertelement <4 x i32> undef, i32 %b, i32 0 - %d = insertelement <4 x i32> %c, i32 %b, i32 1 - %e = insertelement <4 x i32> %d, i32 %b, i32 2 - %f = insertelement <4 x i32> %e, i32 %b, i32 3 - ret <4 x i32> %f -} - -define <8 x i8> @getl(<16 x i8> %x) #0 { -; CHECK-LABEL: getl: -; CHECK: ret - %vecext = extractelement <16 x i8> %x, i32 0 - %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0 - %vecext1 = extractelement <16 x i8> %x, i32 1 - %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1 - %vecext3 = extractelement <16 x i8> %x, i32 2 - %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2 - %vecext5 = extractelement <16 x i8> %x, i32 3 - %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3 - %vecext7 = extractelement <16 x i8> %x, i32 4 - %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4 - %vecext9 = extractelement <16 x i8> %x, i32 5 - %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5 - %vecext11 = extractelement <16 x i8> %x, i32 6 - %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6 - %vecext13 = extractelement <16 x i8> %x, i32 7 - %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7 - ret <8 x i8> %vecinit14 -} - -define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) { -; CHECK-LABEL: test_dup_v2i32_v4i16: -; CHECK: dup v0.4h, v0.h[2] 
-entry: - %x = extractelement <2 x i32> %a, i32 1 - %vget_lane = trunc i32 %x to i16 - %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 - %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 - %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 - %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 - ret <4 x i16> %vecinit3.i -} - -define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) { -; CHECK-LABEL: test_dup_v4i32_v8i16: -; CHECK: dup v0.8h, v0.h[6] -entry: - %x = extractelement <4 x i32> %a, i32 3 - %vget_lane = trunc i32 %x to i16 - %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 - %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 - %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2 - %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 - %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 - %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 - %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 - %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 - ret <8 x i16> %vecinit7.i -} - -define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) { -; CHECK-LABEL: test_dup_v1i64_v4i16: -; CHECK: dup v0.4h, v0.h[0] -entry: - %x = extractelement <1 x i64> %a, i32 0 - %vget_lane = trunc i64 %x to i16 - %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 - %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 - %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 - %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 - ret <4 x i16> %vecinit3.i -} - -define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) { -; CHECK-LABEL: test_dup_v1i64_v2i32: -; CHECK: dup v0.2s, v0.s[0] -entry: - %x = extractelement <1 x i64> %a, i32 0 - %vget_lane = trunc i64 %x to i32 - %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 - %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 - ret <2 x i32> %vecinit1.i -} - -define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) { -; CHECK-LABEL: test_dup_v2i64_v8i16: -; CHECK: dup v0.8h, v0.h[4] -entry: - %x = extractelement <2 x i64> %a, i32 1 - %vget_lane = trunc i64 %x to i16 - %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 - %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 - %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2 - %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 - %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 - %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 - %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 - %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 - ret <8 x i16> %vecinit7.i -} - -define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) { -; CHECK-LABEL: test_dup_v2i64_v4i32: -; CHECK: dup v0.4s, v0.s[2] -entry: - %x = extractelement <2 x i64> %a, i32 1 - %vget_lane = trunc i64 %x to i32 - %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0 - %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1 - %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2 - %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3 - ret <4 x i32> %vecinit3.i -} - -define <4 x i16> 
@test_dup_v4i32_v4i16(<4 x i32> %a) { -; CHECK-LABEL: test_dup_v4i32_v4i16: -; CHECK: dup v0.4h, v0.h[2] -entry: - %x = extractelement <4 x i32> %a, i32 1 - %vget_lane = trunc i32 %x to i16 - %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 - %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 - %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 - %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 - ret <4 x i16> %vecinit3.i -} - -define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) { -; CHECK-LABEL: test_dup_v2i64_v4i16: -; CHECK: dup v0.4h, v0.h[0] -entry: - %x = extractelement <2 x i64> %a, i32 0 - %vget_lane = trunc i64 %x to i16 - %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 - %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 - %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 - %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 - ret <4 x i16> %vecinit3.i -} - -define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) { -; CHECK-LABEL: test_dup_v2i64_v2i32: -; CHECK: dup v0.2s, v0.s[0] -entry: - %x = extractelement <2 x i64> %a, i32 0 - %vget_lane = trunc i64 %x to i32 - %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 - %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 - ret <2 x i32> %vecinit1.i -} - - -define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) { -; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32: -; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s -; CHECK-NEXT: ret -entry: - %0 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a) - %1 = insertelement <1 x float> undef, float %0, i32 0 - %2 = extractelement <1 x float> %1, i32 0 - %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0 - ret <2 x float> %vecinit1.i -} - -define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) { -; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32: -; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s -; CHECK-NEXT: ret -entry: - %0 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a) - %1 = insertelement <1 x float> undef, float %0, i32 0 - %2 = extractelement <1 x float> %1, i32 0 - %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0 - ret <4 x float> %vecinit1.i -} - -declare float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float>) - -define <2 x i32> @test_concat_undef_v1i32(<1 x i32> %a) { -; CHECK-LABEL: test_concat_undef_v1i32: -; CHECK: ins v{{[0-9]+}}.s[1], v{{[0-9]+}}.s[0] -entry: - %0 = extractelement <1 x i32> %a, i32 0 - %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1 - ret <2 x i32> %vecinit1.i -} - -declare <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32>) #4 - -define <2 x i32> @test_concat_v1i32_undef(<1 x i32> %a) { -; CHECK-LABEL: test_concat_v1i32_undef: -; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} -; CHECK-NEXT: ret -entry: - %b = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %a) - %0 = extractelement <1 x i32> %b, i32 0 - %vecinit.i432 = insertelement <2 x i32> undef, i32 %0, i32 0 - ret <2 x i32> %vecinit.i432 -} - -define <2 x i32> @test_concat_same_v1i32_v1i32(<1 x i32> %a) { -; CHECK-LABEL: test_concat_same_v1i32_v1i32: -; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] -entry: - %0 = extractelement <1 x i32> %a, i32 0 - %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0 - %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1 - ret <2 x i32> %vecinit1.i -} - -define <2 x i32> 
@test_concat_diff_v1i32_v1i32(<1 x i32> %a, <1 x i32> %b) { -; CHECK-LABEL: test_concat_diff_v1i32_v1i32: -; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} -; CHECK-NEXT: sqabs s{{[0-9]+}}, s{{[0-9]+}} -; CHECK-NEXT: ins v0.s[1], v1.s[0] -entry: - %c = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %a) - %d = extractelement <1 x i32> %c, i32 0 - %e = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %b) - %f = extractelement <1 x i32> %e, i32 0 - %h = shufflevector <1 x i32> %c, <1 x i32> %e, <2 x i32> - ret <2 x i32> %h -} - -define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> - ret <16 x i8> %vecinit30 -} - -define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <8 x i8> %x, i32 0 - %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 - %vecext1 = extractelement <8 x i8> %x, i32 1 - %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 - %vecext3 = extractelement <8 x i8> %x, i32 2 - %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 - %vecext5 = extractelement <8 x i8> %x, i32 3 - %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 - %vecext7 = extractelement <8 x i8> %x, i32 4 - %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 - %vecext9 = extractelement <8 x i8> %x, i32 5 - %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 - %vecext11 = extractelement <8 x i8> %x, i32 6 - %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 - %vecext13 = extractelement <8 x i8> %x, i32 7 - %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 - %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> - ret <16 x i8> %vecinit30 -} - -define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <16 x i8> %x, i32 0 - %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 - %vecext1 = extractelement <16 x i8> %x, i32 1 - %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 - %vecext3 = extractelement <16 x i8> %x, i32 2 - %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 - %vecext5 = extractelement <16 x i8> %x, i32 3 - %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 - %vecext7 = extractelement <16 x i8> %x, i32 4 - %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 - %vecext9 = extractelement <16 x i8> %x, i32 5 - %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 - %vecext11 = extractelement <16 x i8> %x, i32 6 - %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 - %vecext13 = extractelement <16 x i8> %x, i32 7 - %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 - %vecext15 = extractelement <8 x i8> %y, i32 0 - %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 - %vecext17 = extractelement <8 x i8> %y, i32 1 - %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 - %vecext19 = extractelement <8 x i8> %y, i32 2 - %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 - %vecext21 = extractelement <8 x i8> %y, i32 
3 - %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 - %vecext23 = extractelement <8 x i8> %y, i32 4 - %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 - %vecext25 = extractelement <8 x i8> %y, i32 5 - %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 - %vecext27 = extractelement <8 x i8> %y, i32 6 - %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 - %vecext29 = extractelement <8 x i8> %y, i32 7 - %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 - ret <16 x i8> %vecinit30 -} - -define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <8 x i8> %x, i32 0 - %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 - %vecext1 = extractelement <8 x i8> %x, i32 1 - %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 - %vecext3 = extractelement <8 x i8> %x, i32 2 - %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 - %vecext5 = extractelement <8 x i8> %x, i32 3 - %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 - %vecext7 = extractelement <8 x i8> %x, i32 4 - %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 - %vecext9 = extractelement <8 x i8> %x, i32 5 - %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 - %vecext11 = extractelement <8 x i8> %x, i32 6 - %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 - %vecext13 = extractelement <8 x i8> %x, i32 7 - %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 - %vecext15 = extractelement <8 x i8> %y, i32 0 - %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 - %vecext17 = extractelement <8 x i8> %y, i32 1 - %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 - %vecext19 = extractelement <8 x i8> %y, i32 2 - %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 - %vecext21 = extractelement <8 x i8> %y, i32 3 - %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 - %vecext23 = extractelement <8 x i8> %y, i32 4 - %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 - %vecext25 = extractelement <8 x i8> %y, i32 5 - %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 - %vecext27 = extractelement <8 x i8> %y, i32 6 - %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 - %vecext29 = extractelement <8 x i8> %y, i32 7 - %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 - ret <16 x i8> %vecinit30 -} - -define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> - ret <8 x i16> %vecinit14 -} - -define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <4 x i16> %x, i32 0 - %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 - %vecext1 = extractelement <4 x i16> %x, i32 1 - %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 - %vecext3 = extractelement <4 x i16> %x, i32 2 - %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 - %vecext5 = extractelement <4 x i16> %x, i32 3 - %vecinit6 = 
insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 - %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> - ret <8 x i16> %vecinit14 -} - -define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <8 x i16> %x, i32 0 - %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 - %vecext1 = extractelement <8 x i16> %x, i32 1 - %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 - %vecext3 = extractelement <8 x i16> %x, i32 2 - %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 - %vecext5 = extractelement <8 x i16> %x, i32 3 - %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 - %vecext7 = extractelement <4 x i16> %y, i32 0 - %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 - %vecext9 = extractelement <4 x i16> %y, i32 1 - %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 - %vecext11 = extractelement <4 x i16> %y, i32 2 - %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 - %vecext13 = extractelement <4 x i16> %y, i32 3 - %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 - ret <8 x i16> %vecinit14 -} - -define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <4 x i16> %x, i32 0 - %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 - %vecext1 = extractelement <4 x i16> %x, i32 1 - %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 - %vecext3 = extractelement <4 x i16> %x, i32 2 - %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 - %vecext5 = extractelement <4 x i16> %x, i32 3 - %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 - %vecext7 = extractelement <4 x i16> %y, i32 0 - %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 - %vecext9 = extractelement <4 x i16> %y, i32 1 - %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 - %vecext11 = extractelement <4 x i16> %y, i32 2 - %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 - %vecext13 = extractelement <4 x i16> %y, i32 3 - %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 - ret <8 x i16> %vecinit14 -} - -define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { -; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> - ret <4 x i32> %vecinit6 -} - -define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { -; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <2 x i32> %x, i32 0 - %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 - %vecext1 = extractelement <2 x i32> %x, i32 1 - %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 - %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> - ret <4 x i32> %vecinit6 -} - -define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 { -; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <4 x i32> %x, i32 0 - %vecinit = insertelement <4 x i32> undef, i32 
%vecext, i32 0
- %vecext1 = extractelement <4 x i32> %x, i32 1
- %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
- %vecext3 = extractelement <2 x i32> %y, i32 0
- %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
- %vecext5 = extractelement <2 x i32> %y, i32 1
- %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
- ret <4 x i32> %vecinit6
-}
-
-define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
-; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <2 x i32> %x, i32 0
- %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
- %vecext1 = extractelement <2 x i32> %x, i32 1
- %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
- %vecext3 = extractelement <2 x i32> %y, i32 0
- %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
- %vecext5 = extractelement <2 x i32> %y, i32 1
- %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
- ret <4 x i32> %vecinit6
-}
-
-define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32>
- ret <2 x i64> %vecinit2
-}
-
-define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <1 x i64> %x, i32 0
- %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
- %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32>
- ret <2 x i64> %vecinit2
-}
-
-define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <2 x i64> %x, i32 0
- %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
- %vecext1 = extractelement <1 x i64> %y, i32 0
- %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
- ret <2 x i64> %vecinit2
-}
-
-define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <1 x i64> %x, i32 0
- %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
- %vecext1 = extractelement <1 x i64> %y, i32 0
- %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
- ret <2 x i64> %vecinit2
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>)
-
-; This case tests the copy of two FPR8 registers, which is implemented by fmov
-; of two FPR32 registers.
-define <1 x i8> @test_copy_FPR8_FPR8(<1 x i8> %a, <1 x i8> %b) { -; CHECK-LABEL: test_copy_FPR8_FPR8: -; CHECK: usqadd b1, b0 -; CHECK-NEXT: fmov s0, s1 -entry: - %vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %b, <1 x i8> %a) - ret <1 x i8> %vsqadd2.i -} - -declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>) - -define <1 x i16> @test_copy_FPR16_FPR16(<1 x i16> %a, <1 x i16> %b) { -; CHECK-LABEL: test_copy_FPR16_FPR16: -; CHECK: usqadd h1, h0 -; CHECK-NEXT: fmov s0, s1 -entry: - %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %b, <1 x i16> %a) - ret <1 x i16> %vsqadd2.i -} - -define <4 x i16> @concat_vector_v4i16_const() { -; CHECK-LABEL: concat_vector_v4i16_const: -; CHECK: dup {{v[0-9]+}}.4h, wzr - %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer - ret <4 x i16> %r -} - -define <4 x i16> @concat_vector_v4i16_const_one() { -; CHECK-LABEL: concat_vector_v4i16_const_one: -; CHECK: movz {{w[0-9]+}}, #1 -; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} - %r = shufflevector <1 x i16> , <1 x i16> undef, <4 x i32> zeroinitializer - ret <4 x i16> %r -} - -define <4 x i32> @concat_vector_v4i32_const() { -; CHECK-LABEL: concat_vector_v4i32_const: -; CHECK: dup {{v[0-9]+}}.4s, wzr - %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer - ret <4 x i32> %r -} - -define <8 x i8> @concat_vector_v8i8_const() { -; CHECK-LABEL: concat_vector_v8i8_const: -; CHECK: dup {{v[0-9]+}}.8b, wzr - %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer - ret <8 x i8> %r -} - -define <8 x i16> @concat_vector_v8i16_const() { -; CHECK-LABEL: concat_vector_v8i16_const: -; CHECK: dup {{v[0-9]+}}.8h, wzr - %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer - ret <8 x i16> %r -} - -define <8 x i16> @concat_vector_v8i16_const_one() { -; CHECK-LABEL: concat_vector_v8i16_const_one: -; CHECK: movz {{w[0-9]+}}, #1 -; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} - %r = shufflevector <1 x i16> , <1 x i16> undef, <8 x i32> zeroinitializer - ret <8 x i16> %r -} - -define <16 x i8> @concat_vector_v16i8_const() { -; CHECK-LABEL: concat_vector_v16i8_const: -; CHECK: dup {{v[0-9]+}}.16b, wzr - %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer - ret <16 x i8> %r -} - -define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) { -; CHECK-LABEL: concat_vector_v4i16: -; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] - %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer - ret <4 x i16> %r -} - -define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) { -; CHECK-LABEL: concat_vector_v4i32: -; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] - %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer - ret <4 x i32> %r -} - -define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) { -; CHECK-LABEL: concat_vector_v8i8: -; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[0] - %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer - ret <8 x i8> %r -} - -define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) { -; CHECK-LABEL: concat_vector_v8i16: -; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] - %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer - ret <8 x i16> %r -} - -define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) { -; CHECK-LABEL: concat_vector_v16i8: -; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[0] - %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> 
zeroinitializer - ret <16 x i8> %r -} diff --git a/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll b/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll deleted file mode 100644 index 1256b2b65049..000000000000 --- a/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll +++ /dev/null @@ -1,48 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has a separate copy due to intrinsics - -define <4 x i32> @copyTuple.QPair(i8* %a, i8* %b) { -; CHECK-LABEL: copyTuple.QPair: -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}] -entry: - %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %a, <4 x i32> , <4 x i32> , i32 0, i32 4) - %extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0 - %vld1 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> , i32 1, i32 4) - %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld1, 0 - ret <4 x i32> %vld1.fca.0.extract -} - -define <4 x i32> @copyTuple.QTriple(i8* %a, i8* %b, <4 x i32> %c) { -; CHECK-LABEL: copyTuple.QTriple: -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}] -entry: - %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %a, <4 x i32> , <4 x i32> %c, <4 x i32> %c, i32 0, i32 4) - %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0 - %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> , <4 x i32> %c, i32 1, i32 4) - %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0 - ret <4 x i32> %vld1.fca.0.extract -} - -define <4 x i32> @copyTuple.QQuad(i8* %a, i8* %b, <4 x i32> %c) { -; CHECK-LABEL: copyTuple.QQuad: -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}] -entry: - %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %a, <4 x i32> , <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i32 0, i32 4) - %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0 - %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> , <4 x i32> %c, <4 x i32> %c, i32 1, i32 4) - %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0 - ret <4 x i32> %vld1.fca.0.extract -} - -declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) diff --git a/test/CodeGen/AArch64/neon-crypto.ll b/test/CodeGen/AArch64/neon-crypto.ll deleted file mode 100644 index 5f1491eb1e90..000000000000 --- a/test/CodeGen/AArch64/neon-crypto.ll +++ /dev/null @@ -1,145 +0,0 @@ -; RUN: llc < %s 
-verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -mattr=+crypto | FileCheck %s -; RUN: not llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s -; arm64 has a separate test for this, covering the same features (crypto.ll). N.b. NO-CRYPTO will need porting. - -declare <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32>, <4 x i32>, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32>, <4 x i32>, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha256h(<4 x i32>, <4 x i32>, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32>, <4 x i32>, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha1m(<4 x i32>, i32, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha1p(<4 x i32>, i32, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha1c(<4 x i32>, i32, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32>, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32>, <4 x i32>) #1 - -declare i32 @llvm.arm.neon.sha1h(i32) #1 - -declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>) #1 - -declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>) #1 - -declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>) #1 - -declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>) #1 - -define <16 x i8> @test_vaeseq_u8(<16 x i8> %data, <16 x i8> %key) { -; CHECK: test_vaeseq_u8: -; CHECK: aese {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -; CHECK-NO-CRYPTO: Cannot select: intrinsic %llvm.arm.neon.aese -entry: - %aese.i = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data, <16 x i8> %key) - ret <16 x i8> %aese.i -} - -define <16 x i8> @test_vaesdq_u8(<16 x i8> %data, <16 x i8> %key) { -; CHECK: test_vaesdq_u8: -; CHECK: aesd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %aesd.i = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data, <16 x i8> %key) - ret <16 x i8> %aesd.i -} - -define <16 x i8> @test_vaesmcq_u8(<16 x i8> %data) { -; CHECK: test_vaesmcq_u8: -; CHECK: aesmc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %aesmc.i = tail call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %data) - ret <16 x i8> %aesmc.i -} - -define <16 x i8> @test_vaesimcq_u8(<16 x i8> %data) { -; CHECK: test_vaesimcq_u8: -; CHECK: aesimc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %aesimc.i = tail call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %data) - ret <16 x i8> %aesimc.i -} - -define i32 @test_vsha1h_u32(i32 %hash_e) { -; CHECK: test_vsha1h_u32: -; CHECK: sha1h {{s[0-9]+}}, {{s[0-9]+}} -entry: - %sha1h1.i = tail call i32 @llvm.arm.neon.sha1h(i32 %hash_e) - ret i32 %sha1h1.i -} - -define <4 x i32> @test_vsha1su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w12_15) { -; CHECK: test_vsha1su1q_u32: -; CHECK: sha1su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %sha1su12.i = tail call <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32> %tw0_3, <4 x i32> %w12_15) - ret <4 x i32> %sha1su12.i -} - -define <4 x i32> @test_vsha256su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7) { -; CHECK: test_vsha256su0q_u32: -; CHECK: sha256su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %sha256su02.i = tail call <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) - ret <4 x i32> %sha256su02.i -} - -define <4 x i32> @test_vsha1cq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { -; CHECK: test_vsha1cq_u32: -; CHECK: sha1c {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s -entry: - %sha1c1.i = tail call <4 x i32> @llvm.arm.neon.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) - ret <4 x i32> %sha1c1.i -} - -define <4 x i32> @test_vsha1pq_u32(<4 x 
i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { -; CHECK: test_vsha1pq_u32: -; CHECK: sha1p {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s -entry: - %sha1p1.i = tail call <4 x i32> @llvm.arm.neon.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) - ret <4 x i32> %sha1p1.i -} - -define <4 x i32> @test_vsha1mq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { -; CHECK: test_vsha1mq_u32: -; CHECK: sha1m {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s -entry: - %sha1m1.i = tail call <4 x i32> @llvm.arm.neon.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) - ret <4 x i32> %sha1m1.i -} - -define <4 x i32> @test_vsha1su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11) { -; CHECK: test_vsha1su0q_u32: -; CHECK: sha1su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %sha1su03.i = tail call <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11) - ret <4 x i32> %sha1su03.i -} - -define <4 x i32> @test_vsha256hq_u32(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) { -; CHECK: test_vsha256hq_u32: -; CHECK: sha256h {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s -entry: - %sha256h3.i = tail call <4 x i32> @llvm.arm.neon.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) - ret <4 x i32> %sha256h3.i -} - -define <4 x i32> @test_vsha256h2q_u32(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) { -; CHECK: test_vsha256h2q_u32: -; CHECK: sha256h2 {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s -entry: - %sha256h23.i = tail call <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) - ret <4 x i32> %sha256h23.i -} - -define <4 x i32> @test_vsha256su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) { -; CHECK: test_vsha256su1q_u32: -; CHECK: sha256su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %sha256su13.i = tail call <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) - ret <4 x i32> %sha256su13.i -} - diff --git a/test/CodeGen/AArch64/neon-diagnostics.ll b/test/CodeGen/AArch64/neon-diagnostics.ll index 470bff771e3d..e28df29f3e85 100644 --- a/test/CodeGen/AArch64/neon-diagnostics.ll +++ b/test/CodeGen/AArch64/neon-diagnostics.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { diff --git a/test/CodeGen/AArch64/neon-extract.ll b/test/CodeGen/AArch64/neon-extract.ll index f16b0365c8ed..96b4084a2574 100644 --- a/test/CodeGen/AArch64/neon-extract.ll +++ b/test/CodeGen/AArch64/neon-extract.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) { diff --git a/test/CodeGen/AArch64/neon-facge-facgt.ll b/test/CodeGen/AArch64/neon-facge-facgt.ll deleted file mode 100644 index bf43e51cc297..000000000000 --- a/test/CodeGen/AArch64/neon-facge-facgt.ll +++ /dev/null @@ -1,57 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 has duplicates for this functionality in vcmp.ll. 
-
-declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>)
-declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>)
-declare <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double>, <2 x double>)
-
-define <2 x i32> @facge_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
-; Using registers other than v0, v1 and v2 are possible, but would be odd.
-; CHECK: facge_from_intr_v2i32:
- %val = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %A, <2 x float> %B)
-; CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- ret <2 x i32> %val
-}
-define <4 x i32> @facge_from_intr_v4i32( <4 x float> %A, <4 x float> %B) {
-; Using registers other than v0, v1 and v2 are possible, but would be odd.
-; CHECK: facge_from_intr_v4i32:
- %val = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %A, <4 x float> %B)
-; CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- ret <4 x i32> %val
-}
-
-define <2 x i64> @facge_from_intr_v2i64(<2 x double> %A, <2 x double> %B) {
-; Using registers other than v0, v1 and v2 are possible, but would be odd.
-; CHECK: facge_from_intr_v2i64:
- %val = call <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double> %A, <2 x double> %B)
-; CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- ret <2 x i64> %val
-}
-
-declare <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float>, <2 x float>)
-declare <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float>, <4 x float>)
-declare <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double>, <2 x double>)
-
-define <2 x i32> @facgt_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
-; Using registers other than v0, v1 and v2 are possible, but would be odd.
-; CHECK: facgt_from_intr_v2i32:
- %val = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %A, <2 x float> %B)
-; CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- ret <2 x i32> %val
-}
-define <4 x i32> @facgt_from_intr_v4i32( <4 x float> %A, <4 x float> %B) {
-; Using registers other than v0, v1 and v2 are possible, but would be odd.
-; CHECK: facgt_from_intr_v4i32:
- %val = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %A, <4 x float> %B)
-; CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- ret <4 x i32> %val
-}
-
-define <2 x i64> @facgt_from_intr_v2i64(<2 x double> %A, <2 x double> %B) {
-; Using registers other than v0, v1 and v2 are possible, but would be odd.
-; CHECK: facgt_from_intr_v2i64:
- %val = call <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double> %A, <2 x double> %B)
-; CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- ret <2 x i64> %val
-}
-
diff --git a/test/CodeGen/AArch64/neon-fma.ll b/test/CodeGen/AArch64/neon-fma.ll
index 9b1657c36f2a..6df494dedaee 100644
--- a/test/CodeGen/AArch64/neon-fma.ll
+++ b/test/CodeGen/AArch64/neon-fma.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
diff --git a/test/CodeGen/AArch64/neon-fpround_f128.ll b/test/CodeGen/AArch64/neon-fpround_f128.ll
index f6c0d06872db..e48dbbaec929 100644
--- a/test/CodeGen/AArch64/neon-fpround_f128.ll
+++ b/test/CodeGen/AArch64/neon-fpround_f128.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 define <1 x double> @test_fpround_v1f128(<1 x fp128>* %a) {
diff --git a/test/CodeGen/AArch64/neon-frsqrt-frecp.ll b/test/CodeGen/AArch64/neon-frsqrt-frecp.ll
deleted file mode 100644
index 199258d60ecb..000000000000
--- a/test/CodeGen/AArch64/neon-frsqrt-frecp.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 has a duplicate for all these tests in vsqrt.ll
-
-; Set of tests for when the intrinsic is used.
-
-declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vrsqrts.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @frsqrts_from_intr_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; Using registers other than v0, v1 are possible, but would be odd.
-; CHECK: frsqrts v0.2s, v0.2s, v1.2s
- %val = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %lhs, <2 x float> %rhs)
- ret <2 x float> %val
-}
-
-define <4 x float> @frsqrts_from_intr_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; Using registers other than v0, v1 are possible, but would be odd.
-; CHECK: frsqrts v0.4s, v0.4s, v1.4s
- %val = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %lhs, <4 x float> %rhs)
- ret <4 x float> %val
-}
-
-define <2 x double> @frsqrts_from_intr_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; Using registers other than v0, v1 are possible, but would be odd.
-; CHECK: frsqrts v0.2d, v0.2d, v1.2d
- %val = call <2 x double> @llvm.arm.neon.vrsqrts.v2f64(<2 x double> %lhs, <2 x double> %rhs)
- ret <2 x double> %val
-}
-
-declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vrecps.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @frecps_from_intr_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; Using registers other than v0, v1 are possible, but would be odd.
-; CHECK: frecps v0.2s, v0.2s, v1.2s - %val = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %lhs, <2 x float> %rhs) - ret <2 x float> %val -} - -define <4 x float> @frecps_from_intr_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: frecps v0.4s, v0.4s, v1.4s - %val = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %lhs, <4 x float> %rhs) - ret <4 x float> %val -} - -define <2 x double> @frecps_from_intr_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: frecps v0.2d, v0.2d, v1.2d - %val = call <2 x double> @llvm.arm.neon.vrecps.v2f64(<2 x double> %lhs, <2 x double> %rhs) - ret <2 x double> %val -} - diff --git a/test/CodeGen/AArch64/neon-halving-add-sub.ll b/test/CodeGen/AArch64/neon-halving-add-sub.ll deleted file mode 100644 index 4d9ffe5dbd7b..000000000000 --- a/test/CodeGen/AArch64/neon-halving-add-sub.ll +++ /dev/null @@ -1,208 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 duplicates these in vhadd.ll and vhsub.ll - -declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_uhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uhadd_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uhadd v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_shadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_shadd_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: shadd v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_uhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uhadd_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uhadd v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_shadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_shadd_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: shadd v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_uhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uhadd_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uhadd v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_shadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_shadd_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: shadd v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_uhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uhadd_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uhadd v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_shadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_shadd_v8i16: - %tmp1 = call <8 x i16> 
@llvm.arm.neon.vhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: shadd v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_uhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uhadd_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uhadd v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_shadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_shadd_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: shadd v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_uhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uhadd_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uhadd v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_shadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_shadd_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: shadd v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - - -declare <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_uhsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uhsub_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uhsub v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_shsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_shsub_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: shsub v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_uhsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uhsub_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uhsub v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_shsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_shsub_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: shsub v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_uhsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uhsub_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uhsub v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_shsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_shsub_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: shsub v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_uhsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uhsub_v8i16: - %tmp1 = call <8 x i16> 
@llvm.arm.neon.vhsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uhsub v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_shsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_shsub_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: shsub v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_uhsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uhsub_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uhsub v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_shsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_shsub_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: shsub v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_uhsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uhsub_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uhsub v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_shsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_shsub_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: shsub v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - diff --git a/test/CodeGen/AArch64/neon-idiv.ll b/test/CodeGen/AArch64/neon-idiv.ll index 9c9758a81f8b..11e1af7e143e 100644 --- a/test/CodeGen/AArch64/neon-idiv.ll +++ b/test/CodeGen/AArch64/neon-idiv.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mattr=+neon | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu < %s -mattr=+neon | FileCheck %s define <4 x i32> @test1(<4 x i32> %a) { diff --git a/test/CodeGen/AArch64/neon-load-store-v1i32.ll b/test/CodeGen/AArch64/neon-load-store-v1i32.ll deleted file mode 100644 index 12361ba008db..000000000000 --- a/test/CodeGen/AArch64/neon-load-store-v1i32.ll +++ /dev/null @@ -1,30 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 does not use these pseudo-vectors, and they're not blessed by the PCS. Skipping. 
- -; Test load/store of v1i8, v1i16, v1i32 types can be selected correctly -define void @load.store.v1i8(<1 x i8>* %ptr, <1 x i8>* %ptr2) { -; CHECK-LABEL: load.store.v1i8: -; CHECK: ldr b{{[0-9]+}}, [x{{[0-9]+|sp}}] -; CHECK: str b{{[0-9]+}}, [x{{[0-9]+|sp}}] - %a = load <1 x i8>* %ptr - store <1 x i8> %a, <1 x i8>* %ptr2 - ret void -} - -define void @load.store.v1i16(<1 x i16>* %ptr, <1 x i16>* %ptr2) { -; CHECK-LABEL: load.store.v1i16: -; CHECK: ldr h{{[0-9]+}}, [x{{[0-9]+|sp}}] -; CHECK: str h{{[0-9]+}}, [x{{[0-9]+|sp}}] - %a = load <1 x i16>* %ptr - store <1 x i16> %a, <1 x i16>* %ptr2 - ret void -} - -define void @load.store.v1i32(<1 x i32>* %ptr, <1 x i32>* %ptr2) { -; CHECK-LABEL: load.store.v1i32: -; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+|sp}}] -; CHECK: str s{{[0-9]+}}, [x{{[0-9]+|sp}}] - %a = load <1 x i32>* %ptr - store <1 x i32> %a, <1 x i32>* %ptr2 - ret void -} diff --git a/test/CodeGen/AArch64/neon-max-min-pairwise.ll b/test/CodeGen/AArch64/neon-max-min-pairwise.ll deleted file mode 100644 index 8642f09c4e28..000000000000 --- a/test/CodeGen/AArch64/neon-max-min-pairwise.ll +++ /dev/null @@ -1,347 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; These duplicate arm64 tests in vmax.ll - -declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_smaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: test_smaxp_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: smaxp v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_umaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { - %tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: umaxp v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_smaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_smaxp_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: smaxp v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_umaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_umaxp_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: umaxp v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_smaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_smaxp_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: smaxp v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_umaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_umaxp_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: umaxp v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - - -declare <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_smaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_smaxp_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: smaxp v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 
-} - -define <8 x i16> @test_umaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_umaxp_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: umaxp v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - - -declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_smaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_smaxp_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: smaxp v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_umaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_umaxp_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: umaxp v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_smaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_smaxp_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: smaxp v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_umaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_umaxp_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: umaxp v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_sminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. 
-; CHECK: test_sminp_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: sminp v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_uminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { - %tmp1 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uminp v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_sminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_sminp_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: sminp v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_uminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uminp_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uminp v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_sminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sminp_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sminp v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_uminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uminp_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uminp v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - - -declare <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_sminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sminp_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sminp v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_uminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uminp_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uminp v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - - -declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_sminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sminp_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sminp v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_uminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uminp_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uminp v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_sminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sminp_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sminp v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_uminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uminp_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uminp v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - 
-declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fmaxp_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fmaxp_v2f32: - %val = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fmaxp v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fmaxp_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fmaxp_v4f32: - %val = call <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fmaxp v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fmaxp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fmaxp_v2f64: - %val = call <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fmaxp v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - -declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fminp_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fminp_v2f32: - %val = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fminp v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fminp_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fminp_v4f32: - %val = call <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fminp v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fminp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fminp_v2f64: - %val = call <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fminp v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - -declare <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fmaxnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fmaxnmp_v2f32: - %val = call <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fmaxnmp v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fmaxnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fmaxnmp_v4f32: - %val = call <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fmaxnmp v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fmaxnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fmaxnmp_v2f64: - %val = call <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fmaxnmp v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - -declare <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fminnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fminnmp_v2f32: - %val = call <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fminnmp v0.2s, 
v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fminnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fminnmp_v4f32: - %val = call <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fminnmp v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fminnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fminnmp_v2f64: - %val = call <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fminnmp v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - -define i32 @test_vminv_s32(<2 x i32> %a) { -; CHECK-LABEL: test_vminv_s32 -; CHECK: sminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %1 = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32> %a) - %2 = extractelement <1 x i32> %1, i32 0 - ret i32 %2 -} - -define i32 @test_vminv_u32(<2 x i32> %a) { -; CHECK-LABEL: test_vminv_u32 -; CHECK: uminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %1 = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32> %a) - %2 = extractelement <1 x i32> %1, i32 0 - ret i32 %2 -} - -define i32 @test_vmaxv_s32(<2 x i32> %a) { -; CHECK-LABEL: test_vmaxv_s32 -; CHECK: smaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %1 = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32> %a) - %2 = extractelement <1 x i32> %1, i32 0 - ret i32 %2 -} - -define i32 @test_vmaxv_u32(<2 x i32> %a) { -; CHECK-LABEL: test_vmaxv_u32 -; CHECK: umaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %1 = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32> %a) - %2 = extractelement <1 x i32> %1, i32 0 - ret i32 %2 -} - -declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32>) -declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32>) -declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32>) -declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32>) diff --git a/test/CodeGen/AArch64/neon-max-min.ll b/test/CodeGen/AArch64/neon-max-min.ll deleted file mode 100644 index f9a50f4e5d72..000000000000 --- a/test/CodeGen/AArch64/neon-max-min.ll +++ /dev/null @@ -1,311 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; These duplicate tests in arm64's vmax.ll - -declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_smax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. 
-; CHECK: test_smax_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: smax v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_umax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { - %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: umax v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_smax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_smax_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: smax v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_umax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_umax_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: umax v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_smax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_smax_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: smax v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_umax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_umax_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: umax v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - - -declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_smax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_smax_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: smax v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_umax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_umax_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: umax v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - - -declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_smax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_smax_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: smax v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_umax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_umax_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: umax v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_smax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_smax_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: smax v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_umax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_umax_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: umax v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x 
i8>) -declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_smin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: test_smin_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: smin v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_umin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { - %tmp1 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: umin v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_smin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_smin_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: smin v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_umin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_umin_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: umin v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_smin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_smin_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: smin v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_umin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_umin_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: umin v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - - -declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_smin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_smin_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: smin v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_umin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_umin_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: umin v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - - -declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_smin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_smin_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: smin v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_umin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_umin_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: umin v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_smin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_smin_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: smin v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_umin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_umin_v4i32: - 
%tmp1 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: umin v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fmax_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fmax_v2f32: - %val = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fmax v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fmax_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fmax_v4f32: - %val = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fmax v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fmax_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fmax_v2f64: - %val = call <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fmax v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - -declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fmin_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fmin_v2f32: - %val = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fmin v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fmin_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fmin_v4f32: - %val = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fmin v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fmin_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fmin_v2f64: - %val = call <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fmin v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - - -declare <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fmaxnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fmaxnm_v2f32: - %val = call <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fmaxnm v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fmaxnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fmaxnm_v4f32: - %val = call <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fmaxnm v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fmaxnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fmaxnm_v2f64: - %val = call <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fmaxnm v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - -declare <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fminnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fminnm_v2f32: - %val = call <2 x 
float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fminnm v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fminnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fminnm_v4f32: - %val = call <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fminnm v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fminnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fminnm_v2f64: - %val = call <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fminnm v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} diff --git a/test/CodeGen/AArch64/neon-misc-scalar.ll b/test/CodeGen/AArch64/neon-misc-scalar.ll deleted file mode 100644 index 3472c5f07bc7..000000000000 --- a/test/CodeGen/AArch64/neon-misc-scalar.ll +++ /dev/null @@ -1,61 +0,0 @@ -;RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 already has copies of these tests (scattered). - -declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>) - -declare <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64>) - -declare <1 x i64> @llvm.arm.neon.vabs.v1i64(<1 x i64>) - -declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) - -declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_vuqadd_s64(<1 x i64> %a, <1 x i64> %b) { -entry: - ; CHECK: test_vuqadd_s64 - %vuqadd2.i = tail call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b) - ; CHECK: suqadd d{{[0-9]+}}, d{{[0-9]+}} - ret <1 x i64> %vuqadd2.i -} - -define <1 x i64> @test_vsqadd_u64(<1 x i64> %a, <1 x i64> %b) { -entry: - ; CHECK: test_vsqadd_u64 - %vsqadd2.i = tail call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b) - ; CHECK: usqadd d{{[0-9]+}}, d{{[0-9]+}} - ret <1 x i64> %vsqadd2.i -} - -define <1 x i64> @test_vabs_s64(<1 x i64> %a) { - ; CHECK: test_vabs_s64 -entry: - %vabs1.i = tail call <1 x i64> @llvm.arm.neon.vabs.v1i64(<1 x i64> %a) - ; CHECK: abs d{{[0-9]+}}, d{{[0-9]+}} - ret <1 x i64> %vabs1.i -} - -define <1 x i64> @test_vqabs_s64(<1 x i64> %a) { - ; CHECK: test_vqabs_s64 -entry: - %vqabs1.i = tail call <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64> %a) - ; CHECK: sqabs d{{[0-9]+}}, d{{[0-9]+}} - ret <1 x i64> %vqabs1.i -} - -define <1 x i64> @test_vqneg_s64(<1 x i64> %a) { - ; CHECK: test_vqneg_s64 -entry: - %vqneg1.i = tail call <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64> %a) - ; CHECK: sqneg d{{[0-9]+}}, d{{[0-9]+}} - ret <1 x i64> %vqneg1.i -} - -define <1 x i64> @test_vneg_s64(<1 x i64> %a) { - ; CHECK: test_vneg_s64 -entry: - %sub.i = sub <1 x i64> zeroinitializer, %a - ; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} - ret <1 x i64> %sub.i -} - diff --git a/test/CodeGen/AArch64/neon-misc.ll b/test/CodeGen/AArch64/neon-misc.ll deleted file mode 100644 index 5682f103e93c..000000000000 --- a/test/CodeGen/AArch64/neon-misc.ll +++ /dev/null @@ -1,2014 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; arm64 has a separate copy of these in aarch64-neon-misc.ll due to different intrinsics. 
- -define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 { -; CHECK: rev16 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> - ret <8 x i8> %shuffle.i -} - -define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 { -; CHECK: rev16 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> - ret <16 x i8> %shuffle.i -} - -define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 { -; CHECK: rev32 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 { -; CHECK: rev32 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> - ret <4 x i16> %shuffle.i -} - -define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 { -; CHECK: rev32 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 { -; CHECK: rev32 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> - ret <8 x i16> %shuffle.i -} - -define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> - ret <4 x i16> %shuffle.i -} - -define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> - ret <2 x i32> %shuffle.i -} - -define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %shuffle.i = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> - ret <2 x float> %shuffle.i -} - -define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> - ret <4 x i32> %shuffle.i -} - -define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> - ret <4 x float> %shuffle.i -} - -define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b - %vpaddl.i = tail call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a) #4 - ret <4 x i16> %vpaddl.i -} - -define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h - %vpaddl1.i = tail call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a) #4 - ret <2 x i32> %vpaddl1.i -} - -define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s - %vpaddl1.i = tail call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a) #4 - ret <1 x i64> %vpaddl1.i -} - -define <4 x i16> @test_vpaddl_u8(<8 x 
i8> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b - %vpaddl.i = tail call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a) #4 - ret <4 x i16> %vpaddl.i -} - -define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h - %vpaddl1.i = tail call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a) #4 - ret <2 x i32> %vpaddl1.i -} - -define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s - %vpaddl1.i = tail call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a) #4 - ret <1 x i64> %vpaddl1.i -} - -define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b - %vpaddl.i = tail call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a) #4 - ret <8 x i16> %vpaddl.i -} - -define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %vpaddl1.i = tail call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a) #4 - ret <4 x i32> %vpaddl1.i -} - -define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %vpaddl1.i = tail call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a) #4 - ret <2 x i64> %vpaddl1.i -} - -define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b - %vpaddl.i = tail call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a) #4 - ret <8 x i16> %vpaddl.i -} - -define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %vpaddl1.i = tail call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a) #4 - ret <4 x i32> %vpaddl1.i -} - -define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %vpaddl1.i = tail call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a) #4 - ret <2 x i64> %vpaddl1.i -} - -define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b - %vpadal1.i = tail call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4 - ret <4 x i16> %vpadal1.i -} - -define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h - %vpadal2.i = tail call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4 - ret <2 x i32> %vpadal2.i -} - -define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s - %vpadal2.i = tail call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4 - ret <1 x i64> %vpadal2.i -} - -define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b - %vpadal1.i = tail call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4 - ret <4 x i16> %vpadal1.i -} - -define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h - %vpadal2.i = tail call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4 - ret <2 x i32> %vpadal2.i -} - -define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s - %vpadal2.i = tail call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4 - ret <1 x i64> %vpadal2.i -} - -define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b - 
%vpadal1.i = tail call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4 - ret <8 x i16> %vpadal1.i -} - -define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %vpadal2.i = tail call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4 - ret <4 x i32> %vpadal2.i -} - -define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %vpadal2.i = tail call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4 - ret <2 x i64> %vpadal2.i -} - -define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b - %vpadal1.i = tail call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4 - ret <8 x i16> %vpadal1.i -} - -define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %vpadal2.i = tail call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4 - ret <4 x i32> %vpadal2.i -} - -define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %vpadal2.i = tail call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4 - ret <2 x i64> %vpadal2.i -} - -define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vqabs.i = tail call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vqabs.i -} - -define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vqabs.i = tail call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vqabs.i -} - -define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vqabs1.i = tail call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a) #4 - ret <4 x i16> %vqabs1.i -} - -define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vqabs1.i = tail call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a) #4 - ret <8 x i16> %vqabs1.i -} - -define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vqabs1.i = tail call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vqabs1.i -} - -define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vqabs1.i = tail call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vqabs1.i -} - -define <2 x i64> @test_vqabsq_s64(<2 x i64> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vqabs1.i = tail call <2 x i64> @llvm.arm.neon.vqabs.v2i64(<2 x i64> %a) #4 - ret <2 x i64> %vqabs1.i -} - -define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vqneg.i = tail call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vqneg.i -} - -define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vqneg.i = tail call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vqneg.i -} - -define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vqneg1.i = tail call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a) #4 - ret <4 x i16> %vqneg1.i -} - -define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.8h, 
v{{[0-9]+}}.8h - %vqneg1.i = tail call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a) #4 - ret <8 x i16> %vqneg1.i -} - -define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vqneg1.i = tail call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vqneg1.i -} - -define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vqneg1.i = tail call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vqneg1.i -} - -define <2 x i64> @test_vqnegq_s64(<2 x i64> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vqneg1.i = tail call <2 x i64> @llvm.arm.neon.vqneg.v2i64(<2 x i64> %a) #4 - ret <2 x i64> %vqneg1.i -} - -define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 { -; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %sub.i = sub <8 x i8> zeroinitializer, %a - ret <8 x i8> %sub.i -} - -define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 { -; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %sub.i = sub <16 x i8> zeroinitializer, %a - ret <16 x i8> %sub.i -} - -define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 { -; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %sub.i = sub <4 x i16> zeroinitializer, %a - ret <4 x i16> %sub.i -} - -define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 { -; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %sub.i = sub <8 x i16> zeroinitializer, %a - ret <8 x i16> %sub.i -} - -define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 { -; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %sub.i = sub <2 x i32> zeroinitializer, %a - ret <2 x i32> %sub.i -} - -define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 { -; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %sub.i = sub <4 x i32> zeroinitializer, %a - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vnegq_s64(<2 x i64> %a) #0 { -; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %sub.i = sub <2 x i64> zeroinitializer, %a - ret <2 x i64> %sub.i -} - -define <2 x float> @test_vneg_f32(<2 x float> %a) #0 { -; CHECK: fneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %sub.i = fsub <2 x float> , %a - ret <2 x float> %sub.i -} - -define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 { -; CHECK: fneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %sub.i = fsub <4 x float> , %a - ret <4 x float> %sub.i -} - -define <2 x double> @test_vnegq_f64(<2 x double> %a) #0 { -; CHECK: fneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %sub.i = fsub <2 x double> , %a - ret <2 x double> %sub.i -} - -define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 { -; CHECK: abs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vabs.i = tail call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vabs.i -} - -define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 { -; CHECK: abs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vabs.i = tail call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vabs.i -} - -define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 { -; CHECK: abs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vabs1.i = tail call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a) #4 - ret <4 x i16> %vabs1.i -} - -define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 { -; CHECK: abs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vabs1.i = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a) #4 - ret <8 x i16> %vabs1.i -} - -define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 { -; CHECK: abs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vabs1.i = tail call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vabs1.i -} - -define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 { -; CHECK: abs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - 
%vabs1.i = tail call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vabs1.i -} - -define <2 x i64> @test_vabsq_s64(<2 x i64> %a) #0 { -; CHECK: abs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vabs1.i = tail call <2 x i64> @llvm.arm.neon.vabs.v2i64(<2 x i64> %a) #4 - ret <2 x i64> %vabs1.i -} - -define <2 x float> @test_vabs_f32(<2 x float> %a) #1 { -; CHECK: fabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vabs1.i = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) #4 - ret <2 x float> %vabs1.i -} - -define <4 x float> @test_vabsq_f32(<4 x float> %a) #1 { -; CHECK: fabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vabs1.i = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #4 - ret <4 x float> %vabs1.i -} - -define <2 x double> @test_vabsq_f64(<2 x double> %a) #1 { -; CHECK: fabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vabs1.i = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %a) #4 - ret <2 x double> %vabs1.i -} - -define <8 x i8> @test_vuqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vuqadd.i = tail call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 - ret <8 x i8> %vuqadd.i -} - -define <16 x i8> @test_vuqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vuqadd.i = tail call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4 - ret <16 x i8> %vuqadd.i -} - -define <4 x i16> @test_vuqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vuqadd2.i = tail call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4 - ret <4 x i16> %vuqadd2.i -} - -define <8 x i16> @test_vuqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vuqadd2.i = tail call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4 - ret <8 x i16> %vuqadd2.i -} - -define <2 x i32> @test_vuqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vuqadd2.i = tail call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4 - ret <2 x i32> %vuqadd2.i -} - -define <4 x i32> @test_vuqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vuqadd2.i = tail call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4 - ret <4 x i32> %vuqadd2.i -} - -define <2 x i64> @test_vuqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vuqadd2.i = tail call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4 - ret <2 x i64> %vuqadd2.i -} - -define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 { -; CHECK: cls v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vcls.i = tail call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vcls.i -} - -define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 { -; CHECK: cls v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vcls.i = tail call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vcls.i -} - -define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 { -; CHECK: cls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vcls1.i = tail call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a) #4 - ret <4 x i16> %vcls1.i -} - -define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 { -; CHECK: cls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vcls1.i = tail call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a) #4 - ret <8 x i16> %vcls1.i -} - -define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 { -; CHECK: cls v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - 
%vcls1.i = tail call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vcls1.i -} - -define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 { -; CHECK: cls v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcls1.i = tail call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vcls1.i -} - -define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 { -; CHECK: clz v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4 - ret <8 x i8> %vclz.i -} - -define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 { -; CHECK: clz v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4 - ret <16 x i8> %vclz.i -} - -define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 { -; CHECK: clz v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #4 - ret <4 x i16> %vclz1.i -} - -define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 { -; CHECK: clz v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #4 - ret <8 x i16> %vclz1.i -} - -define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 { -; CHECK: clz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #4 - ret <2 x i32> %vclz1.i -} - -define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 { -; CHECK: clz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #4 - ret <4 x i32> %vclz1.i -} - -define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 { -; CHECK: cnt v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vctpop.i = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vctpop.i -} - -define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 { -; CHECK: cnt v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vctpop.i = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vctpop.i -} - -define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 { -; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %neg.i = xor <8 x i8> %a, - ret <8 x i8> %neg.i -} - -define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 { -; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %neg.i = xor <16 x i8> %a, - ret <16 x i8> %neg.i -} - -define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 { -; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %neg.i = xor <4 x i16> %a, - ret <4 x i16> %neg.i -} - -define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 { -; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %neg.i = xor <8 x i16> %a, - ret <8 x i16> %neg.i -} - -define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 { -; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %neg.i = xor <2 x i32> %a, - ret <2 x i32> %neg.i -} - -define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 { -; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %neg.i = xor <4 x i32> %a, - ret <4 x i32> %neg.i -} - -define <8 x i8> @test_vrbit_s8(<8 x i8> %a) #0 { -; CHECK: rbit v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vrbit.i = tail call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vrbit.i -} - -define <16 x i8> @test_vrbitq_s8(<16 x i8> %a) #0 { -; CHECK: rbit v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vrbit.i = tail call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vrbit.i -} - -define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 { -; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h - %vmovn.i = trunc <8 x i16> %a to <8 x i8> - ret <8 x i8> %vmovn.i -} - -define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 { -; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vmovn.i = trunc <4 x i32> %a 
to <4 x i16> - ret <4 x i16> %vmovn.i -} - -define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 { -; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vmovn.i = trunc <2 x i64> %a to <2 x i32> - ret <2 x i32> %vmovn.i -} - -define <16 x i8> @test_vmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 { -; CHECK: xtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h - %vmovn.i.i = trunc <8 x i16> %b to <8 x i8> - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vmovn.i.i, <16 x i32> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { -; CHECK: xtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s - %vmovn.i.i = trunc <4 x i32> %b to <4 x i16> - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vmovn.i.i, <8 x i32> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { -; CHECK: xtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vmovn.i.i = trunc <2 x i64> %b to <2 x i32> - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vmovn.i.i, <4 x i32> - ret <4 x i32> %shuffle.i -} - -define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 { -; CHECK: sqxtun v{{[0-9]+}}.8b, v{{[0-9]+}}.8h - %vqdmull1.i = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a) #4 - ret <8 x i8> %vqdmull1.i -} - -define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 { -; CHECK: sqxtun v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vqdmull1.i = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a) #4 - ret <4 x i16> %vqdmull1.i -} - -define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 { -; CHECK: sqxtun v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vqdmull1.i = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a) #4 - ret <2 x i32> %vqdmull1.i -} - -define <16 x i8> @test_vqmovun_high_s16(<8 x i8> %a, <8 x i16> %b) #0 { -; CHECK: sqxtun2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h - %vqdmull1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %b) #4 - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqdmull1.i.i, <16 x i32> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vqmovun_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { -; CHECK: sqxtun2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s - %vqdmull1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %b) #4 - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqdmull1.i.i, <8 x i32> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vqmovun_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { -; CHECK: sqxtun2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vqdmull1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %b) #4 - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqdmull1.i.i, <4 x i32> - ret <4 x i32> %shuffle.i -} - -define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 { -; CHECK: sqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h - %vqmovn1.i = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a) #4 - ret <8 x i8> %vqmovn1.i -} - -define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 { -; CHECK: sqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vqmovn1.i = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a) #4 - ret <4 x i16> %vqmovn1.i -} - -define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 { -; CHECK: sqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vqmovn1.i = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a) #4 - ret <2 x i32> %vqmovn1.i -} - -define <16 x i8> @test_vqmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 { -; CHECK: sqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h - %vqmovn1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %b) #4 - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> - 
ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vqmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { -; CHECK: test_vqmovn_high_s32 - %vqmovn1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %b) #4 - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vqmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { -; CHECK: test_vqmovn_high_s64 - %vqmovn1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %b) #4 - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> - ret <4 x i32> %shuffle.i -} - -define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 { -; CHECK: uqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h - %vqmovn1.i = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a) #4 - ret <8 x i8> %vqmovn1.i -} - -define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 { -; CHECK: uqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vqmovn1.i = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a) #4 - ret <4 x i16> %vqmovn1.i -} - -define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 { -; CHECK: uqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vqmovn1.i = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a) #4 - ret <2 x i32> %vqmovn1.i -} - -define <16 x i8> @test_vqmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 { -; CHECK: uqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h - %vqmovn1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %b) #4 - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vqmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 { -; CHECK: uqxtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s - %vqmovn1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %b) #4 - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vqmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 { -; CHECK: uqxtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vqmovn1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %b) #4 - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> - ret <4 x i32> %shuffle.i -} - -define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 { -; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8 - %1 = sext <8 x i8> %a to <8 x i16> - %vshll_n = shl <8 x i16> %1, - ret <8 x i16> %vshll_n -} - -define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 { -; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16 - %1 = sext <4 x i16> %a to <4 x i32> - %vshll_n = shl <4 x i32> %1, - ret <4 x i32> %vshll_n -} - -define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 { -; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32 - %1 = sext <2 x i32> %a to <2 x i64> - %vshll_n = shl <2 x i64> %1, - ret <2 x i64> %vshll_n -} - -define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 { -; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8 - %1 = zext <8 x i8> %a to <8 x i16> - %vshll_n = shl <8 x i16> %1, - ret <8 x i16> %vshll_n -} - -define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 { -; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16 - %1 = zext <4 x i16> %a to <4 x i32> - %vshll_n = shl <4 x i32> %1, - ret <4 x i32> %vshll_n -} - -define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 { -; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32 - %1 = zext <2 x i32> %a to <2 x i64> - %vshll_n = shl <2 x i64> %1, - ret <2 x i64> %vshll_n -} - -define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8 - %shuffle.i = 
shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - %1 = sext <8 x i8> %shuffle.i to <8 x i16> - %vshll_n = shl <8 x i16> %1, - ret <8 x i16> %vshll_n -} - -define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16 - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %1 = sext <4 x i16> %shuffle.i to <4 x i32> - %vshll_n = shl <4 x i32> %1, - ret <4 x i32> %vshll_n -} - -define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32 - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %1 = sext <2 x i32> %shuffle.i to <2 x i64> - %vshll_n = shl <2 x i64> %1, - ret <2 x i64> %vshll_n -} - -define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8 - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - %1 = zext <8 x i8> %shuffle.i to <8 x i16> - %vshll_n = shl <8 x i16> %1, - ret <8 x i16> %vshll_n -} - -define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16 - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %1 = zext <4 x i16> %shuffle.i to <4 x i32> - %vshll_n = shl <4 x i32> %1, - ret <4 x i32> %vshll_n -} - -define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32 - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %1 = zext <2 x i32> %shuffle.i to <2 x i64> - %vshll_n = shl <2 x i64> %1, - ret <2 x i64> %vshll_n -} - -define <4 x i16> @test_vcvt_f16_f32(<4 x float> %a) #0 { -; CHECK: fcvtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vcvt1.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a) #4 - ret <4 x i16> %vcvt1.i -} - -define <8 x i16> @test_vcvt_high_f16_f32(<4 x i16> %a, <4 x float> %b) #0 { -; CHECK: fcvtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s - %vcvt1.i.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %b) #4 - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vcvt1.i.i, <8 x i32> - ret <8 x i16> %shuffle.i -} - -define <4 x float> @test_vcvt_f32_f16(<4 x i16> %a) #0 { -; CHECK: fcvtl v{{[0-9]+}}.4s, v{{[0-9]+}}.4h - %vcvt1.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %a) #4 - ret <4 x float> %vcvt1.i -} - -define <4 x float> @test_vcvt_high_f32_f16(<8 x i16> %a) #0 { -; CHECK: fcvtl2 v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %vcvt1.i.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %shuffle.i.i) #4 - ret <4 x float> %vcvt1.i.i -} - -define <2 x float> @test_vcvt_f32_f64(<2 x double> %a) #0 { -; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvt.i = fptrunc <2 x double> %a to <2 x float> - ret <2 x float> %vcvt.i -} - -define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { -; CHECK: fcvtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vcvt.i.i = fptrunc <2 x double> %b to <2 x float> - %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvt.i.i, <4 x i32> - ret <4 x float> %shuffle.i -} - -define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 { -; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvtx_f32_f641.i = call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %a) #4 - ret <2 x float> %vcvtx_f32_f641.i -} - -define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { -; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vcvtx_f32_f641.i.i = tail call <2 x 
float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %b) #4 - %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> - ret <4 x float> %shuffle.i -} - -define <2 x double> @test_vcvt_f64_f32(<2 x float> %a) #0 { -; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s - %vcvt.i = fpext <2 x float> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %a) #0 { -; CHECK: fcvtl2 v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %shuffle.i.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> - %vcvt.i.i = fpext <2 x float> %shuffle.i.i to <2 x double> - ret <2 x double> %vcvt.i.i -} - -define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 { -; CHECK: frintn v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndn1.i = tail call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndn1.i -} - -define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 { -; CHECK: frintn v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndn1.i = tail call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndn1.i -} - -define <2 x double> @test_vrndnq_f64(<2 x double> %a) #0 { -; CHECK: frintn v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndn1.i = tail call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndn1.i -} - -define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 { -; CHECK: frinta v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrnda1.i = tail call <2 x float> @llvm.round.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrnda1.i -} - -define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 { -; CHECK: frinta v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrnda1.i = tail call <4 x float> @llvm.round.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrnda1.i -} - -define <2 x double> @test_vrndaq_f64(<2 x double> %a) #0 { -; CHECK: frinta v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrnda1.i = tail call <2 x double> @llvm.round.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrnda1.i -} - -define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 { -; CHECK: frintp v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndp1.i = tail call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndp1.i -} - -define <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 { -; CHECK: frintp v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndp1.i = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndp1.i -} - -define <2 x double> @test_vrndpq_f64(<2 x double> %a) #0 { -; CHECK: frintp v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndp1.i = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndp1.i -} - -define <2 x float> @test_vrndm_f32(<2 x float> %a) #0 { -; CHECK: frintm v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndm1.i = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndm1.i -} - -define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 { -; CHECK: frintm v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndm1.i = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndm1.i -} - -define <2 x double> @test_vrndmq_f64(<2 x double> %a) #0 { -; CHECK: frintm v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndm1.i = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndm1.i -} - -define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 { -; CHECK: frintx v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndx1.i = tail call <2 x float> @llvm.rint.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndx1.i -} - -define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 { -; CHECK: frintx 
v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndx1.i = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndx1.i -} - -define <2 x double> @test_vrndxq_f64(<2 x double> %a) #0 { -; CHECK: frintx v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndx1.i = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndx1.i -} - -define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 { -; CHECK: frintz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrnd1.i = tail call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrnd1.i -} - -define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 { -; CHECK: frintz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrnd1.i = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrnd1.i -} - -define <2 x double> @test_vrndq_f64(<2 x double> %a) #0 { -; CHECK: frintz v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrnd1.i = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrnd1.i -} - -define <2 x float> @test_vrndi_f32(<2 x float> %a) #0 { -; CHECK: frinti v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndi1.i = tail call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndi1.i -} - -define <4 x float> @test_vrndiq_f32(<4 x float> %a) #0 { -; CHECK: frinti v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndi1.i = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndi1.i -} - -define <2 x double> @test_vrndiq_f64(<2 x double> %a) #0 { -; CHECK: frinti v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndi1.i = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndi1.i -} - -define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvt.i = fptosi <2 x float> %a to <2 x i32> - ret <2 x i32> %vcvt.i -} - -define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = fptosi <4 x float> %a to <4 x i32> - ret <4 x i32> %vcvt.i -} - -define <2 x i64> @test_vcvtq_s64_f64(<2 x double> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = fptosi <2 x double> %a to <2 x i64> - ret <2 x i64> %vcvt.i -} - -define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvt.i = fptoui <2 x float> %a to <2 x i32> - ret <2 x i32> %vcvt.i -} - -define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = fptoui <4 x float> %a to <4 x i32> - ret <4 x i32> %vcvt.i -} - -define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = fptoui <2 x double> %a to <2 x i64> - ret <2 x i64> %vcvt.i -} - -define <2 x i64> @test_vcvt_s64_f32(<2 x float> %a) #0 { -; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s -; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = fptosi <2 x float> %a to <2 x i64> - ret <2 x i64> %vcvt.i -} - -define <2 x i64> @test_vcvt_u64_f32(<2 x float> %a) #0 { -; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s -; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = fptoui <2 x float> %a to <2 x i64> - ret <2 x i64> %vcvt.i -} - -define <4 x i16> @test_vcvt_s16_f32(<4 x float> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s -; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vcvt.i = fptosi <4 x float> %a to <4 x i16> - ret <4 x i16> %vcvt.i -} - -define <4 x i16> @test_vcvt_u16_f32(<4 x float> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s -; CHECK: xtn 
v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vcvt.i = fptoui <4 x float> %a to <4 x i16> - ret <4 x i16> %vcvt.i -} - -define <2 x i32> @test_vcvt_s32_f64(<2 x double> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d -; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvt.i = fptosi <2 x double> %a to <2 x i32> - ret <2 x i32> %vcvt.i -} - -define <2 x i32> @test_vcvt_u32_f64(<2 x double> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d -; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvt.i = fptoui <2 x double> %a to <2 x i32> - ret <2 x i32> %vcvt.i -} - -define <1 x i8> @test_vcvt_s8_f64(<1 x double> %a) #0 { -; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} -; CHECK: ins v{{[0-9]+}}.b[0], w{{[0-9]+}} - %vcvt.i = fptosi <1 x double> %a to <1 x i8> - ret <1 x i8> %vcvt.i -} - -define <1 x i8> @test_vcvt_u8_f64(<1 x double> %a) #0 { -; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} -; CHECK: ins v{{[0-9]+}}.b[0], w{{[0-9]+}} - %vcvt.i = fptoui <1 x double> %a to <1 x i8> - ret <1 x i8> %vcvt.i -} - -define <1 x i16> @test_vcvt_s16_f64(<1 x double> %a) #0 { -; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} -; CHECK: ins v{{[0-9]+}}.h[0], w{{[0-9]+}} - %vcvt.i = fptosi <1 x double> %a to <1 x i16> - ret <1 x i16> %vcvt.i -} - -define <1 x i16> @test_vcvt_u16_f64(<1 x double> %a) #0 { -; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} -; CHECK: ins v{{[0-9]+}}.h[0], w{{[0-9]+}} - %vcvt.i = fptoui <1 x double> %a to <1 x i16> - ret <1 x i16> %vcvt.i -} - -define <1 x i32> @test_vcvt_s32_f64_v1(<1 x double> %a) #0 { -; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} -; CHECK: fmov s{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = fptosi <1 x double> %a to <1 x i32> - ret <1 x i32> %vcvt.i -} - -define <1 x i32> @test_vcvt_u32_f64_v1(<1 x double> %a) #0 { -; CHECK: fcvtzu w{{[0-9]+}}, d{{[0-9]+}} -; CHECK: fmov s{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = fptoui <1 x double> %a to <1 x i32> - ret <1 x i32> %vcvt.i -} - -define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtn_s32_f32 -; CHECK: fcvtns v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtns_f321.i = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtns_f321.i -} - -define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtnq_s32_f32 -; CHECK: fcvtns v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtns_f321.i = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtns_f321.i -} - -define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtnq_s64_f64 -; CHECK: fcvtns v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtns_f641.i = call <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtns_f641.i -} - -define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtn_u32_f32 -; CHECK: fcvtnu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtnu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtnu_f321.i -} - -define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtnq_u32_f32 -; CHECK: fcvtnu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtnu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtnu_f321.i -} - -define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtnq_u64_f64 -; CHECK: fcvtnu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtnu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtnu_f641.i -} - -define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtp_s32_f32 -; CHECK: 
fcvtps v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtps_f321.i = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtps_f321.i -} - -define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtpq_s32_f32 -; CHECK: fcvtps v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtps_f321.i = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtps_f321.i -} - -define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtpq_s64_f64 -; CHECK: fcvtps v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtps_f641.i = call <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtps_f641.i -} - -define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtp_u32_f32 -; CHECK: fcvtpu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtpu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtpu_f321.i -} - -define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtpq_u32_f32 -; CHECK: fcvtpu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtpu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtpu_f321.i -} - -define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtpq_u64_f64 -; CHECK: fcvtpu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtpu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtpu_f641.i -} - -define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtm_s32_f32 -; CHECK: fcvtms v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtms_f321.i = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtms_f321.i -} - -define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtmq_s32_f32 -; CHECK: fcvtms v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtms_f321.i = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtms_f321.i -} - -define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtmq_s64_f64 -; CHECK: fcvtms v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtms_f641.i = call <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtms_f641.i -} - -define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtm_u32_f32 -; CHECK: fcvtmu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtmu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtmu_f321.i -} - -define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtmq_u32_f32 -; CHECK: fcvtmu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtmu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtmu_f321.i -} - -define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtmq_u64_f64 -; CHECK: fcvtmu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtmu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtmu_f641.i -} - -define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvta_s32_f32 -; CHECK: fcvtas v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtas_f321.i = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtas_f321.i -} - -define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtaq_s32_f32 -; CHECK: fcvtas v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtas_f321.i = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtas_f321.i -} - 
-define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtaq_s64_f64 -; CHECK: fcvtas v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtas_f641.i = call <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtas_f641.i -} - -define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvta_u32_f32 -; CHECK: fcvtau v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtau_f321.i = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtau_f321.i -} - -define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtaq_u32_f32 -; CHECK: fcvtau v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtau_f321.i = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtau_f321.i -} - -define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtaq_u64_f64 -; CHECK: fcvtau v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtau_f641.i = call <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtau_f641.i -} - -define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 { -; CHECK: frsqrte v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrsqrte1.i = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrsqrte1.i -} - -define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 { -; CHECK: frsqrte v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrsqrte1.i = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrsqrte1.i -} - -define <2 x double> @test_vrsqrteq_f64(<2 x double> %a) #0 { -; CHECK: frsqrte v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrsqrte1.i = tail call <2 x double> @llvm.arm.neon.vrsqrte.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrsqrte1.i -} - -define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 { -; CHECK: frecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrecpe1.i = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrecpe1.i -} - -define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 { -; CHECK: frecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrecpe1.i = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrecpe1.i -} - -define <2 x double> @test_vrecpeq_f64(<2 x double> %a) #0 { -; CHECK: frecpe v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrecpe1.i = tail call <2 x double> @llvm.arm.neon.vrecpe.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrecpe1.i -} - -define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 { -; CHECK: urecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrecpe1.i = tail call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vrecpe1.i -} - -define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 { -; CHECK: urecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrecpe1.i = tail call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vrecpe1.i -} - -define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 { -; CHECK: fsqrt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vsqrt1.i = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #4 - ret <2 x float> %vsqrt1.i -} - -define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 { -; CHECK: fsqrt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vsqrt1.i = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #4 - ret <4 x float> %vsqrt1.i -} - -define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 { -; CHECK: fsqrt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vsqrt1.i = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #4 - ret <2 x double> %vsqrt1.i -} - -define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 { -; CHECK: 
scvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvt.i = sitofp <2 x i32> %a to <2 x float> - ret <2 x float> %vcvt.i -} - -define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 { -; CHECK: ucvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvt.i = uitofp <2 x i32> %a to <2 x float> - ret <2 x float> %vcvt.i -} - -define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 { -; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = sitofp <4 x i32> %a to <4 x float> - ret <4 x float> %vcvt.i -} - -define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 { -; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = uitofp <4 x i32> %a to <4 x float> - ret <4 x float> %vcvt.i -} - -define <2 x double> @test_vcvtq_f64_s64(<2 x i64> %a) #0 { -; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = sitofp <2 x i64> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 { -; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = uitofp <2 x i64> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -define <2 x float> @test_vcvt_f32_s64(<2 x i64> %a) #0 { -; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d -; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvt.i = sitofp <2 x i64> %a to <2 x float> - ret <2 x float> %vcvt.i -} - -define <2 x float> @test_vcvt_f32_u64(<2 x i64> %a) #0 { -; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d -; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvt.i = uitofp <2 x i64> %a to <2 x float> - ret <2 x float> %vcvt.i -} - -define <4 x float> @test_vcvt_f32_s16(<4 x i16> %a) #0 { -; CHECK: sshll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0 -; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = sitofp <4 x i16> %a to <4 x float> - ret <4 x float> %vcvt.i -} - -define <4 x float> @test_vcvt_f32_u16(<4 x i16> %a) #0 { -; CHECK: ushll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0 -; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = uitofp <4 x i16> %a to <4 x float> - ret <4 x float> %vcvt.i -} - -define <2 x double> @test_vcvt_f64_s32(<2 x i32> %a) #0 { -; CHECK: sshll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0 -; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = sitofp <2 x i32> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -define <2 x double> @test_vcvt_f64_u32(<2 x i32> %a) #0 { -; CHECK: ushll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0 -; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = uitofp <2 x i32> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -define <1 x double> @test_vcvt_f64_s8(<1 x i8> %a) #0 { -; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.b[0] -; CHECK: sxtb w{{[0-9]+}}, w{{[0-9]+}} -; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = sitofp <1 x i8> %a to <1 x double> - ret <1 x double> %vcvt.i -} - -define <1 x double> @test_vcvt_f64_u8(<1 x i8> %a) #0 { -; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.b[0] -; CHECK: and w{{[0-9]+}}, w{{[0-9]+}}, #0xff -; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = uitofp <1 x i8> %a to <1 x double> - ret <1 x double> %vcvt.i -} - -define <1 x double> @test_vcvt_f64_s16(<1 x i16> %a) #0 { -; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.h[0] -; CHECK: sxth w{{[0-9]+}}, w{{[0-9]+}} -; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = sitofp <1 x i16> %a to <1 x double> - ret <1 x double> %vcvt.i -} - -define <1 x double> @test_vcvt_f64_u16(<1 x i16> %a) #0 { -; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.h[0] -; CHECK: and w{{[0-9]+}}, w{{[0-9]+}}, #0xffff -; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = uitofp <1 x i16> %a to <1 x double> - ret <1 x double> %vcvt.i -} - -define <1 x double> 
@test_vcvt_f64_s32_v1(<1 x i32> %a) #0 { -; CHECK: fmov w{{[0-9]+}}, s{{[0-9]+}} -; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = sitofp <1 x i32> %a to <1 x double> - ret <1 x double> %vcvt.i -} - -define <1 x double> @test_vcvt_f64_u32_v1(<1 x i32> %a) #0 { -; CHECK: fmov w{{[0-9]+}}, s{{[0-9]+}} -; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = uitofp <1 x i32> %a to <1 x double> - ret <1 x double> %vcvt.i -} - -declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #2 - -declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #2 - -declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #2 - -declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) #2 - -declare <2 x double> @llvm.arm.neon.vrecpe.v2f64(<2 x double>) #2 - -declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) #2 - -declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) #2 - -declare <2 x double> @llvm.arm.neon.vrsqrte.v2f64(<2 x double>) #2 - -declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) #2 - -declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) #2 - -declare <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float>) - -declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.trunc.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.trunc.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.rint.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.rint.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.rint.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.floor.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.floor.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.floor.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #3 - -declare <4 x float> 
@llvm.ceil.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.ceil.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.round.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.round.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.round.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) #2 - -declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) #2 - -declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) #2 - -declare <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double>) #2 - -declare <2 x float> @llvm.aarch64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2 - -declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) #2 - -declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) #2 - -declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) #2 - -declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) #2 - -declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) #2 - -declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) #2 - -declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) #2 - -declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) #2 - -declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) #2 - -declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) #2 - -declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) #2 - -declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #2 - -declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) #2 - -declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) #2 - -declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) #2 - -declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) #2 - -declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) #2 - -declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) #2 - -declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) #2 - -declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) #2 - -declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) #2 - -declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) #2 - -declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) #2 - -declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) #2 - -declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) #2 - -declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) #2 - -declare <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) #2 - -declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #3 - -declare <2 x i64> @llvm.arm.neon.vabs.v2i64(<2 x i64>) #2 - -declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) #2 - -declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) #2 - -declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) #2 - -declare <2 x i64> @llvm.arm.neon.vqneg.v2i64(<2 x i64>) #2 - -declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) #2 - -declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) #2 - -declare <4 x i16> 
@llvm.arm.neon.vqneg.v4i16(<4 x i16>) #2 - -declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8>) #2 - -declare <2 x i64> @llvm.arm.neon.vqabs.v2i64(<2 x i64>) #2 - -declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) #2 - -declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) #2 - -declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) #2 - -declare <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64>, <4 x i32>) #2 - -declare <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32>, <8 x i16>) #2 - -declare <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16>, <16 x i8>) #2 - -declare <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64>, <4 x i32>) #2 - -declare <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32>, <8 x i16>) #2 - -declare <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16>, <16 x i8>) #2 - -declare <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64>, <2 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32>, <4 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16>, <8 x i8>) #2 - -declare <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64>, <2 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32>, <4 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16>, <8 x i8>) #2 - -declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) #2 - -declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) #2 - -declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) #2 - -declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) #2 - -declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) #2 - -declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) #2 - -declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) #2 - -declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) #2 - -declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) #2 - - -define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvt_s64_f64 -; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}} - %1 = fptosi <1 x double> %a to <1 x i64> - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvt_u64_f64 -; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}} - %1 = fptoui <1 x double> %a to <1 x i64> - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtn_s64_f64 -; CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtn_u64_f64 -; CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtp_s64_f64 -; CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> 
@llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtp_u64_f64 -; CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtm_s64_f64 -; CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtm_u64_f64 -; CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvta_s64_f64 -; CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvta_u64_f64 -; CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) { -; CHECK-LABEL: test_vcvt_f64_s64 -; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}} - %1 = sitofp <1 x i64> %a to <1 x double> - ret <1 x double> %1 -} - -define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) { -; CHECK-LABEL: test_vcvt_f64_u64 -; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}} - %1 = uitofp <1 x i64> %a to <1 x double> - ret <1 x double> %1 -} - -declare <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double>) - -define <1 x double> @test_vrndn_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrndn_f64 -; CHECK: frintn d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrnda_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrnda_f64 -; CHECK: frinta d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.round.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrndp_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrndp_f64 -; CHECK: frintp d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.ceil.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrndm_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrndm_f64 -; CHECK: frintm d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.floor.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrndx_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrndx_f64 -; CHECK: frintx d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.rint.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrnd_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrnd_f64 -; CHECK: frintz d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.trunc.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrndi_f64(<1 x 
double> %a) { -; CHECK-LABEL: test_vrndi_f64 -; CHECK: frinti d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>) -declare <1 x double> @llvm.trunc.v1f64(<1 x double>) -declare <1 x double> @llvm.rint.v1f64(<1 x double>) -declare <1 x double> @llvm.floor.v1f64(<1 x double>) -declare <1 x double> @llvm.ceil.v1f64(<1 x double>) -declare <1 x double> @llvm.round.v1f64(<1 x double>) -declare <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double>) - -define <1 x double> @test_vrsqrte_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrsqrte_f64 -; CHECK: frsqrte d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrecpe_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrecpe_f64 -; CHECK: frecpe d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vsqrt_f64(<1 x double> %a) { -; CHECK-LABEL: test_vsqrt_f64 -; CHECK: fsqrt d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vrecps_f64 -; CHECK: frecps d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vrsqrts_f64 -; CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.sqrt.v1f64(<1 x double>) -declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>) -declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>) - -define i64 @test_vaddlv_s32(<2 x i32> %a) { -; CHECK-LABEL: test_vaddlv_s32 -; CHECK: saddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s - %1 = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32> %a) - %2 = extractelement <1 x i64> %1, i32 0 - ret i64 %2 -} - -define i64 @test_vaddlv_u32(<2 x i32> %a) { -; CHECK-LABEL: test_vaddlv_u32 -; CHECK: uaddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s - %1 = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32> %a) - %2 = extractelement <1 x i64> %1, i32 0 - ret i64 %2 -} - -declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32>) -declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32>) diff --git a/test/CodeGen/AArch64/neon-mla-mls.ll b/test/CodeGen/AArch64/neon-mla-mls.ll index 37daadef0b28..e7bff748ad37 100644 --- a/test/CodeGen/AArch64/neon-mla-mls.ll +++ b/test/CodeGen/AArch64/neon-mla-mls.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s diff --git a/test/CodeGen/AArch64/neon-mov.ll b/test/CodeGen/AArch64/neon-mov.ll index 7eadde481613..b7baf25f807a 100644 --- a/test/CodeGen/AArch64/neon-mov.ll +++ b/test/CodeGen/AArch64/neon-mov.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu 
-mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 define <8 x i8> @movi8b() { @@ -15,21 +14,18 @@ define <16 x i8> @movi16b() { define <2 x i32> @movi2s_lsl0() { ; CHECK-LABEL: movi2s_lsl0: -; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff ; CHECK-ARM64: movi {{d[0-9]+}}, #0x0000ff000000ff ret <2 x i32> < i32 255, i32 255 > } define <2 x i32> @movi2s_lsl8() { ; CHECK-LABEL: movi2s_lsl8: -; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, lsl #8 ; CHECK-ARM64: movi {{d[0-9]+}}, #0x00ff000000ff00 ret <2 x i32> < i32 65280, i32 65280 > } define <2 x i32> @movi2s_lsl16() { ; CHECK-LABEL: movi2s_lsl16: -; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, lsl #16 ; CHECK-ARM64: movi {{d[0-9]+}}, #0xff000000ff0000 ret <2 x i32> < i32 16711680, i32 16711680 > @@ -37,28 +33,24 @@ define <2 x i32> @movi2s_lsl16() { define <2 x i32> @movi2s_lsl24() { ; CHECK-LABEL: movi2s_lsl24: -; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, lsl #24 ; CHECK-ARM64: movi {{d[0-9]+}}, #0xff000000ff000000 ret <2 x i32> < i32 4278190080, i32 4278190080 > } define <4 x i32> @movi4s_lsl0() { ; CHECK-LABEL: movi4s_lsl0: -; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff ; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0x0000ff000000ff ret <4 x i32> < i32 255, i32 255, i32 255, i32 255 > } define <4 x i32> @movi4s_lsl8() { ; CHECK-LABEL: movi4s_lsl8: -; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, lsl #8 ; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0x00ff000000ff00 ret <4 x i32> < i32 65280, i32 65280, i32 65280, i32 65280 > } define <4 x i32> @movi4s_lsl16() { ; CHECK-LABEL: movi4s_lsl16: -; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, lsl #16 ; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0xff000000ff0000 ret <4 x i32> < i32 16711680, i32 16711680, i32 16711680, i32 16711680 > @@ -66,35 +58,30 @@ define <4 x i32> @movi4s_lsl16() { define <4 x i32> @movi4s_lsl24() { ; CHECK-LABEL: movi4s_lsl24: -; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, lsl #24 ; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0xff000000ff000000 ret <4 x i32> < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080 > } define <4 x i16> @movi4h_lsl0() { ; CHECK-LABEL: movi4h_lsl0: -; CHECK-AARCH64: movi {{v[0-9]+}}.4h, #0xff ; CHECK-ARM64: movi {{d[0-9]+}}, #0xff00ff00ff00ff ret <4 x i16> < i16 255, i16 255, i16 255, i16 255 > } define <4 x i16> @movi4h_lsl8() { ; CHECK-LABEL: movi4h_lsl8: -; CHECK-AARCH64: movi {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 ; CHECK-ARM64: movi d0, #0xff00ff00ff00ff00 ret <4 x i16> < i16 65280, i16 65280, i16 65280, i16 65280 > } define <8 x i16> @movi8h_lsl0() { ; CHECK-LABEL: movi8h_lsl0: -; CHECK-AARCH64: movi {{v[0-9]+}}.8h, #{{0xff|255}} ; CHECK-ARM64: movi v0.2d, #0xff00ff00ff00ff ret <8 x i16> < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 > } define <8 x i16> @movi8h_lsl8() { ; CHECK-LABEL: movi8h_lsl8: -; CHECK-AARCH64: movi {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 ; CHECK-ARM64: movi v0.2d, #0xff00ff00ff00ff00 ret <8 x i16> < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 > } @@ -177,14 +164,12 @@ define <8 x i16> @mvni8h_lsl8() { define <2 x i32> @movi2s_msl8(<2 x i32> %a) { ; CHECK-LABEL: movi2s_msl8: -; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, msl #8 ; CHECK-ARM64: movi {{d[0-9]+}}, #0x00ffff0000ffff ret <2 x i32> < i32 65535, i32 65535 > } define <2 x i32> @movi2s_msl16() { ; CHECK-LABEL: movi2s_msl16: -; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, 
msl #16 ; CHECK-ARM64: movi d0, #0xffffff00ffffff ret <2 x i32> < i32 16777215, i32 16777215 > } @@ -192,14 +177,12 @@ define <2 x i32> @movi2s_msl16() { define <4 x i32> @movi4s_msl8() { ; CHECK-LABEL: movi4s_msl8: -; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, msl #8 ; CHECK-ARM64: movi v0.2d, #0x00ffff0000ffff ret <4 x i32> < i32 65535, i32 65535, i32 65535, i32 65535 > } define <4 x i32> @movi4s_msl16() { ; CHECK-LABEL: movi4s_msl16: -; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, msl #16 ; CHECK-ARM64: movi v0.2d, #0xffffff00ffffff ret <4 x i32> < i32 16777215, i32 16777215, i32 16777215, i32 16777215 > } diff --git a/test/CodeGen/AArch64/neon-mul-div.ll b/test/CodeGen/AArch64/neon-mul-div.ll deleted file mode 100644 index 869bd445c718..000000000000 --- a/test/CodeGen/AArch64/neon-mul-div.ll +++ /dev/null @@ -1,754 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has its own copy of this because of the intrinsics - -define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %tmp3 = mul <8 x i8> %A, %B; - ret <8 x i8> %tmp3 -} - -define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %tmp3 = mul <16 x i8> %A, %B; - ret <16 x i8> %tmp3 -} - -define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %tmp3 = mul <4 x i16> %A, %B; - ret <4 x i16> %tmp3 -} - -define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h - %tmp3 = mul <8 x i16> %A, %B; - ret <8 x i16> %tmp3 -} - -define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = mul <2 x i32> %A, %B; - ret <2 x i32> %tmp3 -} - -define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = mul <4 x i32> %A, %B; - ret <4 x i32> %tmp3 -} - -define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK-LABEL: mul1xi64: -;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} - %tmp3 = mul <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) { -;CHECK-LABEL: mul2xi64: -;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} -;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} - %tmp3 = mul <2 x i64> %A, %B; - ret <2 x i64> %tmp3 -} - - define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) { -;CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = fmul <2 x float> %A, %B; - ret <2 x float> %tmp3 -} - -define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) { -;CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = fmul <4 x float> %A, %B; - ret <4 x float> %tmp3 -} -define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) { -;CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %tmp3 = fmul <2 x double> %A, %B; - ret <2 x double> %tmp3 -} - - - define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) { -;CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = fdiv <2 x float> %A, %B; - ret <2 x float> %tmp3 -} - -define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) { -;CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = fdiv <4 x float> %A, %B; - ret <4 x float> %tmp3 -} -define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) { -;CHECK: fdiv {{v[0-9]+}}.2d, 
{{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %tmp3 = fdiv <2 x double> %A, %B; - ret <2 x double> %tmp3 -} - -define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <1 x i8> %A, %B; - ret <1 x i8> %tmp3 -} - -define <8 x i8> @sdiv8x8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <8 x i8> %A, %B; - ret <8 x i8> %tmp3 -} - -define <16 x i8> @sdiv16x8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <16 x i8> %A, %B; - ret <16 x i8> %tmp3 -} - -define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <1 x i16> %A, %B; - ret <1 x i16> %tmp3 -} - -define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <4 x i16> %A, %B; - ret <4 x i16> %tmp3 -} - -define <8 x i16> @sdiv8x16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <8 x i16> %A, %B; - ret <8 x i16> %tmp3 -} - -define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <1 x i32> %A, %B; - ret <1 x i32> %tmp3 -} - -define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <2 x i32> %A, %B; - ret <2 x i32> %tmp3 -} - -define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <4 x i32> %A, %B; - ret <4 x i32> %tmp3 -} - -define <1 x i64> @sdiv1x64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: 
sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = sdiv <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -define <2 x i64> @sdiv2x64(<2 x i64> %A, <2 x i64> %B) { -;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = sdiv <2 x i64> %A, %B; - ret <2 x i64> %tmp3 -} - -define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <1 x i8> %A, %B; - ret <1 x i8> %tmp3 -} - -define <8 x i8> @udiv8x8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <8 x i8> %A, %B; - ret <8 x i8> %tmp3 -} - -define <16 x i8> @udiv16x8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <16 x i8> %A, %B; - ret <16 x i8> %tmp3 -} - -define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <1 x i16> %A, %B; - ret <1 x i16> %tmp3 -} - -define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <4 x i16> %A, %B; - ret <4 x i16> %tmp3 -} - -define <8 x i16> @udiv8x16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <8 x i16> %A, %B; - ret <8 x i16> %tmp3 -} - -define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <1 x i32> %A, %B; - ret <1 x i32> %tmp3 -} - -define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <2 x i32> %A, %B; - ret <2 x i32> %tmp3 -} - -define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} 
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <4 x i32> %A, %B; - ret <4 x i32> %tmp3 -} - -define <1 x i64> @udiv1x64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = udiv <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -define <2 x i64> @udiv2x64(<2 x i64> %A, <2 x i64> %B) { -;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = udiv <2 x i64> %A, %B; - ret <2 x i64> %tmp3 -} - -define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <1 x i8> %A, %B; - ret <1 x i8> %tmp3 -} - -define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <8 x i8> %A, %B; - ret <8 x i8> %tmp3 -} - -define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv 
{{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <16 x i8> %A, %B; - ret <16 x i8> %tmp3 -} - -define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <1 x i16> %A, %B; - ret <1 x i16> %tmp3 -} - -define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <4 x i16> %A, %B; - ret <4 x i16> %tmp3 -} - -define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <8 x i16> %A, %B; - ret <8 x i16> %tmp3 -} - -define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <1 x i32> %A, %B; - ret <1 x i32> %tmp3 -} - -define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <2 x i32> %A, %B; - ret <2 x i32> %tmp3 -} - -define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <4 x i32> %A, %B; - ret <4 x i32> %tmp3 -} - -define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = srem <1 x i64> %A, %B; - ret <1 x i64> 
%tmp3 -} - -define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) { -;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = srem <2 x i64> %A, %B; - ret <2 x i64> %tmp3 -} - -define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <1 x i8> %A, %B; - ret <1 x i8> %tmp3 -} - -define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <8 x i8> %A, %B; - ret <8 x i8> %tmp3 -} - -define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, 
{{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <16 x i8> %A, %B; - ret <16 x i8> %tmp3 -} - -define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <1 x i16> %A, %B; - ret <1 x i16> %tmp3 -} - -define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <4 x i16> %A, %B; - ret <4 x i16> %tmp3 -} - -define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <8 x i16> %A, %B; - ret <8 x i16> %tmp3 -} - -define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <1 x i32> %A, %B; - ret <1 x i32> %tmp3 -} - -define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <2 x i32> %A, %B; - ret <2 x i32> %tmp3 -} - -define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <4 x i32> %A, %B; - ret <4 x i32> %tmp3 -} - -define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = urem <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -define <2 x i64> @urem2x64(<2 x i64> %A, <2 x i64> %B) { -;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: 
udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = urem <2 x i64> %A, %B; - ret <2 x i64> %tmp3 -} - -define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) { -; CHECK: bl fmodf -; CHECK: bl fmodf - %tmp3 = frem <2 x float> %A, %B; - ret <2 x float> %tmp3 -} - -define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) { -; CHECK: bl fmodf -; CHECK: bl fmodf -; CHECK: bl fmodf -; CHECK: bl fmodf - %tmp3 = frem <4 x float> %A, %B; - ret <4 x float> %tmp3 -} - -define <1 x double> @frem1d64(<1 x double> %A, <1 x double> %B) { -; CHECK: bl fmod - %tmp3 = frem <1 x double> %A, %B; - ret <1 x double> %tmp3 -} - -define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) { -; CHECK: bl fmod -; CHECK: bl fmod - %tmp3 = frem <2 x double> %A, %B; - ret <2 x double> %tmp3 -} - -declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>) -declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) - -define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: poly_mulv8i8: - %prod = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: pmul v0.8b, v0.8b, v1.8b - ret <8 x i8> %prod -} - -define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: poly_mulv16i8: - %prod = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: pmul v0.16b, v0.16b, v1.16b - ret <16 x i8> %prod -} - -declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) -declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sqdmulh_v4i16: - %prod = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sqdmulh v0.4h, v0.4h, v1.4h - ret <4 x i16> %prod -} - -define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sqdmulh_v8i16: - %prod = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sqdmulh v0.8h, v0.8h, v1.8h - ret <8 x i16> %prod -} - -define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sqdmulh_v2i32: - %prod = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sqdmulh v0.2s, v0.2s, v1.2s - ret <2 x i32> %prod -} - -define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sqdmulh_v4i32: - %prod = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sqdmulh v0.4s, v0.4s, v1.4s - ret <4 x i32> %prod -} - -declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) -declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sqrdmulh_v4i16: - %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h - ret <4 x i16> %prod -} - -define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sqrdmulh_v8i16: - %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h - ret <8 x i16> %prod -} - 
-define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sqrdmulh_v2i32: - %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s - ret <2 x i32> %prod -} - -define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sqrdmulh_v4i32: - %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s - ret <4 x i32> %prod -} - -declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; Using registers other than v0, v1 and v2 are possible, but would be odd. -; CHECK: fmulx v0.2s, v0.2s, v1.2s - %val = call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs) - ret <2 x float> %val -} - -define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; Using registers other than v0, v1 and v2 are possible, but would be odd. -; CHECK: fmulx v0.4s, v0.4s, v1.4s - %val = call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs) - ret <4 x float> %val -} - -define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; Using registers other than v0, v1 and v2 are possible, but would be odd. -; CHECK: fmulx v0.2d, v0.2d, v1.2d - %val = call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs) - ret <2 x double> %val -} - -define <1 x i8> @test_mul_v1i8(<1 x i8> %a, <1 x i8> %b) { -;CHECK-LABEL: test_mul_v1i8: -;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %c = mul <1 x i8> %a, %b - ret <1 x i8> %c -} - -define <1 x i16> @test_mul_v1i16(<1 x i16> %a, <1 x i16> %b) { -;CHECK-LABEL: test_mul_v1i16: -;CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %c = mul <1 x i16> %a, %b - ret <1 x i16> %c -} - -define <1 x i32> @test_mul_v1i32(<1 x i32> %a, <1 x i32> %b) { -;CHECK-LABEL: test_mul_v1i32: -;CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %c = mul <1 x i32> %a, %b - ret <1 x i32> %c -} diff --git a/test/CodeGen/AArch64/neon-or-combine.ll b/test/CodeGen/AArch64/neon-or-combine.ll index e8da72f42cd5..d98c12802a0c 100644 --- a/test/CodeGen/AArch64/neon-or-combine.ll +++ b/test/CodeGen/AArch64/neon-or-combine.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc < %s -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s ; Check that the DAGCombiner does not crash with an assertion failure diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll index 99507cecf1c8..d45dde649e47 100644 --- a/test/CodeGen/AArch64/neon-perm.ll +++ b/test/CodeGen/AArch64/neon-perm.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 %struct.int8x8x2_t = type { [2 x <8 x i8>] } @@ -54,7 +53,6 @@ entry: define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp1_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: 
%shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -71,7 +69,6 @@ entry: define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp1q_s64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> @@ -112,7 +109,6 @@ entry: define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp1_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -129,7 +125,6 @@ entry: define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp1q_u64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> @@ -138,7 +133,6 @@ entry: define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vuzp1_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> @@ -155,7 +149,6 @@ entry: define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vuzp1q_f64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> @@ -228,7 +221,6 @@ entry: define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp2_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -245,8 +237,6 @@ entry: define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp2q_s64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] -; CHECK-AARCH64-NEXT: mov {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> @@ -287,7 +277,6 @@ entry: define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp2_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -304,8 +293,6 @@ entry: define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp2q_u64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] -; CHECK-AARCH64-NEXT: mov {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> @@ -314,7 +301,6 @@ entry: define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vuzp2_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> @@ -331,8 +317,6 @@ entry: define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vuzp2q_f64: -; CHECK-AARCH64: ins 
{{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] -; CHECK-AARCH64-NEXT: mov {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> @@ -405,7 +389,6 @@ entry: define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip1_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -422,7 +405,6 @@ entry: define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip1q_s64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> @@ -463,7 +445,6 @@ entry: define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip1_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -480,7 +461,6 @@ entry: define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip1q_u64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> @@ -489,7 +469,6 @@ entry: define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vzip1_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> @@ -506,7 +485,6 @@ entry: define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vzip1q_f64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> @@ -579,7 +557,6 @@ entry: define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip2_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -596,7 +573,6 @@ entry: define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip2q_s64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> @@ -637,7 +613,6 @@ entry: define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip2_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -654,7 +629,6 @@ entry: define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip2q_u64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> @@ -663,7 +637,6 @@ entry: define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) { ; 
CHECK-LABEL: test_vzip2_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> @@ -680,7 +653,6 @@ entry: define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vzip2q_f64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> @@ -753,7 +725,6 @@ entry: define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn1_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -770,7 +741,6 @@ entry: define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn1q_s64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> @@ -811,7 +781,6 @@ entry: define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn1_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -828,7 +797,6 @@ entry: define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn1q_u64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> @@ -837,7 +805,6 @@ entry: define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vtrn1_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> @@ -854,7 +821,6 @@ entry: define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vtrn1q_f64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> @@ -927,7 +893,6 @@ entry: define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn2_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -944,7 +909,6 @@ entry: define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn2q_s64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> @@ -985,7 +949,6 @@ entry: define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn2_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -1002,7 +965,6 @@ entry: define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) { ; 
CHECK-LABEL: test_vtrn2q_u64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> @@ -1011,7 +973,6 @@ entry: define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vtrn2_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> @@ -1028,7 +989,6 @@ entry: define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vtrn2q_f64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> @@ -2534,8 +2494,6 @@ entry: define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -2572,8 +2530,6 @@ entry: define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -2586,8 +2542,6 @@ entry: define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vuzp_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -2756,8 +2710,6 @@ entry: define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -2794,8 +2746,6 @@ entry: define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -2808,8 +2758,6 @@ entry: define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vzip_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -2978,8 +2926,6 @@ entry: define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 
{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -3016,8 +2962,6 @@ entry: define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -3030,8 +2974,6 @@ entry: define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vtrn_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -3183,7 +3125,4 @@ define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) { %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1 ret %struct.uint8x8x2_t %.fca.0.1.insert -; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] -; CHECK-AARCH64-NEXT: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -; CHECK-AARCH64-NEXT: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b } diff --git a/test/CodeGen/AArch64/neon-rounding-halving-add.ll b/test/CodeGen/AArch64/neon-rounding-halving-add.ll deleted file mode 100644 index 5c99ba1e4d4f..000000000000 --- a/test/CodeGen/AArch64/neon-rounding-halving-add.ll +++ /dev/null @@ -1,106 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Just intrinsic calls: arm64 has similar in vhadd.ll - -declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_urhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_urhadd_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: urhadd v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_srhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_srhadd_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: srhadd v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_urhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_urhadd_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: urhadd v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_srhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_srhadd_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: srhadd v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_urhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_urhadd_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: urhadd v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_srhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_srhadd_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: srhadd v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare 
<8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_urhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_urhadd_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: urhadd v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_srhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_srhadd_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: srhadd v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_urhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_urhadd_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: urhadd v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_srhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_srhadd_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: srhadd v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_urhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_urhadd_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: urhadd v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_srhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_srhadd_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: srhadd v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - - diff --git a/test/CodeGen/AArch64/neon-rounding-shift.ll b/test/CodeGen/AArch64/neon-rounding-shift.ll deleted file mode 100644 index 692df988cfbb..000000000000 --- a/test/CodeGen/AArch64/neon-rounding-shift.ll +++ /dev/null @@ -1,122 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Just intrinsic calls: arm64 has similar in vshift.ll - -declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_urshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_urshl_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: urshl v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_srshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_srshl_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: srshl v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_urshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_urshl_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: urshl v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_srshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_srshl_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: srshl v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - 
-declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_urshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_urshl_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: urshl v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_srshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_srshl_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: srshl v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_urshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_urshl_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: urshl v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_srshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_srshl_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: srshl v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_urshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_urshl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: urshl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_srshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_srshl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: srshl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_urshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_urshl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: urshl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_srshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_srshl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: srshl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) -declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_urshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_urshl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: urshl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - -define <2 x i64> @test_srshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_srshl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: srshl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - diff --git a/test/CodeGen/AArch64/neon-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-saturating-add-sub.ll deleted file mode 100644 index 996835bfc5ac..000000000000 --- a/test/CodeGen/AArch64/neon-saturating-add-sub.ll +++ /dev/null @@ -1,241 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Just intrinsic 
calls: arm64 has similar in vqadd.ll -declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_uqadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uqadd_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uqadd v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_sqadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_sqadd_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: sqadd v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_uqadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uqadd_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uqadd v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_sqadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_sqadd_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: sqadd v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_uqadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uqadd_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uqadd v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_sqadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sqadd_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sqadd v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_uqadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uqadd_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uqadd v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_sqadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sqadd_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sqadd v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_uqadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uqadd_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uqadd v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_sqadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sqadd_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sqadd v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_uqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uqadd_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uqadd v0.4s, v0.4s, v1.4s - ret <4 x i32> 
%tmp1 -} - -define <4 x i32> @test_sqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sqadd_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sqadd v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - - - -declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>) -declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_uqadd_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_uqadd_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: uqadd v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - -define <2 x i64> @test_sqadd_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_sqadd_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: sqadd v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - -declare <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_uqsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uqsub_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uqsub v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_sqsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_sqsub_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: sqsub v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_uqsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uqsub_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uqsub v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_sqsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_sqsub_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: sqsub v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_uqsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uqsub_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uqsub v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_sqsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sqsub_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sqsub v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_uqsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uqsub_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uqsub v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_sqsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sqsub_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sqsub v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>) - -define <2 x 
i32> @test_uqsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uqsub_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uqsub v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_sqsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sqsub_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sqsub v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_uqsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uqsub_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uqsub v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_sqsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sqsub_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sqsub v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64>, <2 x i64>) -declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_uqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_uqsub_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: uqsub v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - -define <2 x i64> @test_sqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_sqsub_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: sqsub v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} diff --git a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll deleted file mode 100644 index a59eebd55d38..000000000000 --- a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll +++ /dev/null @@ -1,122 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Just intrinsic calls: arm64 has similar in vshift.ll - -declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_uqrshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uqrshl_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uqrshl v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_sqrshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_sqrshl_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: sqrshl v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_uqrshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uqrshl_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uqrshl v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_sqrshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_sqrshl_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: sqrshl v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 
x i16>, <4 x i16>) - -define <4 x i16> @test_uqrshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uqrshl_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uqrshl v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_sqrshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sqrshl_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sqrshl v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_uqrshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uqrshl_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uqrshl v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_sqrshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sqrshl_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sqrshl v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_uqrshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uqrshl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uqrshl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_sqrshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sqrshl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sqrshl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_uqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uqrshl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uqrshl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_sqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sqrshl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sqrshl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) -declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_uqrshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_uqrshl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: uqrshl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - -define <2 x i64> @test_sqrshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_sqrshl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: sqrshl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - diff --git a/test/CodeGen/AArch64/neon-saturating-shift.ll b/test/CodeGen/AArch64/neon-saturating-shift.ll deleted file mode 100644 index 035740cba5d3..000000000000 --- a/test/CodeGen/AArch64/neon-saturating-shift.ll +++ /dev/null @@ -1,122 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Just intrinsic calls: arm64 has similar in vshift.ll - -declare <8 x i8> 
@llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_uqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uqshl_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uqshl v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_sqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_sqshl_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: sqshl v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_uqshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uqshl_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uqshl v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_sqshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_sqshl_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: sqshl v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_uqshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uqshl_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uqshl v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_sqshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sqshl_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sqshl v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_uqshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uqshl_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uqshl v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_sqshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sqshl_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sqshl v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_uqshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uqshl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uqshl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_sqshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sqshl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sqshl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_uqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uqshl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uqshl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - 
-define <4 x i32> @test_sqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sqshl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sqshl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) -declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_uqshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_uqshl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: uqshl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - -define <2 x i64> @test_sqshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_sqshl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: sqshl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - diff --git a/test/CodeGen/AArch64/neon-scalar-abs.ll b/test/CodeGen/AArch64/neon-scalar-abs.ll deleted file mode 100644 index bb351ab86fcd..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-abs.ll +++ /dev/null @@ -1,62 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has tests for i64 versions, uses different approach for others. - -define i64 @test_vabsd_s64(i64 %a) { -; CHECK: test_vabsd_s64 -; CHECK: abs {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vabs.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vabs1.i = tail call <1 x i64> @llvm.aarch64.neon.vabs(<1 x i64> %vabs.i) - %0 = extractelement <1 x i64> %vabs1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vabs(<1 x i64>) - -define i8 @test_vqabsb_s8(i8 %a) { -; CHECK: test_vqabsb_s8 -; CHECK: sqabs {{b[0-9]+}}, {{b[0-9]+}} -entry: - %vqabs.i = insertelement <1 x i8> undef, i8 %a, i32 0 - %vqabs1.i = call <1 x i8> @llvm.arm.neon.vqabs.v1i8(<1 x i8> %vqabs.i) - %0 = extractelement <1 x i8> %vqabs1.i, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.arm.neon.vqabs.v1i8(<1 x i8>) - -define i16 @test_vqabsh_s16(i16 %a) { -; CHECK: test_vqabsh_s16 -; CHECK: sqabs {{h[0-9]+}}, {{h[0-9]+}} -entry: - %vqabs.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vqabs1.i = call <1 x i16> @llvm.arm.neon.vqabs.v1i16(<1 x i16> %vqabs.i) - %0 = extractelement <1 x i16> %vqabs1.i, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.arm.neon.vqabs.v1i16(<1 x i16>) - -define i32 @test_vqabss_s32(i32 %a) { -; CHECK: test_vqabss_s32 -; CHECK: sqabs {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vqabs.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqabs1.i = call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %vqabs.i) - %0 = extractelement <1 x i32> %vqabs1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32>) - -define i64 @test_vqabsd_s64(i64 %a) { -; CHECK: test_vqabsd_s64 -; CHECK: sqabs {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vqabs.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vqabs1.i = call <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64> %vqabs.i) - %0 = extractelement <1 x i64> %vqabs1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-add-sub.ll deleted file mode 100644 index 7e262cb8bdb6..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-add-sub.ll +++ /dev/null @@ -1,51 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has a copy of the key parts in AdvSIMD-Scalar.ll - -define <1 x 
i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %tmp3 = add <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %tmp3 = sub <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -declare <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_add_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_add_v1i64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_uadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uadd_v1i64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -declare <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_sub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sub_v1i64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_usub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_usub_v1i64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - - - diff --git a/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll index f5636db5e142..6cfdc5be1314 100644 --- a/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll +++ b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s declare float @llvm.fma.f32(float, float, float) diff --git a/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll b/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll deleted file mode 100644 index ff2941325208..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll +++ /dev/null @@ -1,124 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; arm64 has separate copy due to intrinsics (aarch64-neon-scalar-by-elem-mul.ll) -define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) { - ; CHECK: test_fmul_lane_ss2S - ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] - %tmp1 = extractelement <2 x float> %v, i32 1 - %tmp2 = fmul float %a, %tmp1; - ret float %tmp2; -} - -define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) { - ; CHECK: test_fmul_lane_ss2S_swap - ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] - %tmp1 = extractelement <2 x float> %v, i32 1 - %tmp2 = fmul float %tmp1, %a; - ret float %tmp2; -} - - -define float @test_fmul_lane_ss4S(float %a, <4 x float> %v) { - ; CHECK: test_fmul_lane_ss4S - ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] - %tmp1 = extractelement <4 x float> %v, i32 3 - %tmp2 = fmul float %a, %tmp1; - ret float %tmp2; -} - -define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) { - ; CHECK: test_fmul_lane_ss4S_swap - ; CHECK: fmul {{s[0-9]+}}, 
{{s[0-9]+}}, {{v[0-9]+}}.s[3] - %tmp1 = extractelement <4 x float> %v, i32 3 - %tmp2 = fmul float %tmp1, %a; - ret float %tmp2; -} - - -define double @test_fmul_lane_ddD(double %a, <1 x double> %v) { - ; CHECK: test_fmul_lane_ddD - ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] - %tmp1 = extractelement <1 x double> %v, i32 0 - %tmp2 = fmul double %a, %tmp1; - ret double %tmp2; -} - - - -define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) { - ; CHECK: test_fmul_lane_dd2D - ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] - %tmp1 = extractelement <2 x double> %v, i32 1 - %tmp2 = fmul double %a, %tmp1; - ret double %tmp2; -} - - -define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) { - ; CHECK: test_fmul_lane_dd2D_swap - ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] - %tmp1 = extractelement <2 x double> %v, i32 1 - %tmp2 = fmul double %tmp1, %a; - ret double %tmp2; -} - -declare float @llvm.aarch64.neon.vmulx.f32(float, float) - -define float @test_fmulx_lane_f32(float %a, <2 x float> %v) { - ; CHECK: test_fmulx_lane_f32 - ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] - %tmp1 = extractelement <2 x float> %v, i32 1 - %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1) - ret float %tmp2; -} - -define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) { - ; CHECK: test_fmulx_laneq_f32 - ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] - %tmp1 = extractelement <4 x float> %v, i32 3 - %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1) - ret float %tmp2; -} - -define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) { - ; CHECK: test_fmulx_laneq_f32_swap - ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] - %tmp1 = extractelement <4 x float> %v, i32 3 - %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %tmp1, float %a) - ret float %tmp2; -} - -declare double @llvm.aarch64.neon.vmulx.f64(double, double) - -define double @test_fmulx_lane_f64(double %a, <1 x double> %v) { - ; CHECK: test_fmulx_lane_f64 - ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] - %tmp1 = extractelement <1 x double> %v, i32 0 - %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) - ret double %tmp2; -} - -define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) { - ; CHECK: test_fmulx_laneq_f64_0 - ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] - %tmp1 = extractelement <2 x double> %v, i32 0 - %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) - ret double %tmp2; -} - - -define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) { - ; CHECK: test_fmulx_laneq_f64_1 - ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] - %tmp1 = extractelement <2 x double> %v, i32 1 - %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) - ret double %tmp2; -} - -define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) { - ; CHECK: test_fmulx_laneq_f64_1_swap - ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] - %tmp1 = extractelement <2 x double> %v, i32 1 - %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %tmp1, double %a) - ret double %tmp2; -} - diff --git a/test/CodeGen/AArch64/neon-scalar-compare.ll b/test/CodeGen/AArch64/neon-scalar-compare.ll deleted file mode 100644 index 2ecde91d7e1b..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-compare.ll +++ /dev/null @@ -1,344 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 has 
(the non-trivial parts of) this test covered by vcmp.ll - -;; Scalar Integer Compare - -define i64 @test_vceqd(i64 %a, i64 %b) { -; CHECK: test_vceqd -; CHECK: cmeq {{d[0-9]+}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %vceq.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vceq1.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vceq2.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64> %vceq.i, <1 x i64> %vceq1.i) - %0 = extractelement <1 x i64> %vceq2.i, i32 0 - ret i64 %0 -} - -define i64 @test_vceqzd(i64 %a) { -; CHECK: test_vceqzd -; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 -entry: - %vceqz.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vceqz1.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64> %vceqz.i, <1 x i64> zeroinitializer) - %0 = extractelement <1 x i64> %vceqz1.i, i32 0 - ret i64 %0 -} - -define i64 @test_vcged(i64 %a, i64 %b) { -; CHECK: test_vcged -; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %vcge.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vcge1.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcge.i, <1 x i64> %vcge1.i) - %0 = extractelement <1 x i64> %vcge2.i, i32 0 - ret i64 %0 -} - -define i64 @test_vcgezd(i64 %a) { -; CHECK: test_vcgezd -; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, #0x0 -entry: - %vcgez.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vcgez1.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcgez.i, <1 x i64> zeroinitializer) - %0 = extractelement <1 x i64> %vcgez1.i, i32 0 - ret i64 %0 -} - -define i64 @test_vcgtd(i64 %a, i64 %b) { -; CHECK: test_vcgtd -; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %vcgt.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vcgt1.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgt.i, <1 x i64> %vcgt1.i) - %0 = extractelement <1 x i64> %vcgt2.i, i32 0 - ret i64 %0 -} - -define i64 @test_vcgtzd(i64 %a) { -; CHECK: test_vcgtzd -; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, #0x0 -entry: - %vcgtz.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vcgtz1.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgtz.i, <1 x i64> zeroinitializer) - %0 = extractelement <1 x i64> %vcgtz1.i, i32 0 - ret i64 %0 -} - -define i64 @test_vcled(i64 %a, i64 %b) { -; CHECK: test_vcled -; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %vcgt.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vcgt1.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgt.i, <1 x i64> %vcgt1.i) - %0 = extractelement <1 x i64> %vcgt2.i, i32 0 - ret i64 %0 -} - -define i64 @test_vclezd(i64 %a) { -; CHECK: test_vclezd -; CHECK: cmle {{d[0-9]}}, {{d[0-9]}}, #0x0 -entry: - %vclez.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vclez1.i = call <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1i64.v1i64(<1 x i64> %vclez.i, <1 x i64> zeroinitializer) - %0 = extractelement <1 x i64> %vclez1.i, i32 0 - ret i64 %0 -} - -define i64 @test_vcltd(i64 %a, i64 %b) { -; CHECK: test_vcltd -; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %vcge.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vcge1.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcge.i, <1 x i64> %vcge1.i) - %0 = extractelement <1 x i64> %vcge2.i, i32 0 - ret i64 %0 -} - 
-define i64 @test_vcltzd(i64 %a) { -; CHECK: test_vcltzd -; CHECK: cmlt {{d[0-9]}}, {{d[0-9]}}, #0x0 -entry: - %vcltz.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vcltz1.i = call <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64> %vcltz.i, <1 x i64> zeroinitializer) - %0 = extractelement <1 x i64> %vcltz1.i, i32 0 - ret i64 %0 -} - -define i64 @test_vtstd(i64 %a, i64 %b) { -; CHECK: test_vtstd -; CHECK: cmtst {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %vtst.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vtst1.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vtst2.i = call <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64> %vtst.i, <1 x i64> %vtst1.i) - %0 = extractelement <1 x i64> %vtst2.i, i32 0 - ret i64 %0 -} - - -define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vcage_f64 -; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2 - ret <1 x i64> %vcage2.i -} - -define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vcagt_f64 -; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2 - ret <1 x i64> %vcagt2.i -} - -define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vcale_f64 -; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2 - ret <1 x i64> %vcage2.i -} - -define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vcalt_f64 -; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2 - ret <1 x i64> %vcagt2.i -} - -define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vceq_s64 -; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp eq <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vceq_u64 -; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp eq <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vceq_f64 -; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = fcmp oeq <1 x double> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vcge_s64 -; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp sge <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vcge_u64 -; CHECK: cmhs {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp uge <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vcge_f64 -; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = fcmp oge <1 x double> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vcle_s64 -; CHECK: cmge {{d[0-9]}}, 
{{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp sle <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vcle_u64 -; CHECK: cmhs {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp ule <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vcle_f64 -; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = fcmp ole <1 x double> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vcgt_s64 -; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp sgt <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vcgt_u64 -; CHECK: cmhi {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp ugt <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vcgt_f64 -; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = fcmp ogt <1 x double> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vclt_s64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vclt_s64 -; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp slt <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vclt_u64 -; CHECK: cmhi {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp ult <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vclt_f64 -; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = fcmp olt <1 x double> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vceqz_s64(<1 x i64> %a) #0 { -; CHECK: test_vceqz_s64 -; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 - %1 = icmp eq <1 x i64> %a, zeroinitializer - %vceqz.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vceqz.i -} - -define <1 x i64> @test_vceqz_u64(<1 x i64> %a) #0 { -; CHECK: test_vceqz_u64 -; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 - %1 = icmp eq <1 x i64> %a, zeroinitializer - %vceqz.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vceqz.i -} - -define <1 x i64> @test_vceqz_p64(<1 x i64> %a) #0 { -; CHECK: test_vceqz_p64 -; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 - %1 = icmp eq <1 x i64> %a, zeroinitializer - %vceqz.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vceqz.i -} - -define <2 x i64> @test_vceqzq_p64(<2 x i64> %a) #0 { -; CHECK: test_vceqzq_p64 -; CHECK: cmeq {{v[0-9]}}.2d, {{v[0-9]}}.2d, #0 - %1 = icmp eq <2 x i64> %a, zeroinitializer - %vceqz.i = sext <2 x i1> %1 to <2 x i64> - ret <2 x i64> %vceqz.i -} - -define <1 x i64> @test_vcgez_s64(<1 x i64> %a) #0 { -; CHECK: test_vcgez_s64 -; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, #0x0 - %1 = icmp sge <1 x i64> %a, zeroinitializer - %vcgez.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vcgez.i -} - -define <1 x i64> @test_vclez_s64(<1 x i64> %a) #0 { -; CHECK: test_vclez_s64 -; CHECK: cmle {{d[0-9]}}, {{d[0-9]}}, #0x0 - %1 = icmp sle <1 x i64> %a, 
zeroinitializer - %vclez.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vclez.i -} - -define <1 x i64> @test_vcgtz_s64(<1 x i64> %a) #0 { -; CHECK: test_vcgtz_s64 -; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, #0x0 - %1 = icmp sgt <1 x i64> %a, zeroinitializer - %vcgtz.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vcgtz.i -} - -define <1 x i64> @test_vcltz_s64(<1 x i64> %a) #0 { -; CHECK: test_vcltz_s64 -; CHECK: cmlt {{d[0-9]}}, {{d[0-9]}}, #0 - %1 = icmp slt <1 x i64> %a, zeroinitializer - %vcltz.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vcltz.i -} - -declare <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double>, <1 x double>) -declare <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double>, <1 x double>) -declare <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vchs.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vchi.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll index a505dafa3e78..ab7ea661b406 100644 --- a/test/CodeGen/AArch64/neon-scalar-copy.ll +++ b/test/CodeGen/AArch64/neon-scalar-copy.ll @@ -1,10 +1,8 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 define float @test_dup_sv2S(<2 x float> %v) { ; CHECK-LABEL: test_dup_sv2S - ; CHECK-AARCH64: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1] ; CHECK-ARM64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 ret float %tmp1 @@ -39,7 +37,6 @@ define double @test_dup_dvD(<1 x double> %v) { define double @test_dup_dv2D(<2 x double> %v) { ; CHECK-LABEL: test_dup_dv2D - ; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] ; CHECK-ARM64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 ret double %tmp1 @@ -47,7 +44,6 @@ define double @test_dup_dv2D(<2 x double> %v) { define double @test_dup_dv2D_0(<2 x double> %v) { ; CHECK-LABEL: test_dup_dv2D_0 - ; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] ; CHECK-ARM64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK: ret %tmp1 = extractelement <2 x double> %v, i32 1 @@ -56,49 +52,42 @@ define double @test_dup_dv2D_0(<2 x double> %v) { define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) { ; CHECK-LABEL: test_vector_dup_bv16B - ; CHECK-AARCH64: dup {{b[0-9]+}}, {{v[0-9]+}}.b[14] %shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> ret <1 x i8> %shuffle.i } define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) { ; CHECK-LABEL: test_vector_dup_bv8B - ; CHECK-AARCH64: dup {{b[0-9]+}}, {{v[0-9]+}}.b[7] %shuffle.i = shufflevector <8 x i8> %v1, <8 x i8> undef, <1 x i32> ret <1 x i8> %shuffle.i } define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) { ; CHECK-LABEL: test_vector_dup_hv8H - ; CHECK-AARCH64: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7] %shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> ret <1 x i16> %shuffle.i } 
define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) { ; CHECK-LABEL: test_vector_dup_hv4H - ; CHECK-AARCH64: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3] %shuffle.i = shufflevector <4 x i16> %v1, <4 x i16> undef, <1 x i32> ret <1 x i16> %shuffle.i } define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) { ; CHECK-LABEL: test_vector_dup_sv4S - ; CHECK-AARCH64: dup {{s[0-9]+}}, {{v[0-9]+}}.s[3] %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> ret <1 x i32> %shuffle } define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) { ; CHECK-LABEL: test_vector_dup_sv2S - ; CHECK-AARCH64: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1] %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <1 x i32> ret <1 x i32> %shuffle } define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) { ; CHECK-LABEL: test_vector_dup_dv2D - ; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] ; CHECK-ARM64: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8 %shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> ret <1 x i64> %shuffle.i diff --git a/test/CodeGen/AArch64/neon-scalar-cvt.ll b/test/CodeGen/AArch64/neon-scalar-cvt.ll deleted file mode 100644 index c19b0a765c60..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-cvt.ll +++ /dev/null @@ -1,134 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 has a different approach to scalars. Discarding. - -define float @test_vcvts_f32_s32(i32 %a) { -; CHECK: test_vcvts_f32_s32 -; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vcvtf.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtint2fps.f32.v1i32(<1 x i32> %vcvtf.i) - ret float %0 -} - -declare float @llvm.aarch64.neon.vcvtint2fps.f32.v1i32(<1 x i32>) - -define double @test_vcvtd_f64_s64(i64 %a) { -; CHECK: test_vcvtd_f64_s64 -; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vcvtf.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtint2fps.f64.v1i64(<1 x i64> %vcvtf.i) - ret double %0 -} - -declare double @llvm.aarch64.neon.vcvtint2fps.f64.v1i64(<1 x i64>) - -define float @test_vcvts_f32_u32(i32 %a) { -; CHECK: test_vcvts_f32_u32 -; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vcvtf.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtint2fpu.f32.v1i32(<1 x i32> %vcvtf.i) - ret float %0 -} - -declare float @llvm.aarch64.neon.vcvtint2fpu.f32.v1i32(<1 x i32>) - -define double @test_vcvtd_f64_u64(i64 %a) { -; CHECK: test_vcvtd_f64_u64 -; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vcvtf.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtint2fpu.f64.v1i64(<1 x i64> %vcvtf.i) - ret double %0 -} - -declare double @llvm.aarch64.neon.vcvtint2fpu.f64.v1i64(<1 x i64>) - -define float @test_vcvts_n_f32_s32(i32 %a) { -; CHECK: test_vcvts_n_f32_s32 -; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}}, #1 -entry: - %vcvtf = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtfxs2fp.n.f32.v1i32(<1 x i32> %vcvtf, i32 1) - ret float %0 -} - -declare float @llvm.aarch64.neon.vcvtfxs2fp.n.f32.v1i32(<1 x i32>, i32) - -define double @test_vcvtd_n_f64_s64(i64 %a) { -; CHECK: test_vcvtd_n_f64_s64 -; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}, #1 -entry: - %vcvtf = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtfxs2fp.n.f64.v1i64(<1 x i64> %vcvtf, i32 1) - ret double %0 -} - -declare double @llvm.aarch64.neon.vcvtfxs2fp.n.f64.v1i64(<1 x i64>, i32) - -define float 
@test_vcvts_n_f32_u32(i32 %a) { -; CHECK: test_vcvts_n_f32_u32 -; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}}, #1 -entry: - %vcvtf = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtfxu2fp.n.f32.v1i32(<1 x i32> %vcvtf, i32 1) - ret float %0 -} - -declare float @llvm.aarch64.neon.vcvtfxu2fp.n.f32.v1i32(<1 x i32>, i32) - -define double @test_vcvtd_n_f64_u64(i64 %a) { -; CHECK: test_vcvtd_n_f64_u64 -; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}}, #1 -entry: - %vcvtf = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtfxu2fp.n.f64.v1i64(<1 x i64> %vcvtf, i32 1) - ret double %0 -} - -declare double @llvm.aarch64.neon.vcvtfxu2fp.n.f64.v1i64(<1 x i64>, i32) - -define i32 @test_vcvts_n_s32_f32(float %a) { -; CHECK: test_vcvts_n_s32_f32 -; CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}}, #1 -entry: - %fcvtzs1 = call <1 x i32> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i32.f32(float %a, i32 1) - %0 = extractelement <1 x i32> %fcvtzs1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i32.f32(float, i32) - -define i64 @test_vcvtd_n_s64_f64(double %a) { -; CHECK: test_vcvtd_n_s64_f64 -; CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}, #1 -entry: - %fcvtzs1 = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i64.f64(double %a, i32 1) - %0 = extractelement <1 x i64> %fcvtzs1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i64.f64(double, i32) - -define i32 @test_vcvts_n_u32_f32(float %a) { -; CHECK: test_vcvts_n_u32_f32 -; CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}}, #32 -entry: - %fcvtzu1 = call <1 x i32> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i32.f32(float %a, i32 32) - %0 = extractelement <1 x i32> %fcvtzu1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i32.f32(float, i32) - -define i64 @test_vcvtd_n_u64_f64(double %a) { -; CHECK: test_vcvtd_n_u64_f64 -; CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}}, #64 -entry: - %fcvtzu1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i64.f64(double %a, i32 64) - %0 = extractelement <1 x i64> %fcvtzu1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i64.f64(double, i32) diff --git a/test/CodeGen/AArch64/neon-scalar-ext.ll b/test/CodeGen/AArch64/neon-scalar-ext.ll deleted file mode 100644 index 502fcdacfc10..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-ext.ll +++ /dev/null @@ -1,114 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 doesn't use <1 x iN> types, for N < 64. 
- -define <1 x i64> @test_zext_v1i32_v1i64(<2 x i32> %v) nounwind readnone { -; CHECK-LABEL: test_zext_v1i32_v1i64: -; CHECK: ushll v0.2d, v0.2s, #0 - %1 = extractelement <2 x i32> %v, i32 0 - %2 = insertelement <1 x i32> undef, i32 %1, i32 0 - %3 = zext <1 x i32> %2 to <1 x i64> - ret <1 x i64> %3 -} - -define <1 x i32> @test_zext_v1i16_v1i32(<4 x i16> %v) nounwind readnone { -; CHECK-LABEL: test_zext_v1i16_v1i32: -; CHECK: ushll v0.4s, v0.4h, #0 - %1 = extractelement <4 x i16> %v, i32 0 - %2 = insertelement <1 x i16> undef, i16 %1, i32 0 - %3 = zext <1 x i16> %2 to <1 x i32> - ret <1 x i32> %3 -} - -define <1 x i16> @test_zext_v1i8_v1i16(<8 x i8> %v) nounwind readnone { -; CHECK-LABEL: test_zext_v1i8_v1i16: -; CHECK: ushll v0.8h, v0.8b, #0 - %1 = extractelement <8 x i8> %v, i32 0 - %2 = insertelement <1 x i8> undef, i8 %1, i32 0 - %3 = zext <1 x i8> %2 to <1 x i16> - ret <1 x i16> %3 -} - -define <1 x i32> @test_zext_v1i8_v1i32(<8 x i8> %v) nounwind readnone { -; CHECK-LABEL: test_zext_v1i8_v1i32: -; CHECK: dup b0, v0.b[0] - %1 = extractelement <8 x i8> %v, i32 0 - %2 = insertelement <1 x i8> undef, i8 %1, i32 0 - %3 = zext <1 x i8> %2 to <1 x i32> - ret <1 x i32> %3 -} - -define <1 x i64> @test_zext_v1i16_v1i64(<4 x i16> %v) nounwind readnone { -; CHECK-LABEL: test_zext_v1i16_v1i64: -; CHECK: dup h0, v0.h[0] - %1 = extractelement <4 x i16> %v, i32 0 - %2 = insertelement <1 x i16> undef, i16 %1, i32 0 - %3 = zext <1 x i16> %2 to <1 x i64> - ret <1 x i64> %3 -} - -define <1 x i64> @test_zext_v1i8_v1i64(<8 x i8> %v) nounwind readnone { -; CHECK-LABEL: test_zext_v1i8_v1i64: -; CHECK: dup b0, v0.b[0] - %1 = extractelement <8 x i8> %v, i32 0 - %2 = insertelement <1 x i8> undef, i8 %1, i32 0 - %3 = zext <1 x i8> %2 to <1 x i64> - ret <1 x i64> %3 -} - -define <1 x i64> @test_sext_v1i32_v1i64(<2 x i32> %v) nounwind readnone { -; CHECK-LABEL: test_sext_v1i32_v1i64: -; CHECK: sshll v0.2d, v0.2s, #0 - %1 = extractelement <2 x i32> %v, i32 0 - %2 = insertelement <1 x i32> undef, i32 %1, i32 0 - %3 = sext <1 x i32> %2 to <1 x i64> - ret <1 x i64> %3 -} - -define <1 x i32> @test_sext_v1i16_v1i32(<4 x i16> %v) nounwind readnone { -; CHECK-LABEL: test_sext_v1i16_v1i32: -; CHECK: sshll v0.4s, v0.4h, #0 - %1 = extractelement <4 x i16> %v, i32 0 - %2 = insertelement <1 x i16> undef, i16 %1, i32 0 - %3 = sext <1 x i16> %2 to <1 x i32> - ret <1 x i32> %3 -} - -define <1 x i16> @test_sext_v1i8_v1i16(<8 x i8> %v) nounwind readnone { -; CHECK-LABEL: test_sext_v1i8_v1i16: -; CHECK: sshll v0.8h, v0.8b, #0 - %1 = extractelement <8 x i8> %v, i32 0 - %2 = insertelement <1 x i8> undef, i8 %1, i32 0 - %3 = sext <1 x i8> %2 to <1 x i16> - ret <1 x i16> %3 -} - -define <1 x i32> @test_sext_v1i8_v1i32(<8 x i8> %v) nounwind readnone { -; CHECK-LABEL: test_sext_v1i8_v1i32: -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK: sshll v0.4s, v0.4h, #0 - %1 = extractelement <8 x i8> %v, i32 0 - %2 = insertelement <1 x i8> undef, i8 %1, i32 0 - %3 = sext <1 x i8> %2 to <1 x i32> - ret <1 x i32> %3 -} - -define <1 x i64> @test_sext_v1i16_v1i64(<4 x i16> %v) nounwind readnone { -; CHECK-LABEL: test_sext_v1i16_v1i64: -; CHECK: sshll v0.4s, v0.4h, #0 -; CHECK: sshll v0.2d, v0.2s, #0 - %1 = extractelement <4 x i16> %v, i32 0 - %2 = insertelement <1 x i16> undef, i16 %1, i32 0 - %3 = sext <1 x i16> %2 to <1 x i64> - ret <1 x i64> %3 -} - -define <1 x i64> @test_sext_v1i8_v1i64(<8 x i8> %v) nounwind readnone { -; CHECK-LABEL: test_sext_v1i8_v1i64: -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK: sshll v0.4s, v0.4h, #0 -; CHECK: sshll v0.2d, v0.2s, #0 
- %1 = extractelement <8 x i8> %v, i32 0 - %2 = insertelement <1 x i8> undef, i8 %1, i32 0 - %3 = sext <1 x i8> %2 to <1 x i64> - ret <1 x i64> %3 -} diff --git a/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll b/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll deleted file mode 100644 index 2004226bd135..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll +++ /dev/null @@ -1,105 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; intrinsic wrangling that arm64 does differently. - -define i8 @test_vqmovunh_s16(i16 %a) { -; CHECK: test_vqmovunh_s16 -; CHECK: sqxtun {{b[0-9]+}}, {{h[0-9]+}} -entry: - %vqmovun.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vqmovun1.i = call <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16> %vqmovun.i) - %0 = extractelement <1 x i8> %vqmovun1.i, i32 0 - ret i8 %0 -} - -define i16 @test_vqmovuns_s32(i32 %a) { -; CHECK: test_vqmovuns_s32 -; CHECK: sqxtun {{h[0-9]+}}, {{s[0-9]+}} -entry: - %vqmovun.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqmovun1.i = call <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32> %vqmovun.i) - %0 = extractelement <1 x i16> %vqmovun1.i, i32 0 - ret i16 %0 -} - -define i32 @test_vqmovund_s64(i64 %a) { -; CHECK: test_vqmovund_s64 -; CHECK: sqxtun {{s[0-9]+}}, {{d[0-9]+}} -entry: - %vqmovun.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vqmovun1.i = call <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64> %vqmovun.i) - %0 = extractelement <1 x i32> %vqmovun1.i, i32 0 - ret i32 %0 -} - -declare <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16>) -declare <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32>) -declare <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64>) - -define i8 @test_vqmovnh_s16(i16 %a) { -; CHECK: test_vqmovnh_s16 -; CHECK: sqxtn {{b[0-9]+}}, {{h[0-9]+}} -entry: - %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16> %vqmovn.i) - %0 = extractelement <1 x i8> %vqmovn1.i, i32 0 - ret i8 %0 -} - -define i16 @test_vqmovns_s32(i32 %a) { -; CHECK: test_vqmovns_s32 -; CHECK: sqxtn {{h[0-9]+}}, {{s[0-9]+}} -entry: - %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32> %vqmovn.i) - %0 = extractelement <1 x i16> %vqmovn1.i, i32 0 - ret i16 %0 -} - -define i32 @test_vqmovnd_s64(i64 %a) { -; CHECK: test_vqmovnd_s64 -; CHECK: sqxtn {{s[0-9]+}}, {{d[0-9]+}} -entry: - %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64> %vqmovn.i) - %0 = extractelement <1 x i32> %vqmovn1.i, i32 0 - ret i32 %0 -} - -declare <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16>) -declare <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32>) -declare <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64>) - -define i8 @test_vqmovnh_u16(i16 %a) { -; CHECK: test_vqmovnh_u16 -; CHECK: uqxtn {{b[0-9]+}}, {{h[0-9]+}} -entry: - %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16> %vqmovn.i) - %0 = extractelement <1 x i8> %vqmovn1.i, i32 0 - ret i8 %0 -} - - -define i16 @test_vqmovns_u32(i32 %a) { -; CHECK: test_vqmovns_u32 -; CHECK: uqxtn {{h[0-9]+}}, {{s[0-9]+}} -entry: - %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32> %vqmovn.i) - %0 = extractelement <1 x i16> %vqmovn1.i, i32 0 - ret i16 %0 -} - -define i32 @test_vqmovnd_u64(i64 %a) { -; CHECK: 
test_vqmovnd_u64 -; CHECK: uqxtn {{s[0-9]+}}, {{d[0-9]+}} -entry: - %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64> %vqmovn.i) - %0 = extractelement <1 x i32> %vqmovn1.i, i32 0 - ret i32 %0 -} - -declare <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16>) -declare <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32>) -declare <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-fabd.ll b/test/CodeGen/AArch64/neon-scalar-fabd.ll deleted file mode 100644 index 9b2ae2bbc0ab..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-fabd.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has these two tests in vabs.ll - -define float @test_vabds_f32(float %a, float %b) { -; CHECK-LABEL: test_vabds_f32 -; CHECK: fabd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -entry: - %0 = call float @llvm.aarch64.neon.vabd.f32(float %a, float %a) - ret float %0 -} - -define double @test_vabdd_f64(double %a, double %b) { -; CHECK-LABEL: test_vabdd_f64 -; CHECK: fabd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} -entry: - %0 = call double @llvm.aarch64.neon.vabd.f64(double %a, double %b) - ret double %0 -} - -declare double @llvm.aarch64.neon.vabd.f64(double, double) -declare float @llvm.aarch64.neon.vabd.f32(float, float) diff --git a/test/CodeGen/AArch64/neon-scalar-fcvt.ll b/test/CodeGen/AArch64/neon-scalar-fcvt.ll deleted file mode 100644 index 341ed69b4822..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-fcvt.ll +++ /dev/null @@ -1,234 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 duplicates these tests in cvt.ll - -;; Scalar Floating-point Convert - -define float @test_vcvtxn(double %a) { -; CHECK: test_vcvtxn -; CHECK: fcvtxn {{s[0-9]}}, {{d[0-9]}} -entry: - %vcvtf = call float @llvm.aarch64.neon.fcvtxn(double %a) - ret float %vcvtf -} - -declare float @llvm.aarch64.neon.fcvtxn(double) - -define i32 @test_vcvtass(float %a) { -; CHECK: test_vcvtass -; CHECK: fcvtas {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtas1.i = call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtas1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float) - -define i64 @test_test_vcvtasd(double %a) { -; CHECK: test_test_vcvtasd -; CHECK: fcvtas {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtas1.i = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtas1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double) - -define i32 @test_vcvtaus(float %a) { -; CHECK: test_vcvtaus -; CHECK: fcvtau {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtau1.i = call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtau1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float) - -define i64 @test_vcvtaud(double %a) { -; CHECK: test_vcvtaud -; CHECK: fcvtau {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtau1.i = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtau1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double) - -define i32 @test_vcvtmss(float %a) { -; CHECK: test_vcvtmss -; CHECK: fcvtms {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtms1.i = call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtms1.i, 
i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float) - -define i64 @test_vcvtmd_s64_f64(double %a) { -; CHECK: test_vcvtmd_s64_f64 -; CHECK: fcvtms {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtms1.i = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtms1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double) - -define i32 @test_vcvtmus(float %a) { -; CHECK: test_vcvtmus -; CHECK: fcvtmu {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtmu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtmu1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float) - -define i64 @test_vcvtmud(double %a) { -; CHECK: test_vcvtmud -; CHECK: fcvtmu {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtmu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtmu1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double) - -define i32 @test_vcvtnss(float %a) { -; CHECK: test_vcvtnss -; CHECK: fcvtns {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtns1.i = call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtns1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float) - -define i64 @test_vcvtnd_s64_f64(double %a) { -; CHECK: test_vcvtnd_s64_f64 -; CHECK: fcvtns {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtns1.i = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtns1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double) - -define i32 @test_vcvtnus(float %a) { -; CHECK: test_vcvtnus -; CHECK: fcvtnu {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtnu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtnu1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float) - -define i64 @test_vcvtnud(double %a) { -; CHECK: test_vcvtnud -; CHECK: fcvtnu {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtnu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtnu1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double) - -define i32 @test_vcvtpss(float %a) { -; CHECK: test_vcvtpss -; CHECK: fcvtps {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtps1.i = call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtps1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float) - -define i64 @test_vcvtpd_s64_f64(double %a) { -; CHECK: test_vcvtpd_s64_f64 -; CHECK: fcvtps {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtps1.i = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtps1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double) - -define i32 @test_vcvtpus(float %a) { -; CHECK: test_vcvtpus -; CHECK: fcvtpu {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtpu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtpu1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float) - -define i64 @test_vcvtpud(double %a) { -; CHECK: test_vcvtpud -; CHECK: fcvtpu {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtpu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtpu1.i, i32 0 - 
ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double) - -define i32 @test_vcvtss(float %a) { -; CHECK: test_vcvtss -; CHECK: fcvtzs {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtzs1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtzs1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float) - -define i64 @test_vcvtd_s64_f64(double %a) { -; CHECK: test_vcvtd_s64_f64 -; CHECK: fcvtzs {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvzs1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvzs1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double) - -define i32 @test_vcvtus(float %a) { -; CHECK: test_vcvtus -; CHECK: fcvtzu {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtzu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtzu1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float) - -define i64 @test_vcvtud(double %a) { -; CHECK: test_vcvtud -; CHECK: fcvtzu {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtzu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtzu1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double) diff --git a/test/CodeGen/AArch64/neon-scalar-fp-compare.ll b/test/CodeGen/AArch64/neon-scalar-fp-compare.ll deleted file mode 100644 index b17d8655c6f9..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-fp-compare.ll +++ /dev/null @@ -1,283 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 does not use intrinsics for comparisons. - -;; Scalar Floating-point Compare - -define i32 @test_vceqs_f32(float %a, float %b) { -; CHECK-LABEL: test_vceqs_f32 -; CHECK: fcmeq {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fceq2.i = call <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fceq2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vceqd_f64(double %a, double %b) { -; CHECK-LABEL: test_vceqd_f64 -; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fceq2.i = call <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fceq2.i, i32 0 - ret i64 %0 -} - -define <1 x i64> @test_vceqz_f64(<1 x double> %a) { -; CHECK-LABEL: test_vceqz_f64 -; CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0.0 -entry: - %0 = fcmp oeq <1 x double> %a, zeroinitializer - %vceqz.i = sext <1 x i1> %0 to <1 x i64> - ret <1 x i64> %vceqz.i -} - -define i32 @test_vceqzs_f32(float %a) { -; CHECK-LABEL: test_vceqzs_f32 -; CHECK: fcmeq {{s[0-9]}}, {{s[0-9]}}, #0.0 -entry: - %fceq1.i = call <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float %a, float 0.0) - %0 = extractelement <1 x i32> %fceq1.i, i32 0 - ret i32 %0 -} - -define i64 @test_vceqzd_f64(double %a) { -; CHECK-LABEL: test_vceqzd_f64 -; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, #0.0 -entry: - %fceq1.i = call <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f32(double %a, float 0.0) - %0 = extractelement <1 x i64> %fceq1.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcges_f32(float %a, float %b) { -; CHECK-LABEL: test_vcges_f32 -; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcge2.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcge2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcged_f64(double %a, double %b) { -; CHECK-LABEL: 
test_vcged_f64 -; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcge2.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcge2.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcgezs_f32(float %a) { -; CHECK-LABEL: test_vcgezs_f32 -; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, #0.0 -entry: - %fcge1.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float 0.0) - %0 = extractelement <1 x i32> %fcge1.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcgezd_f64(double %a) { -; CHECK-LABEL: test_vcgezd_f64 -; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, #0.0 -entry: - %fcge1.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f32(double %a, float 0.0) - %0 = extractelement <1 x i64> %fcge1.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcgts_f32(float %a, float %b) { -; CHECK-LABEL: test_vcgts_f32 -; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcgt2.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcgt2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcgtd_f64(double %a, double %b) { -; CHECK-LABEL: test_vcgtd_f64 -; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcgt2.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcgt2.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcgtzs_f32(float %a) { -; CHECK-LABEL: test_vcgtzs_f32 -; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, #0.0 -entry: - %fcgt1.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float 0.0) - %0 = extractelement <1 x i32> %fcgt1.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcgtzd_f64(double %a) { -; CHECK-LABEL: test_vcgtzd_f64 -; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, #0.0 -entry: - %fcgt1.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f32(double %a, float 0.0) - %0 = extractelement <1 x i64> %fcgt1.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcles_f32(float %a, float %b) { -; CHECK-LABEL: test_vcles_f32 -; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcge2.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcge2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcled_f64(double %a, double %b) { -; CHECK-LABEL: test_vcled_f64 -; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcge2.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcge2.i, i32 0 - ret i64 %0 -} - -define i32 @test_vclezs_f32(float %a) { -; CHECK-LABEL: test_vclezs_f32 -; CHECK: fcmle {{s[0-9]}}, {{s[0-9]}}, #0.0 -entry: - %fcle1.i = call <1 x i32> @llvm.aarch64.neon.fclez.v1i32.f32.f32(float %a, float 0.0) - %0 = extractelement <1 x i32> %fcle1.i, i32 0 - ret i32 %0 -} - -define i64 @test_vclezd_f64(double %a) { -; CHECK-LABEL: test_vclezd_f64 -; CHECK: fcmle {{d[0-9]}}, {{d[0-9]}}, #0.0 -entry: - %fcle1.i = call <1 x i64> @llvm.aarch64.neon.fclez.v1i64.f64.f32(double %a, float 0.0) - %0 = extractelement <1 x i64> %fcle1.i, i32 0 - ret i64 %0 -} - -define i32 @test_vclts_f32(float %a, float %b) { -; CHECK-LABEL: test_vclts_f32 -; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcgt2.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcgt2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcltd_f64(double %a, double %b) { -; CHECK-LABEL: test_vcltd_f64 -; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcgt2.i = call <1 x i64> 
@llvm.aarch64.neon.fcgt.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcgt2.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcltzs_f32(float %a) { -; CHECK-LABEL: test_vcltzs_f32 -; CHECK: fcmlt {{s[0-9]}}, {{s[0-9]}}, #0.0 -entry: - %fclt1.i = call <1 x i32> @llvm.aarch64.neon.fcltz.v1i32.f32.f32(float %a, float 0.0) - %0 = extractelement <1 x i32> %fclt1.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcltzd_f64(double %a) { -; CHECK-LABEL: test_vcltzd_f64 -; CHECK: fcmlt {{d[0-9]}}, {{d[0-9]}}, #0.0 -entry: - %fclt1.i = call <1 x i64> @llvm.aarch64.neon.fcltz.v1i64.f64.f32(double %a, float 0.0) - %0 = extractelement <1 x i64> %fclt1.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcages_f32(float %a, float %b) { -; CHECK-LABEL: test_vcages_f32 -; CHECK: facge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcage2.i = call <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcage2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcaged_f64(double %a, double %b) { -; CHECK-LABEL: test_vcaged_f64 -; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcage2.i = call <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcage2.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcagts_f32(float %a, float %b) { -; CHECK-LABEL: test_vcagts_f32 -; CHECK: facgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcagt2.i = call <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcagt2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcagtd_f64(double %a, double %b) { -; CHECK-LABEL: test_vcagtd_f64 -; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcagt2.i = call <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcagt2.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcales_f32(float %a, float %b) { -; CHECK-LABEL: test_vcales_f32 -; CHECK: facge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcage2.i = call <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcage2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcaled_f64(double %a, double %b) { -; CHECK-LABEL: test_vcaled_f64 -; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcage2.i = call <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcage2.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcalts_f32(float %a, float %b) { -; CHECK-LABEL: test_vcalts_f32 -; CHECK: facgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcalt2.i = call <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcalt2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcaltd_f64(double %a, double %b) { -; CHECK-LABEL: test_vcaltd_f64 -; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcalt2.i = call <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcalt2.i, i32 0 - ret i64 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float, float) -declare <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f32(double, float) -declare <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f64(double, double) -declare <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float, float) -declare <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f32(double, float) -declare <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double, double) -declare <1 x i32> 
@llvm.aarch64.neon.fclez.v1i32.f32.f32(float, float) -declare <1 x i64> @llvm.aarch64.neon.fclez.v1i64.f64.f32(double, float) -declare <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float, float) -declare <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f32(double, float) -declare <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double, double) -declare <1 x i32> @llvm.aarch64.neon.fcltz.v1i32.f32.f32(float, float) -declare <1 x i64> @llvm.aarch64.neon.fcltz.v1i64.f64.f32(double, float) -declare <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float, float) -declare <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double, double) -declare <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float, float) -declare <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double, double) diff --git a/test/CodeGen/AArch64/neon-scalar-mul.ll b/test/CodeGen/AArch64/neon-scalar-mul.ll deleted file mode 100644 index ac44c090b411..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-mul.ll +++ /dev/null @@ -1,144 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Just intrinsic wrangling, and arm64 does scalar differently anyway. - -define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) { -; CHECK: test_vqdmulhh_s16 -; CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - %1 = insertelement <1 x i16> undef, i16 %a, i32 0 - %2 = insertelement <1 x i16> undef, i16 %b, i32 0 - %3 = call <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16> %1, <1 x i16> %2) - %4 = extractelement <1 x i16> %3, i32 0 - ret i16 %4 -} - -define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) { -; CHECK: test_vqdmulhs_s32 -; CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - %1 = insertelement <1 x i32> undef, i32 %a, i32 0 - %2 = insertelement <1 x i32> undef, i32 %b, i32 0 - %3 = call <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32> %1, <1 x i32> %2) - %4 = extractelement <1 x i32> %3, i32 0 - ret i32 %4 -} - -declare <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16>, <1 x i16>) -declare <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32>, <1 x i32>) - -define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) { -; CHECK: test_vqrdmulhh_s16 -; CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - %1 = insertelement <1 x i16> undef, i16 %a, i32 0 - %2 = insertelement <1 x i16> undef, i16 %b, i32 0 - %3 = call <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16> %1, <1 x i16> %2) - %4 = extractelement <1 x i16> %3, i32 0 - ret i16 %4 -} - -define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) { -; CHECK: test_vqrdmulhs_s32 -; CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - %1 = insertelement <1 x i32> undef, i32 %a, i32 0 - %2 = insertelement <1 x i32> undef, i32 %b, i32 0 - %3 = call <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32> %1, <1 x i32> %2) - %4 = extractelement <1 x i32> %3, i32 0 - ret i32 %4 -} - -declare <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16>, <1 x i16>) -declare <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32>, <1 x i32>) - -define float @test_vmulxs_f32(float %a, float %b) { -; CHECK: test_vmulxs_f32 -; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - %1 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %b) - ret float %1 -} - -define double @test_vmulxd_f64(double %a, double %b) { -; CHECK: test_vmulxd_f64 -; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %1 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %b) - ret double %1 -} - -declare float @llvm.aarch64.neon.vmulx.f32(float, float) -declare double @llvm.aarch64.neon.vmulx.f64(double, double) - -define i32 
@test_vqdmlalh_s16(i32 %a, i16 %b, i16 %c) { -; CHECK: test_vqdmlalh_s16 -; CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} -entry: - %vqdmlal.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqdmlal1.i = insertelement <1 x i16> undef, i16 %b, i32 0 - %vqdmlal2.i = insertelement <1 x i16> undef, i16 %c, i32 0 - %vqdmlal3.i = call <1 x i32> @llvm.aarch64.neon.vqdmlal.v1i32(<1 x i32> %vqdmlal.i, <1 x i16> %vqdmlal1.i, <1 x i16> %vqdmlal2.i) - %0 = extractelement <1 x i32> %vqdmlal3.i, i32 0 - ret i32 %0 -} - -define i64 @test_vqdmlals_s32(i64 %a, i32 %b, i32 %c) { -; CHECK: test_vqdmlals_s32 -; CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vqdmlal.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vqdmlal1.i = insertelement <1 x i32> undef, i32 %b, i32 0 - %vqdmlal2.i = insertelement <1 x i32> undef, i32 %c, i32 0 - %vqdmlal3.i = call <1 x i64> @llvm.aarch64.neon.vqdmlal.v1i64(<1 x i64> %vqdmlal.i, <1 x i32> %vqdmlal1.i, <1 x i32> %vqdmlal2.i) - %0 = extractelement <1 x i64> %vqdmlal3.i, i32 0 - ret i64 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vqdmlal.v1i32(<1 x i32>, <1 x i16>, <1 x i16>) -declare <1 x i64> @llvm.aarch64.neon.vqdmlal.v1i64(<1 x i64>, <1 x i32>, <1 x i32>) - -define i32 @test_vqdmlslh_s16(i32 %a, i16 %b, i16 %c) { -; CHECK: test_vqdmlslh_s16 -; CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} -entry: - %vqdmlsl.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqdmlsl1.i = insertelement <1 x i16> undef, i16 %b, i32 0 - %vqdmlsl2.i = insertelement <1 x i16> undef, i16 %c, i32 0 - %vqdmlsl3.i = call <1 x i32> @llvm.aarch64.neon.vqdmlsl.v1i32(<1 x i32> %vqdmlsl.i, <1 x i16> %vqdmlsl1.i, <1 x i16> %vqdmlsl2.i) - %0 = extractelement <1 x i32> %vqdmlsl3.i, i32 0 - ret i32 %0 -} - -define i64 @test_vqdmlsls_s32(i64 %a, i32 %b, i32 %c) { -; CHECK: test_vqdmlsls_s32 -; CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vqdmlsl.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vqdmlsl1.i = insertelement <1 x i32> undef, i32 %b, i32 0 - %vqdmlsl2.i = insertelement <1 x i32> undef, i32 %c, i32 0 - %vqdmlsl3.i = call <1 x i64> @llvm.aarch64.neon.vqdmlsl.v1i64(<1 x i64> %vqdmlsl.i, <1 x i32> %vqdmlsl1.i, <1 x i32> %vqdmlsl2.i) - %0 = extractelement <1 x i64> %vqdmlsl3.i, i32 0 - ret i64 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vqdmlsl.v1i32(<1 x i32>, <1 x i16>, <1 x i16>) -declare <1 x i64> @llvm.aarch64.neon.vqdmlsl.v1i64(<1 x i64>, <1 x i32>, <1 x i32>) - -define i32 @test_vqdmullh_s16(i16 %a, i16 %b) { -; CHECK: test_vqdmullh_s16 -; CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} -entry: - %vqdmull.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vqdmull1.i = insertelement <1 x i16> undef, i16 %b, i32 0 - %vqdmull2.i = call <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16> %vqdmull.i, <1 x i16> %vqdmull1.i) - %0 = extractelement <1 x i32> %vqdmull2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vqdmulls_s32(i32 %a, i32 %b) { -; CHECK: test_vqdmulls_s32 -; CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vqdmull.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqdmull1.i = insertelement <1 x i32> undef, i32 %b, i32 0 - %vqdmull2.i = call <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32> %vqdmull.i, <1 x i32> %vqdmull1.i) - %0 = extractelement <1 x i64> %vqdmull2.i, i32 0 - ret i64 %0 -} - -declare <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16>, <1 x i16>) -declare <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32>, <1 x i32>) diff --git a/test/CodeGen/AArch64/neon-scalar-neg.ll 
b/test/CodeGen/AArch64/neon-scalar-neg.ll deleted file mode 100644 index 6eb0a1a152b1..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-neg.ll +++ /dev/null @@ -1,62 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; Intrinsic wrangling. arm64 does it differently. - -define i64 @test_vnegd_s64(i64 %a) { -; CHECK: test_vnegd_s64 -; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vneg.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vneg1.i = tail call <1 x i64> @llvm.aarch64.neon.vneg(<1 x i64> %vneg.i) - %0 = extractelement <1 x i64> %vneg1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vneg(<1 x i64>) - -define i8 @test_vqnegb_s8(i8 %a) { -; CHECK: test_vqnegb_s8 -; CHECK: sqneg {{b[0-9]+}}, {{b[0-9]+}} -entry: - %vqneg.i = insertelement <1 x i8> undef, i8 %a, i32 0 - %vqneg1.i = call <1 x i8> @llvm.arm.neon.vqneg.v1i8(<1 x i8> %vqneg.i) - %0 = extractelement <1 x i8> %vqneg1.i, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.arm.neon.vqneg.v1i8(<1 x i8>) - -define i16 @test_vqnegh_s16(i16 %a) { -; CHECK: test_vqnegh_s16 -; CHECK: sqneg {{h[0-9]+}}, {{h[0-9]+}} -entry: - %vqneg.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vqneg1.i = call <1 x i16> @llvm.arm.neon.vqneg.v1i16(<1 x i16> %vqneg.i) - %0 = extractelement <1 x i16> %vqneg1.i, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.arm.neon.vqneg.v1i16(<1 x i16>) - -define i32 @test_vqnegs_s32(i32 %a) { -; CHECK: test_vqnegs_s32 -; CHECK: sqneg {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vqneg.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqneg1.i = call <1 x i32> @llvm.arm.neon.vqneg.v1i32(<1 x i32> %vqneg.i) - %0 = extractelement <1 x i32> %vqneg1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.arm.neon.vqneg.v1i32(<1 x i32>) - -define i64 @test_vqnegd_s64(i64 %a) { -; CHECK: test_vqnegd_s64 -; CHECK: sqneg {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vqneg.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vqneg1.i = call <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64> %vqneg.i) - %0 = extractelement <1 x i64> %vqneg1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-recip.ll b/test/CodeGen/AArch64/neon-scalar-recip.ll deleted file mode 100644 index 4b1ca6e91c8d..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-recip.ll +++ /dev/null @@ -1,93 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; duplicates arm64 tests in vsqrt.ll - -define float @test_vrecpss_f32(float %a, float %b) { -; CHECK: test_vrecpss_f32 -; CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - %1 = call float @llvm.aarch64.neon.vrecps.f32(float %a, float %b) - ret float %1 -} - -define double @test_vrecpsd_f64(double %a, double %b) { -; CHECK: test_vrecpsd_f64 -; CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %1 = call double @llvm.aarch64.neon.vrecps.f64(double %a, double %b) - ret double %1 -} - -declare float @llvm.aarch64.neon.vrecps.f32(float, float) -declare double @llvm.aarch64.neon.vrecps.f64(double, double) - -define float @test_vrsqrtss_f32(float %a, float %b) { -; CHECK: test_vrsqrtss_f32 -; CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - %1 = call float @llvm.aarch64.neon.vrsqrts.f32(float %a, float %b) - ret float %1 -} - -define double @test_vrsqrtsd_f64(double %a, double %b) { -; CHECK: test_vrsqrtsd_f64 -; CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %1 = call double @llvm.aarch64.neon.vrsqrts.f64(double %a, double %b) - ret 
double %1 -} - -declare float @llvm.aarch64.neon.vrsqrts.f32(float, float) -declare double @llvm.aarch64.neon.vrsqrts.f64(double, double) - -define float @test_vrecpes_f32(float %a) { -; CHECK: test_vrecpes_f32 -; CHECK: frecpe {{s[0-9]+}}, {{s[0-9]+}} -entry: - %0 = call float @llvm.aarch64.neon.vrecpe.f32(float %a) - ret float %0 -} - -define double @test_vrecped_f64(double %a) { -; CHECK: test_vrecped_f64 -; CHECK: frecpe {{d[0-9]+}}, {{d[0-9]+}} -entry: - %0 = call double @llvm.aarch64.neon.vrecpe.f64(double %a) - ret double %0 -} - -declare float @llvm.aarch64.neon.vrecpe.f32(float) -declare double @llvm.aarch64.neon.vrecpe.f64(double) - -define float @test_vrecpxs_f32(float %a) { -; CHECK: test_vrecpxs_f32 -; CHECK: frecpx {{s[0-9]+}}, {{s[0-9]+}} -entry: - %0 = call float @llvm.aarch64.neon.vrecpx.f32(float %a) - ret float %0 -} - -define double @test_vrecpxd_f64(double %a) { -; CHECK: test_vrecpxd_f64 -; CHECK: frecpx {{d[0-9]+}}, {{d[0-9]+}} -entry: - %0 = call double @llvm.aarch64.neon.vrecpx.f64(double %a) - ret double %0 -} - -declare float @llvm.aarch64.neon.vrecpx.f32(float) -declare double @llvm.aarch64.neon.vrecpx.f64(double) - -define float @test_vrsqrtes_f32(float %a) { -; CHECK: test_vrsqrtes_f32 -; CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}} -entry: - %0 = call float @llvm.aarch64.neon.vrsqrte.f32(float %a) - ret float %0 -} - -define double @test_vrsqrted_f64(double %a) { -; CHECK: test_vrsqrted_f64 -; CHECK: frsqrte {{d[0-9]+}}, {{d[0-9]+}} -entry: - %0 = call double @llvm.aarch64.neon.vrsqrte.f64(double %a) - ret double %0 -} - -declare float @llvm.aarch64.neon.vrsqrte.f32(float) -declare double @llvm.aarch64.neon.vrsqrte.f64(double) diff --git a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll deleted file mode 100644 index 2b94d7524eb9..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll +++ /dev/null @@ -1,216 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Intrinsic wrangling. Duplicates various arm64 tests. 
- -declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>) - -define <1 x i64> @test_addp_v1i64(<2 x i64> %a) { -; CHECK: test_addp_v1i64: -; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d - %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a) - ret <1 x i64> %val -} - -declare float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float>) - -define float @test_faddp_f32(<2 x float> %a) { -; CHECK: test_faddp_f32: -; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s - %val = call float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float> %a) - ret float %val -} - -declare double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double>) - -define double @test_faddp_f64(<2 x double> %a) { -; CHECK: test_faddp_f64: -; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d - %val = call double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double> %a) - ret double %val -} - - -declare float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float>) - -define float @test_fmaxp_f32(<2 x float> %a) { -; CHECK: test_fmaxp_f32: -; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s - %val = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a) - ret float %val -} - -declare double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double>) - -define double @test_fmaxp_f64(<2 x double> %a) { -; CHECK: test_fmaxp_f64: -; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d - %val = call double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double> %a) - ret double %val -} - -declare float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float>) - -define float @test_fminp_f32(<2 x float> %a) { -; CHECK: test_fminp_f32: -; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s - %val = call float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float> %a) - ret float %val -} - -declare double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double>) - -define double @test_fminp_f64(<2 x double> %a) { -; CHECK: test_fminp_f64: -; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d - %val = call double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double> %a) - ret double %val -} - -declare float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float>) - -define float @test_fmaxnmp_f32(<2 x float> %a) { -; CHECK: test_fmaxnmp_f32: -; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s - %val = call float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float> %a) - ret float %val -} - -declare double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double>) - -define double @test_fmaxnmp_f64(<2 x double> %a) { -; CHECK: test_fmaxnmp_f64: -; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d - %val = call double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double> %a) - ret double %val -} - -declare float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float>) - -define float @test_fminnmp_f32(<2 x float> %a) { -; CHECK: test_fminnmp_f32: -; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s - %val = call float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float> %a) - ret float %val -} - -declare double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double>) - -define double @test_fminnmp_f64(<2 x double> %a) { -; CHECK: test_fminnmp_f64: -; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d - %val = call double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double> %a) - ret double %val -} - -define float @test_vaddv_f32(<2 x float> %a) { -; CHECK-LABEL: test_vaddv_f32 -; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = call float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float> %a) - ret float %1 -} - -define float @test_vaddvq_f32(<4 x float> %a) { -; CHECK-LABEL: test_vaddvq_f32 -; CHECK: faddp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = call float 
@llvm.aarch64.neon.vpfadd.f32.v4f32(<4 x float> %a) - ret float %1 -} - -define double @test_vaddvq_f64(<2 x double> %a) { -; CHECK-LABEL: test_vaddvq_f64 -; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = call double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double> %a) - ret double %1 -} - -define float @test_vmaxv_f32(<2 x float> %a) { -; CHECK-LABEL: test_vmaxv_f32 -; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a) - ret float %1 -} - -define double @test_vmaxvq_f64(<2 x double> %a) { -; CHECK-LABEL: test_vmaxvq_f64 -; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = call double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double> %a) - ret double %1 -} - -define float @test_vminv_f32(<2 x float> %a) { -; CHECK-LABEL: test_vminv_f32 -; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = call float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float> %a) - ret float %1 -} - -define double @test_vminvq_f64(<2 x double> %a) { -; CHECK-LABEL: test_vminvq_f64 -; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = call double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double> %a) - ret double %1 -} - -define double @test_vmaxnmvq_f64(<2 x double> %a) { -; CHECK-LABEL: test_vmaxnmvq_f64 -; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = call double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double> %a) - ret double %1 -} - -define float @test_vmaxnmv_f32(<2 x float> %a) { -; CHECK-LABEL: test_vmaxnmv_f32 -; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = call float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float> %a) - ret float %1 -} - -define double @test_vminnmvq_f64(<2 x double> %a) { -; CHECK-LABEL: test_vminnmvq_f64 -; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = call double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double> %a) - ret double %1 -} - -define float @test_vminnmv_f32(<2 x float> %a) { -; CHECK-LABEL: test_vminnmv_f32 -; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = call float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float> %a) - ret float %1 -} - -define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vpaddq_s64 -; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %1 = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b) - ret <2 x i64> %1 -} - -define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vpaddq_u64 -; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %1 = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b) - ret <2 x i64> %1 -} - -define i64 @test_vaddvq_s64(<2 x i64> %a) { -; CHECK-LABEL: test_vaddvq_s64 -; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a) - %2 = extractelement <1 x i64> %1, i32 0 - ret i64 %2 -} - -define i64 @test_vaddvq_u64(<2 x i64> %a) { -; CHECK-LABEL: test_vaddvq_u64 -; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a) - %2 = extractelement <1 x i64> %1, i32 0 - ret i64 %2 -} - -declare <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64>) - -declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>) - -declare float @llvm.aarch64.neon.vpfadd.f32.v4f32(<4 x float>) diff --git a/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll deleted file mode 100644 index ae097afb3a37..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll +++ /dev/null @@ -1,39 +0,0 @@ -; RUN: 
llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Duplicates arm64'd vshift.ll - -declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_urshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_srshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -declare <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_urshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_urshl_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_srshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_srshl_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - - - diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll deleted file mode 100644 index ea5f8f9286fc..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll +++ /dev/null @@ -1,243 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Intrinsic wrangling and arm64 does it differently. 
- -declare <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>) -declare <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8>, <1 x i8>) - -define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_uqadd_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - ret <1 x i8> %tmp1 -} - -define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_sqadd_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - ret <1 x i8> %tmp1 -} - -declare <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>) -declare <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>) - -define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_uqsub_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - ret <1 x i8> %tmp1 -} - -define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_sqsub_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - ret <1 x i8> %tmp1 -} - -declare <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>) -declare <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16>, <1 x i16>) - -define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_uqadd_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - ret <1 x i16> %tmp1 -} - -define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_sqadd_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - ret <1 x i16> %tmp1 -} - -declare <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>) -declare <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>) - -define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_uqsub_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - ret <1 x i16> %tmp1 -} - -define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_sqsub_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - ret <1 x i16> %tmp1 -} - -declare <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>) -declare <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32>, <1 x i32>) - -define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_uqadd_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret <1 x i32> %tmp1 -} - -define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_sqadd_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret <1 x i32> %tmp1 -} - -declare <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>) -declare <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 
x i32>, <1 x i32>) - -define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_uqsub_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret <1 x i32> %tmp1 -} - - -define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_sqsub_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret <1 x i32> %tmp1 -} - -declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqadd_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqadd_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqsub_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqsub_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define i8 @test_vuqaddb_s8(i8 %a, i8 %b) { -; CHECK: test_vuqaddb_s8 -; CHECK: suqadd {{b[0-9]+}}, {{b[0-9]+}} -entry: - %vuqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0 - %vuqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0 - %vuqadd2.i = call <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8> %vuqadd.i, <1 x i8> %vuqadd1.i) - %0 = extractelement <1 x i8> %vuqadd2.i, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>) - -define i16 @test_vuqaddh_s16(i16 %a, i16 %b) { -; CHECK: test_vuqaddh_s16 -; CHECK: suqadd {{h[0-9]+}}, {{h[0-9]+}} -entry: - %vuqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vuqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0 - %vuqadd2.i = call <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16> %vuqadd.i, <1 x i16> %vuqadd1.i) - %0 = extractelement <1 x i16> %vuqadd2.i, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>) - -define i32 @test_vuqadds_s32(i32 %a, i32 %b) { -; CHECK: test_vuqadds_s32 -; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vuqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vuqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0 - %vuqadd2.i = call <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32> %vuqadd.i, <1 x i32> %vuqadd1.i) - %0 = extractelement <1 x i32> %vuqadd2.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32>, <1 x i32>) - -define i64 @test_vuqaddd_s64(i64 %a, i64 %b) { -; CHECK: test_vuqaddd_s64 -; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}} 
-entry: - %vuqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vuqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vuqadd2.i = call <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64> %vuqadd.i, <1 x i64> %vuqadd1.i) - %0 = extractelement <1 x i64> %vuqadd2.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64>, <1 x i64>) - -define i8 @test_vsqaddb_u8(i8 %a, i8 %b) { -; CHECK: test_vsqaddb_u8 -; CHECK: usqadd {{b[0-9]+}}, {{b[0-9]+}} -entry: - %vsqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0 - %vsqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0 - %vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %vsqadd.i, <1 x i8> %vsqadd1.i) - %0 = extractelement <1 x i8> %vsqadd2.i, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8>, <1 x i8>) - -define i16 @test_vsqaddh_u16(i16 %a, i16 %b) { -; CHECK: test_vsqaddh_u16 -; CHECK: usqadd {{h[0-9]+}}, {{h[0-9]+}} -entry: - %vsqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vsqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0 - %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %vsqadd.i, <1 x i16> %vsqadd1.i) - %0 = extractelement <1 x i16> %vsqadd2.i, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16>, <1 x i16>) - -define i32 @test_vsqadds_u32(i32 %a, i32 %b) { -; CHECK: test_vsqadds_u32 -; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vsqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vsqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0 - %vsqadd2.i = call <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32> %vsqadd.i, <1 x i32> %vsqadd1.i) - %0 = extractelement <1 x i32> %vsqadd2.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32>, <1 x i32>) - -define i64 @test_vsqaddd_u64(i64 %a, i64 %b) { -; CHECK: test_vsqaddd_u64 -; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vsqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vsqadd2.i = call <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64> %vsqadd.i, <1 x i64> %vsqadd1.i) - %0 = extractelement <1 x i64> %vsqadd2.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64>, <1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll deleted file mode 100644 index e78c55bfe166..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll +++ /dev/null @@ -1,95 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Intrinsic wrangling and arm64 does it differently. 
- -declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqrshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqrshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -declare <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8>, <1 x i8>) -declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>) - -define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_uqrshl_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - - ret <1 x i8> %tmp1 -} - -define <1 x i8> @test_sqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_sqrshl_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - ret <1 x i8> %tmp1 -} - -declare <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16>, <1 x i16>) -declare <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16>, <1 x i16>) - -define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_uqrshl_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - - ret <1 x i16> %tmp1 -} - -define <1 x i16> @test_sqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_sqrshl_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - ret <1 x i16> %tmp1 -} - -declare <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32>, <1 x i32>) -declare <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32>, <1 x i32>) - -define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_uqrshl_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - - ret <1 x i32> %tmp1 -} - -define <1 x i32> @test_sqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_sqrshl_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret <1 x i32> %tmp1 -} - -declare <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqrshl_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqrshl_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> 
%tmp1 -} - - - diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll deleted file mode 100644 index b7f956cf612a..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll +++ /dev/null @@ -1,89 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Intrinsic wrangling and arm64 does it differently. - -declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -declare <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8>, <1 x i8>) -declare <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8>, <1 x i8>) - -define <1 x i8> @test_uqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_uqshl_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - ret <1 x i8> %tmp1 -} - -define <1 x i8> @test_sqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_sqshl_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - ret <1 x i8> %tmp1 -} - -declare <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16>, <1 x i16>) -declare <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16>, <1 x i16>) - -define <1 x i16> @test_uqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_uqshl_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - ret <1 x i16> %tmp1 -} - -define <1 x i16> @test_sqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_sqshl_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - ret <1 x i16> %tmp1 -} - -declare <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32>, <1 x i32>) -declare <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32>, <1 x i32>) - -define <1 x i32> @test_uqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_uqshl_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret <1 x i32> %tmp1 -} - -define <1 x i32> @test_sqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_sqshl_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret <1 x i32> %tmp1 -} - -declare <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqshl_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %lhs, <1 x i64> 
%rhs) -;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqshl_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - - diff --git a/test/CodeGen/AArch64/neon-scalar-shift-imm.ll b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll deleted file mode 100644 index a2bdae5f52ce..000000000000 --- a/test/CodeGen/AArch64/neon-scalar-shift-imm.ll +++ /dev/null @@ -1,532 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; Intrinsic wrangling & arm64 does it differently. - -define i64 @test_vshrd_n_s64(i64 %a) { -; CHECK: test_vshrd_n_s64 -; CHECK: sshr {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsshr = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsshr1 = call <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64> %vsshr, i32 63) - %0 = extractelement <1 x i64> %vsshr1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64>, i32) - -define i64 @test_vshrd_n_u64(i64 %a) { -; CHECK: test_vshrd_n_u64 -; CHECK: ushr {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vushr = insertelement <1 x i64> undef, i64 %a, i32 0 - %vushr1 = call <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64> %vushr, i32 63) - %0 = extractelement <1 x i64> %vushr1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64>, i32) - -define i64 @test_vrshrd_n_s64(i64 %a) { -; CHECK: test_vrshrd_n_s64 -; CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsrshr = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsrshr1 = call <1 x i64> @llvm.aarch64.neon.vsrshr.v1i64(<1 x i64> %vsrshr, i32 63) - %0 = extractelement <1 x i64> %vsrshr1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vsrshr.v1i64(<1 x i64>, i32) - -define i64 @test_vrshrd_n_u64(i64 %a) { -; CHECK: test_vrshrd_n_u64 -; CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vurshr = insertelement <1 x i64> undef, i64 %a, i32 0 - %vurshr1 = call <1 x i64> @llvm.aarch64.neon.vurshr.v1i64(<1 x i64> %vurshr, i32 63) - %0 = extractelement <1 x i64> %vurshr1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vurshr.v1i64(<1 x i64>, i32) - -define i64 @test_vsrad_n_s64(i64 %a, i64 %b) { -; CHECK: test_vsrad_n_s64 -; CHECK: ssra {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vssra = insertelement <1 x i64> undef, i64 %a, i32 0 - %vssra1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vssra2 = call <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64> %vssra, <1 x i64> %vssra1, i32 63) - %0 = extractelement <1 x i64> %vssra2, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64>, <1 x i64>, i32) - -define i64 @test_vsrad_n_u64(i64 %a, i64 %b) { -; CHECK: test_vsrad_n_u64 -; CHECK: usra {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vusra = insertelement <1 x i64> undef, i64 %a, i32 0 - %vusra1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vusra2 = call <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64> %vusra, <1 x i64> %vusra1, i32 63) - %0 = extractelement <1 x i64> %vusra2, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64>, <1 x i64>, i32) - -define i64 @test_vrsrad_n_s64(i64 %a, i64 %b) { -; CHECK: test_vrsrad_n_s64 -; CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsrsra = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsrsra1 = insertelement <1 x i64> undef, i64 %b, 
i32 0 - %vsrsra2 = call <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64> %vsrsra, <1 x i64> %vsrsra1, i32 63) - %0 = extractelement <1 x i64> %vsrsra2, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64>, <1 x i64>, i32) - -define i64 @test_vrsrad_n_u64(i64 %a, i64 %b) { -; CHECK: test_vrsrad_n_u64 -; CHECK: ursra {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vursra = insertelement <1 x i64> undef, i64 %a, i32 0 - %vursra1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vursra2 = call <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64> %vursra, <1 x i64> %vursra1, i32 63) - %0 = extractelement <1 x i64> %vursra2, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64>, <1 x i64>, i32) - -define i64 @test_vshld_n_s64(i64 %a) { -; CHECK: test_vshld_n_s64 -; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vshl = insertelement <1 x i64> undef, i64 %a, i32 0 - %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63) - %0 = extractelement <1 x i64> %vshl1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64>, i32) - -define i64 @test_vshld_n_u64(i64 %a) { -; CHECK: test_vshld_n_u64 -; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vshl = insertelement <1 x i64> undef, i64 %a, i32 0 - %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63) - %0 = extractelement <1 x i64> %vshl1, i32 0 - ret i64 %0 -} - -define i8 @test_vqshlb_n_s8(i8 %a) { -; CHECK: test_vqshlb_n_s8 -; CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, #7 -entry: - %vsqshl = insertelement <1 x i8> undef, i8 %a, i32 0 - %vsqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8> %vsqshl, i32 7) - %0 = extractelement <1 x i8> %vsqshl1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8>, i32) - -define i16 @test_vqshlh_n_s16(i16 %a) { -; CHECK: test_vqshlh_n_s16 -; CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, #15 -entry: - %vsqshl = insertelement <1 x i16> undef, i16 %a, i32 0 - %vsqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16> %vsqshl, i32 15) - %0 = extractelement <1 x i16> %vsqshl1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16>, i32) - -define i32 @test_vqshls_n_s32(i32 %a) { -; CHECK: test_vqshls_n_s32 -; CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, #31 -entry: - %vsqshl = insertelement <1 x i32> undef, i32 %a, i32 0 - %vsqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32> %vsqshl, i32 31) - %0 = extractelement <1 x i32> %vsqshl1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32>, i32) - -define i64 @test_vqshld_n_s64(i64 %a) { -; CHECK: test_vqshld_n_s64 -; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsqshl = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64> %vsqshl, i32 63) - %0 = extractelement <1 x i64> %vsqshl1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64>, i32) - -define i8 @test_vqshlb_n_u8(i8 %a) { -; CHECK: test_vqshlb_n_u8 -; CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, #7 -entry: - %vuqshl = insertelement <1 x i8> undef, i8 %a, i32 0 - %vuqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8> %vuqshl, i32 7) - %0 = extractelement <1 x i8> %vuqshl1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8>, i32) - -define i16 @test_vqshlh_n_u16(i16 %a) { -; CHECK: test_vqshlh_n_u16 -; CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, #15 
-entry: - %vuqshl = insertelement <1 x i16> undef, i16 %a, i32 0 - %vuqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16> %vuqshl, i32 15) - %0 = extractelement <1 x i16> %vuqshl1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16>, i32) - -define i32 @test_vqshls_n_u32(i32 %a) { -; CHECK: test_vqshls_n_u32 -; CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, #31 -entry: - %vuqshl = insertelement <1 x i32> undef, i32 %a, i32 0 - %vuqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32> %vuqshl, i32 31) - %0 = extractelement <1 x i32> %vuqshl1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32>, i32) - -define i64 @test_vqshld_n_u64(i64 %a) { -; CHECK: test_vqshld_n_u64 -; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vuqshl = insertelement <1 x i64> undef, i64 %a, i32 0 - %vuqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64> %vuqshl, i32 63) - %0 = extractelement <1 x i64> %vuqshl1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64>, i32) - -define i8 @test_vqshlub_n_s8(i8 %a) { -; CHECK: test_vqshlub_n_s8 -; CHECK: sqshlu {{b[0-9]+}}, {{b[0-9]+}}, #7 -entry: - %vsqshlu = insertelement <1 x i8> undef, i8 %a, i32 0 - %vsqshlu1 = call <1 x i8> @llvm.aarch64.neon.vsqshlu.v1i8(<1 x i8> %vsqshlu, i32 7) - %0 = extractelement <1 x i8> %vsqshlu1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vsqshlu.v1i8(<1 x i8>, i32) - -define i16 @test_vqshluh_n_s16(i16 %a) { -; CHECK: test_vqshluh_n_s16 -; CHECK: sqshlu {{h[0-9]+}}, {{h[0-9]+}}, #15 -entry: - %vsqshlu = insertelement <1 x i16> undef, i16 %a, i32 0 - %vsqshlu1 = call <1 x i16> @llvm.aarch64.neon.vsqshlu.v1i16(<1 x i16> %vsqshlu, i32 15) - %0 = extractelement <1 x i16> %vsqshlu1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vsqshlu.v1i16(<1 x i16>, i32) - -define i32 @test_vqshlus_n_s32(i32 %a) { -; CHECK: test_vqshlus_n_s32 -; CHECK: sqshlu {{s[0-9]+}}, {{s[0-9]+}}, #31 -entry: - %vsqshlu = insertelement <1 x i32> undef, i32 %a, i32 0 - %vsqshlu1 = call <1 x i32> @llvm.aarch64.neon.vsqshlu.v1i32(<1 x i32> %vsqshlu, i32 31) - %0 = extractelement <1 x i32> %vsqshlu1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vsqshlu.v1i32(<1 x i32>, i32) - -define i64 @test_vqshlud_n_s64(i64 %a) { -; CHECK: test_vqshlud_n_s64 -; CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsqshlu = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsqshlu1 = call <1 x i64> @llvm.aarch64.neon.vsqshlu.v1i64(<1 x i64> %vsqshlu, i32 63) - %0 = extractelement <1 x i64> %vsqshlu1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vsqshlu.v1i64(<1 x i64>, i32) - -define i64 @test_vsrid_n_s64(i64 %a, i64 %b) { -; CHECK: test_vsrid_n_s64 -; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsri = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63) - %0 = extractelement <1 x i64> %vsri2, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64>, <1 x i64>, i32) - -define i64 @test_vsrid_n_u64(i64 %a, i64 %b) { -; CHECK: test_vsrid_n_u64 -; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsri = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63) - %0 = 
extractelement <1 x i64> %vsri2, i32 0 - ret i64 %0 -} - -define i64 @test_vslid_n_s64(i64 %a, i64 %b) { -; CHECK: test_vslid_n_s64 -; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsli = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vsli2 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63) - %0 = extractelement <1 x i64> %vsli2, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) - -define i64 @test_vslid_n_u64(i64 %a, i64 %b) { -; CHECK: test_vslid_n_u64 -; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsli = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vsli2 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63) - %0 = extractelement <1 x i64> %vsli2, i32 0 - ret i64 %0 -} - -define i8 @test_vqshrnh_n_s16(i16 %a) { -; CHECK: test_vqshrnh_n_s16 -; CHECK: sqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 -entry: - %vsqshrn = insertelement <1 x i16> undef, i16 %a, i32 0 - %vsqshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16> %vsqshrn, i32 8) - %0 = extractelement <1 x i8> %vsqshrn1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16>, i32) - -define i16 @test_vqshrns_n_s32(i32 %a) { -; CHECK: test_vqshrns_n_s32 -; CHECK: sqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 -entry: - %vsqshrn = insertelement <1 x i32> undef, i32 %a, i32 0 - %vsqshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32> %vsqshrn, i32 16) - %0 = extractelement <1 x i16> %vsqshrn1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32>, i32) - -define i32 @test_vqshrnd_n_s64(i64 %a) { -; CHECK: test_vqshrnd_n_s64 -; CHECK: sqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 -entry: - %vsqshrn = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsqshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64> %vsqshrn, i32 32) - %0 = extractelement <1 x i32> %vsqshrn1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64>, i32) - -define i8 @test_vqshrnh_n_u16(i16 %a) { -; CHECK: test_vqshrnh_n_u16 -; CHECK: uqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 -entry: - %vuqshrn = insertelement <1 x i16> undef, i16 %a, i32 0 - %vuqshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16> %vuqshrn, i32 8) - %0 = extractelement <1 x i8> %vuqshrn1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16>, i32) - -define i16 @test_vqshrns_n_u32(i32 %a) { -; CHECK: test_vqshrns_n_u32 -; CHECK: uqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 -entry: - %vuqshrn = insertelement <1 x i32> undef, i32 %a, i32 0 - %vuqshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32> %vuqshrn, i32 16) - %0 = extractelement <1 x i16> %vuqshrn1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32>, i32) - -define i32 @test_vqshrnd_n_u64(i64 %a) { -; CHECK: test_vqshrnd_n_u64 -; CHECK: uqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 -entry: - %vuqshrn = insertelement <1 x i64> undef, i64 %a, i32 0 - %vuqshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64> %vuqshrn, i32 32) - %0 = extractelement <1 x i32> %vuqshrn1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64>, i32) - -define i8 @test_vqrshrnh_n_s16(i16 %a) { -; CHECK: test_vqrshrnh_n_s16 -; CHECK: sqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 -entry: - %vsqrshrn = insertelement <1 x i16> 
undef, i16 %a, i32 0 - %vsqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16> %vsqrshrn, i32 8) - %0 = extractelement <1 x i8> %vsqrshrn1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16>, i32) - -define i16 @test_vqrshrns_n_s32(i32 %a) { -; CHECK: test_vqrshrns_n_s32 -; CHECK: sqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 -entry: - %vsqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0 - %vsqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32> %vsqrshrn, i32 16) - %0 = extractelement <1 x i16> %vsqrshrn1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32>, i32) - -define i32 @test_vqrshrnd_n_s64(i64 %a) { -; CHECK: test_vqrshrnd_n_s64 -; CHECK: sqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 -entry: - %vsqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64> %vsqrshrn, i32 32) - %0 = extractelement <1 x i32> %vsqrshrn1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64>, i32) - -define i8 @test_vqrshrnh_n_u16(i16 %a) { -; CHECK: test_vqrshrnh_n_u16 -; CHECK: uqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 -entry: - %vuqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0 - %vuqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16> %vuqrshrn, i32 8) - %0 = extractelement <1 x i8> %vuqrshrn1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16>, i32) - -define i16 @test_vqrshrns_n_u32(i32 %a) { -; CHECK: test_vqrshrns_n_u32 -; CHECK: uqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 -entry: - %vuqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0 - %vuqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %vuqrshrn, i32 16) - %0 = extractelement <1 x i16> %vuqrshrn1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32) - -define i32 @test_vqrshrnd_n_u64(i64 %a) { -; CHECK: test_vqrshrnd_n_u64 -; CHECK: uqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 -entry: - %vuqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0 - %vuqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64> %vuqrshrn, i32 32) - %0 = extractelement <1 x i32> %vuqrshrn1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64>, i32) - -define i8 @test_vqshrunh_n_s16(i16 %a) { -; CHECK: test_vqshrunh_n_s16 -; CHECK: sqshrun {{b[0-9]+}}, {{h[0-9]+}}, #8 -entry: - %vsqshrun = insertelement <1 x i16> undef, i16 %a, i32 0 - %vsqshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16> %vsqshrun, i32 8) - %0 = extractelement <1 x i8> %vsqshrun1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16>, i32) - -define i16 @test_vqshruns_n_s32(i32 %a) { -; CHECK: test_vqshruns_n_s32 -; CHECK: sqshrun {{h[0-9]+}}, {{s[0-9]+}}, #16 -entry: - %vsqshrun = insertelement <1 x i32> undef, i32 %a, i32 0 - %vsqshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32> %vsqshrun, i32 16) - %0 = extractelement <1 x i16> %vsqshrun1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32>, i32) - -define i32 @test_vqshrund_n_s64(i64 %a) { -; CHECK: test_vqshrund_n_s64 -; CHECK: sqshrun {{s[0-9]+}}, {{d[0-9]+}}, #32 -entry: - %vsqshrun = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsqshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64> %vsqshrun, i32 32) - %0 = extractelement <1 x i32> %vsqshrun1, i32 0 - ret i32 %0 -} - -declare <1 x i32> 
@llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64>, i32)
-
-define i8 @test_vqrshrunh_n_s16(i16 %a) {
-; CHECK: test_vqrshrunh_n_s16
-; CHECK: sqrshrun {{b[0-9]+}}, {{h[0-9]+}}, #8
-entry:
- %vsqrshrun = insertelement <1 x i16> undef, i16 %a, i32 0
- %vsqrshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16> %vsqrshrun, i32 8)
- %0 = extractelement <1 x i8> %vsqrshrun1, i32 0
- ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16>, i32)
-
-define i16 @test_vqrshruns_n_s32(i32 %a) {
-; CHECK: test_vqrshruns_n_s32
-; CHECK: sqrshrun {{h[0-9]+}}, {{s[0-9]+}}, #16
-entry:
- %vsqrshrun = insertelement <1 x i32> undef, i32 %a, i32 0
- %vsqrshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32> %vsqrshrun, i32 16)
- %0 = extractelement <1 x i16> %vsqrshrun1, i32 0
- ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32>, i32)
-
-define i32 @test_vqrshrund_n_s64(i64 %a) {
-; CHECK: test_vqrshrund_n_s64
-; CHECK: sqrshrun {{s[0-9]+}}, {{d[0-9]+}}, #32
-entry:
- %vsqrshrun = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsqrshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64> %vsqrshrun, i32 32)
- %0 = extractelement <1 x i32> %vsqrshrun1, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64>, i32)
diff --git a/test/CodeGen/AArch64/neon-scalar-shift.ll b/test/CodeGen/AArch64/neon-scalar-shift.ll
deleted file mode 100644
index cf3fc0c486a5..000000000000
--- a/test/CodeGen/AArch64/neon-scalar-shift.ll
+++ /dev/null
@@ -1,237 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; Duplicates existing arm64 tests in vshift.ll and vcmp.ll
-
-declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_ushl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_ushl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_ushl_v1i64_aarch64:
- %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sshl_v1i64_aarch64:
- %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_vtst_s64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vtst_s64
-; CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %0 = and <1 x i64> %a, %b
- %1 = icmp ne <1 x i64> %0, zeroinitializer
- %vtst.i = sext <1 x i1> %1 to <1 x i64>
- ret <1 x i64> %vtst.i
-}
-
-define <1 x i64> @test_vtst_u64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vtst_u64
-; CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %0 = and <1 x i64>
%a, %b - %1 = icmp ne <1 x i64> %0, zeroinitializer - %vtst.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vtst.i -} - -define <1 x i64> @test_vsli_n_p64(<1 x i64> %a, <1 x i64> %b) { -; CHECK-LABEL: test_vsli_n_p64 -; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #0 -entry: - %vsli_n2 = tail call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %a, <1 x i64> %b, i32 0) - ret <1 x i64> %vsli_n2 -} - -declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) - -define <2 x i64> @test_vsliq_n_p64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vsliq_n_p64 -; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 -entry: - %vsli_n2 = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 0) - ret <2 x i64> %vsli_n2 -} - -declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) - -define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) { -; CHECK-LABEL: test_vrsqrte_u32 -; CHECK: ursqrte {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vrsqrte1.i = tail call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a) - ret <2 x i32> %vrsqrte1.i -} - -define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) { -; CHECK-LABEL: test_vrsqrteq_u32 -; CHECK: ursqrte {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vrsqrte1.i = tail call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a) - ret <4 x i32> %vrsqrte1.i -} - -define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) { -; CHECK-LABEL: test_vqshl_n_s8 -; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 -entry: - %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer) - ret <8 x i8> %vqshl_n -} - -declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) - -define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) { -; CHECK-LABEL: test_vqshlq_n_s8 -; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0 -entry: - %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) - ret <16 x i8> %vqshl_n -} - -declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) - -define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) { -; CHECK-LABEL: test_vqshl_n_s16 -; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0 -entry: - %vqshl_n1 = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> zeroinitializer) - ret <4 x i16> %vqshl_n1 -} - -declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) - -define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) { -; CHECK-LABEL: test_vqshlq_n_s16 -; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0 -entry: - %vqshl_n1 = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> zeroinitializer) - ret <8 x i16> %vqshl_n1 -} - -declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) - -define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) { -; CHECK-LABEL: test_vqshl_n_s32 -; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 -entry: - %vqshl_n1 = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> zeroinitializer) - ret <2 x i32> %vqshl_n1 -} - -declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) - -define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) { -; CHECK-LABEL: test_vqshlq_n_s32 -; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 -entry: - %vqshl_n1 = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer) - ret <4 x i32> %vqshl_n1 -} - -declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) - -define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) { -; CHECK-LABEL: test_vqshlq_n_s64 -; CHECK: 
sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 -entry: - %vqshl_n1 = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> zeroinitializer) - ret <2 x i64> %vqshl_n1 -} - -declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) - -define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) { -; CHECK-LABEL: test_vqshl_n_u8 -; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 -entry: - %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer) - ret <8 x i8> %vqshl_n -} - -declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) - -define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) { -; CHECK-LABEL: test_vqshlq_n_u8 -; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0 -entry: - %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) - ret <16 x i8> %vqshl_n -} - -declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) - -define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) { -; CHECK-LABEL: test_vqshl_n_u16 -; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0 -entry: - %vqshl_n1 = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> zeroinitializer) - ret <4 x i16> %vqshl_n1 -} - -declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) - -define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) { -; CHECK-LABEL: test_vqshlq_n_u16 -; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0 -entry: - %vqshl_n1 = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> zeroinitializer) - ret <8 x i16> %vqshl_n1 -} - -declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) - -define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) { -; CHECK-LABEL: test_vqshl_n_u32 -; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 -entry: - %vqshl_n1 = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> zeroinitializer) - ret <2 x i32> %vqshl_n1 -} - -declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) - -define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) { -; CHECK-LABEL: test_vqshlq_n_u32 -; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 -entry: - %vqshl_n1 = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer) - ret <4 x i32> %vqshl_n1 -} - -declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) - -define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) { -; CHECK-LABEL: test_vqshlq_n_u64 -; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, -entry: - %vqshl_n1 = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> zeroinitializer) - ret <2 x i64> %vqshl_n1 -} - -declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) - -declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) diff --git a/test/CodeGen/AArch64/neon-select_cc.ll b/test/CodeGen/AArch64/neon-select_cc.ll deleted file mode 100644 index 57a819671b60..000000000000 --- a/test/CodeGen/AArch64/neon-select_cc.ll +++ /dev/null @@ -1,202 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; arm64 has separate copy of this test due to different codegen. 
-define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) { -; CHECK-LABEL: test_select_cc_v8i8_i8: -; CHECK: and w0, w0, #0xff -; CHECK-NEXT: cmp w0, w1, uxtb -; CHECK-NEXT: csetm w0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.8b, w0 -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b - %cmp31 = icmp eq i8 %a, %b - %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d - ret <8x i8> %e -} - -define <8x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8x i8> %c, <8x i8> %d ) { -; CHECK-LABEL: test_select_cc_v8i8_f32: -; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b - %cmp31 = fcmp oeq float %a, %b - %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d - ret <8x i8> %e -} - -define <8x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8x i8> %c, <8x i8> %d ) { -; CHECK-LABEL: test_select_cc_v8i8_f64: -; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b - %cmp31 = fcmp oeq double %a, %b - %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d - ret <8x i8> %e -} - -define <16x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16x i8> %c, <16x i8> %d ) { -; CHECK-LABEL: test_select_cc_v16i8_i8: -; CHECK: and w0, w0, #0xff -; CHECK-NEXT: cmp w0, w1, uxtb -; CHECK-NEXT: csetm w0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.16b, w0 -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b - %cmp31 = icmp eq i8 %a, %b - %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d - ret <16x i8> %e -} - -define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x i8> %d ) { -; CHECK-LABEL: test_select_cc_v16i8_f32: -; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0] -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b - %cmp31 = fcmp oeq float %a, %b - %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d - ret <16x i8> %e -} - -define <16x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16x i8> %c, <16x i8> %d ) { -; CHECK-LABEL: test_select_cc_v16i8_f64: -; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d -; CHECK-NEXT: dup v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0] -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b - %cmp31 = fcmp oeq double %a, %b - %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d - ret <16x i8> %e -} - -define <4x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4x i16> %c, <4x i16> %d ) { -; CHECK-LABEL: test_select_cc_v4i16: -; CHECK: and w0, w0, #0xffff -; CHECK-NEXT: cmp w0, w1, uxth -; CHECK-NEXT: csetm w0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.4h, w0 -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b - %cmp31 = icmp eq i16 %a, %b - %e = select i1 %cmp31, <4x i16> %c, <4x i16> %d - ret <4x i16> %e -} - -define <8x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8x i16> %c, <8x i16> %d ) { -; CHECK-LABEL: test_select_cc_v8i16: -; CHECK: and w0, w0, #0xffff -; CHECK-NEXT: cmp w0, w1, uxth -; CHECK-NEXT: csetm w0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.8h, w0 -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b - %cmp31 = icmp eq i16 %a, %b - %e = select i1 %cmp31, <8x i16> %c, <8x i16> %d - ret <8x i16> %e -} - -define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d ) { -; CHECK-LABEL: test_select_cc_v2i32: -; CHECK: cmp w0, w1, uxtw -; CHECK-NEXT: csetm w0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.2s, w0 -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b - %cmp31 = icmp eq i32 %a, %b - %e = select i1 %cmp31, <2x i32> %c, <2x i32> %d - ret <2x i32> %e -} - -define <4x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4x i32> %c, <4x i32> %d ) { -; CHECK-LABEL: 
test_select_cc_v4i32: -; CHECK: cmp w0, w1, uxtw -; CHECK-NEXT: csetm w0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.4s, w0 -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b - %cmp31 = icmp eq i32 %a, %b - %e = select i1 %cmp31, <4x i32> %c, <4x i32> %d - ret <4x i32> %e -} - -define <1x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1x i64> %c, <1x i64> %d ) { -; CHECK-LABEL: test_select_cc_v1i64: -; CHECK: cmp x0, x1 -; CHECK-NEXT: csetm x0, eq -; CHECK-NEXT: fmov d{{[0-9]+}}, x0 -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b - %cmp31 = icmp eq i64 %a, %b - %e = select i1 %cmp31, <1x i64> %c, <1x i64> %d - ret <1x i64> %e -} - -define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) { -; CHECK-LABEL: test_select_cc_v2i64: -; CHECK: cmp x0, x1 -; CHECK-NEXT: csetm x0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.2d, x0 -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b - %cmp31 = icmp eq i64 %a, %b - %e = select i1 %cmp31, <2x i64> %c, <2x i64> %d - ret <2x i64> %e -} - -define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) { -; CHECK-LABEL: test_select_cc_v1f32: -; CHECK: fcmp s0, s1 -; CHECK-NEXT: fcsel s0, s2, s3, eq - %cmp31 = fcmp oeq float %a, %b - %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d - ret <1 x float> %e -} - -define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) { -; CHECK-LABEL: test_select_cc_v2f32: -; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b - %cmp31 = fcmp oeq float %a, %b - %e = select i1 %cmp31, <2 x float> %c, <2 x float> %d - ret <2 x float> %e -} - -define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x float> %d ) { -; CHECK-LABEL: test_select_cc_v4f32: -; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0] -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b - %cmp31 = fcmp oeq float %a, %b - %e = select i1 %cmp31, <4x float> %c, <4x float> %d - ret <4x float> %e -} - -define <4x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4x float> %c, <4x float> %d ) { -; CHECK-LABEL: test_select_cc_v4f32_icmp: -; CHECK: cmp w0, w1, uxtw -; CHECK: csetm w0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.4s, w0 -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b - %cmp31 = icmp eq i32 %a, %b - %e = select i1 %cmp31, <4x float> %c, <4x float> %d - ret <4x float> %e -} - -define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d ) { -; CHECK-LABEL: test_select_cc_v1f64: -; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b - %cmp31 = fcmp oeq double %a, %b - %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d - ret <1 x double> %e -} - -define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d ) { -; CHECK-LABEL: test_select_cc_v1f64_icmp: -; CHECK: cmp x0, x1 -; CHECK-NEXT: csetm x0, eq -; CHECK-NEXT: fmov d{{[0-9]+}}, x0 -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b - %cmp31 = icmp eq i64 %a, %b - %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d - ret <1 x double> %e -} - -define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) { -; CHECK-LABEL: test_select_cc_v2f64: -; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d -; CHECK-NEXT: dup v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0] -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b - %cmp31 = fcmp oeq double %a, %b - %e = select i1 
%cmp31, <2 x double> %c, <2 x double> %d
- ret <2 x double> %e
-}
diff --git a/test/CodeGen/AArch64/neon-shift-left-long.ll b/test/CodeGen/AArch64/neon-shift-left-long.ll
index d16b131559bd..1d9c92c999d9 100644
--- a/test/CodeGen/AArch64/neon-shift-left-long.ll
+++ b/test/CodeGen/AArch64/neon-shift-left-long.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i16> @test_sshll_v8i8(<8 x i8> %a) {
diff --git a/test/CodeGen/AArch64/neon-shift.ll b/test/CodeGen/AArch64/neon-shift.ll
deleted file mode 100644
index 088200d972c5..000000000000
--- a/test/CodeGen/AArch64/neon-shift.ll
+++ /dev/null
@@ -1,172 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; arm64 already has these tests: pure intrinsics & trivial shifts.
-
-declare <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uqshl_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: ushl v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_sqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sqshl_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sshl v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_ushl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_ushl_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: ushl v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_sshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sshl_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sshl v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_ushl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_ushl_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: ushl v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_sshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sshl_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sshl v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_ushl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_ushl_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: ushl v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_sshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sshl_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sshl v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32>
@llvm.arm.neon.vshifts.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_ushl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_ushl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: ushl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_sshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sshl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sshl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_ushl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_ushl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: ushl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_sshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sshl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sshl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) -declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_ushl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_ushl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: ushl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - -define <2 x i64> @test_sshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_sshl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: sshl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - - -define <8 x i8> @test_shl_v8i8(<8 x i8> %a) { -; CHECK: test_shl_v8i8: -; CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %tmp = shl <8 x i8> %a, - ret <8 x i8> %tmp -} - -define <4 x i16> @test_shl_v4i16(<4 x i16> %a) { -; CHECK: test_shl_v4i16: -; CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %tmp = shl <4 x i16> %a, - ret <4 x i16> %tmp -} - -define <2 x i32> @test_shl_v2i32(<2 x i32> %a) { -; CHECK: test_shl_v2i32: -; CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %tmp = shl <2 x i32> %a, - ret <2 x i32> %tmp -} - -define <16 x i8> @test_shl_v16i8(<16 x i8> %a) { -; CHECK: test_shl_v16i8: -; CHECK: shl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %tmp = shl <16 x i8> %a, - ret <16 x i8> %tmp -} - -define <8 x i16> @test_shl_v8i16(<8 x i16> %a) { -; CHECK: test_shl_v8i16: -; CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %tmp = shl <8 x i16> %a, - ret <8 x i16> %tmp -} - -define <4 x i32> @test_shl_v4i32(<4 x i32> %a) { -; CHECK: test_shl_v4i32: -; CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %tmp = shl <4 x i32> %a, - ret <4 x i32> %tmp -} - -define <2 x i64> @test_shl_v2i64(<2 x i64> %a) { -; CHECK: test_shl_v2i64: -; CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #63 - %tmp = shl <2 x i64> %a, - ret <2 x i64> %tmp -} - diff --git a/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll b/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll deleted file mode 100644 index 628a6760c9eb..000000000000 --- a/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll +++ /dev/null @@ -1,334 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has all tests not involving v1iN. 
- -define <8 x i8> @shl.v8i8(<8 x i8> %a, <8 x i8> %b) { -; CHECK-LABEL: shl.v8i8: -; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %c = shl <8 x i8> %a, %b - ret <8 x i8> %c -} - -define <4 x i16> @shl.v4i16(<4 x i16> %a, <4 x i16> %b) { -; CHECK-LABEL: shl.v4i16: -; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %c = shl <4 x i16> %a, %b - ret <4 x i16> %c -} - -define <2 x i32> @shl.v2i32(<2 x i32> %a, <2 x i32> %b) { -; CHECK-LABEL: shl.v2i32: -; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %c = shl <2 x i32> %a, %b - ret <2 x i32> %c -} - -define <1 x i64> @shl.v1i64(<1 x i64> %a, <1 x i64> %b) { -; CHECK-LABEL: shl.v1i64: -; CHECK: ushl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %c = shl <1 x i64> %a, %b - ret <1 x i64> %c -} - -define <16 x i8> @shl.v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shl.v16i8: -; CHECK: ushl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %c = shl <16 x i8> %a, %b - ret <16 x i8> %c -} - -define <8 x i16> @shl.v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shl.v8i16: -; CHECK: ushl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %c = shl <8 x i16> %a, %b - ret <8 x i16> %c -} - -define <4 x i32> @shl.v4i32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: shl.v4i32: -; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %c = shl <4 x i32> %a, %b - ret <4 x i32> %c -} - -define <2 x i64> @shl.v2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: shl.v2i64: -; CHECK: ushl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %c = shl <2 x i64> %a, %b - ret <2 x i64> %c -} - -define <8 x i8> @lshr.v8i8(<8 x i8> %a, <8 x i8> %b) { -; CHECK-LABEL: lshr.v8i8: -; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b -; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %c = lshr <8 x i8> %a, %b - ret <8 x i8> %c -} - -define <4 x i16> @lshr.v4i16(<4 x i16> %a, <4 x i16> %b) { -; CHECK-LABEL: lshr.v4i16: -; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h -; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %c = lshr <4 x i16> %a, %b - ret <4 x i16> %c -} - -define <2 x i32> @lshr.v2i32(<2 x i32> %a, <2 x i32> %b) { -; CHECK-LABEL: lshr.v2i32: -; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s -; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %c = lshr <2 x i32> %a, %b - ret <2 x i32> %c -} - -define <1 x i64> @lshr.v1i64(<1 x i64> %a, <1 x i64> %b) { -; CHECK-LABEL: lshr.v1i64: -; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} -; CHECK: ushl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %c = lshr <1 x i64> %a, %b - ret <1 x i64> %c -} - -define <16 x i8> @lshr.v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: lshr.v16i8: -; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: ushl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %c = lshr <16 x i8> %a, %b - ret <16 x i8> %c -} - -define <8 x i16> @lshr.v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: lshr.v8i16: -; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h -; CHECK: ushl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %c = lshr <8 x i16> %a, %b - ret <8 x i16> %c -} - -define <4 x i32> @lshr.v4i32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: lshr.v4i32: -; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s -; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %c = lshr <4 x i32> %a, %b - ret <4 x i32> %c -} - -define <2 x i64> @lshr.v2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: lshr.v2i64: -; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d -; CHECK: ushl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %c = lshr <2 x i64> %a, %b - ret <2 x i64> %c -} 
- -define <8 x i8> @ashr.v8i8(<8 x i8> %a, <8 x i8> %b) { -; CHECK-LABEL: ashr.v8i8: -; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b -; CHECK: sshl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %c = ashr <8 x i8> %a, %b - ret <8 x i8> %c -} - -define <4 x i16> @ashr.v4i16(<4 x i16> %a, <4 x i16> %b) { -; CHECK-LABEL: ashr.v4i16: -; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h -; CHECK: sshl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %c = ashr <4 x i16> %a, %b - ret <4 x i16> %c -} - -define <2 x i32> @ashr.v2i32(<2 x i32> %a, <2 x i32> %b) { -; CHECK-LABEL: ashr.v2i32: -; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s -; CHECK: sshl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %c = ashr <2 x i32> %a, %b - ret <2 x i32> %c -} - -define <1 x i64> @ashr.v1i64(<1 x i64> %a, <1 x i64> %b) { -; CHECK-LABEL: ashr.v1i64: -; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} -; CHECK: sshl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %c = ashr <1 x i64> %a, %b - ret <1 x i64> %c -} - -define <16 x i8> @ashr.v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: ashr.v16i8: -; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: sshl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %c = ashr <16 x i8> %a, %b - ret <16 x i8> %c -} - -define <8 x i16> @ashr.v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: ashr.v8i16: -; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h -; CHECK: sshl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %c = ashr <8 x i16> %a, %b - ret <8 x i16> %c -} - -define <4 x i32> @ashr.v4i32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: ashr.v4i32: -; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s -; CHECK: sshl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %c = ashr <4 x i32> %a, %b - ret <4 x i32> %c -} - -define <2 x i64> @ashr.v2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: ashr.v2i64: -; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d -; CHECK: sshl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %c = ashr <2 x i64> %a, %b - ret <2 x i64> %c -} - -define <1 x i64> @shl.v1i64.0(<1 x i64> %a) { -; CHECK-LABEL: shl.v1i64.0: -; CHECK-NOT: shl d{{[0-9]+}}, d{{[0-9]+}}, #0 - %c = shl <1 x i64> %a, zeroinitializer - ret <1 x i64> %c -} - -define <2 x i32> @shl.v2i32.0(<2 x i32> %a) { -; CHECK-LABEL: shl.v2i32.0: -; CHECK-NOT: shl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0 - %c = shl <2 x i32> %a, zeroinitializer - ret <2 x i32> %c -} - -; The following test cases test shl/ashr/lshr with v1i8/v1i16/v1i32 types - -define <1 x i8> @shl.v1i8(<1 x i8> %a, <1 x i8> %b) { -; CHECK-LABEL: shl.v1i8: -; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %c = shl <1 x i8> %a, %b - ret <1 x i8> %c -} - -define <1 x i16> @shl.v1i16(<1 x i16> %a, <1 x i16> %b) { -; CHECK-LABEL: shl.v1i16: -; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %c = shl <1 x i16> %a, %b - ret <1 x i16> %c -} - -define <1 x i32> @shl.v1i32(<1 x i32> %a, <1 x i32> %b) { -; CHECK-LABEL: shl.v1i32: -; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %c = shl <1 x i32> %a, %b - ret <1 x i32> %c -} - -define <1 x i8> @ashr.v1i8(<1 x i8> %a, <1 x i8> %b) { -; CHECK-LABEL: ashr.v1i8: -; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b -; CHECK: sshl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %c = ashr <1 x i8> %a, %b - ret <1 x i8> %c -} - -define <1 x i16> @ashr.v1i16(<1 x i16> %a, <1 x i16> %b) { -; CHECK-LABEL: ashr.v1i16: -; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h -; CHECK: sshl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %c = ashr <1 x i16> %a, %b - ret <1 x i16> %c -} - -define <1 x i32> @ashr.v1i32(<1 x i32> %a, 
<1 x i32> %b) { -; CHECK-LABEL: ashr.v1i32: -; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s -; CHECK: sshl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %c = ashr <1 x i32> %a, %b - ret <1 x i32> %c -} - -define <1 x i8> @lshr.v1i8(<1 x i8> %a, <1 x i8> %b) { -; CHECK-LABEL: lshr.v1i8: -; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b -; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %c = lshr <1 x i8> %a, %b - ret <1 x i8> %c -} - -define <1 x i16> @lshr.v1i16(<1 x i16> %a, <1 x i16> %b) { -; CHECK-LABEL: lshr.v1i16: -; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h -; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %c = lshr <1 x i16> %a, %b - ret <1 x i16> %c -} - -define <1 x i32> @lshr.v1i32(<1 x i32> %a, <1 x i32> %b) { -; CHECK-LABEL: lshr.v1i32: -; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s -; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %c = lshr <1 x i32> %a, %b - ret <1 x i32> %c -} - -define <1 x i8> @shl.v1i8.imm(<1 x i8> %a) { -; CHECK-LABEL: shl.v1i8.imm: -; CHECK: shl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3 - %c = shl <1 x i8> %a, - ret <1 x i8> %c -} - -define <1 x i16> @shl.v1i16.imm(<1 x i16> %a) { -; CHECK-LABEL: shl.v1i16.imm: -; CHECK: shl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #5 - %c = shl <1 x i16> %a, - ret <1 x i16> %c -} - -define <1 x i32> @shl.v1i32.imm(<1 x i32> %a) { -; CHECK-LABEL: shl.v1i32.imm: -; CHECK-NOT: shl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0 - %c = shl <1 x i32> %a, zeroinitializer - ret <1 x i32> %c -} - -define <1 x i8> @ashr.v1i8.imm(<1 x i8> %a) { -; CHECK-LABEL: ashr.v1i8.imm: -; CHECK: sshr v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3 - %c = ashr <1 x i8> %a, - ret <1 x i8> %c -} - -define <1 x i16> @ashr.v1i16.imm(<1 x i16> %a) { -; CHECK-LABEL: ashr.v1i16.imm: -; CHECK: sshr v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #10 - %c = ashr <1 x i16> %a, - ret <1 x i16> %c -} - -define <1 x i32> @ashr.v1i32.imm(<1 x i32> %a) { -; CHECK-LABEL: ashr.v1i32.imm: -; CHECK: sshr v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #31 - %c = ashr <1 x i32> %a, - ret <1 x i32> %c -} - -define <1 x i8> @lshr.v1i8.imm(<1 x i8> %a) { -; CHECK-LABEL: lshr.v1i8.imm: -; CHECK: ushr v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3 - %c = lshr <1 x i8> %a, - ret <1 x i8> %c -} - -define <1 x i16> @lshr.v1i16.imm(<1 x i16> %a) { -; CHECK-LABEL: lshr.v1i16.imm: -; CHECK: ushr v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #10 - %c = lshr <1 x i16> %a, - ret <1 x i16> %c -} - -define <1 x i32> @lshr.v1i32.imm(<1 x i32> %a) { -; CHECK-LABEL: lshr.v1i32.imm: -; CHECK: ushr v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #31 - %c = lshr <1 x i32> %a, - ret <1 x i32> %c -} diff --git a/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll b/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll deleted file mode 100644 index a3b160413f5e..000000000000 --- a/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll +++ /dev/null @@ -1,2317 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s - -; arm64 already has these. 
Essentially just a copy/paste from Clang output from -; arm_neon.h - -define void @test_ldst1_v16i8(<16 x i8>* %ptr, <16 x i8>* %ptr2) { -; CHECK-LABEL: test_ldst1_v16i8: -; CHECK: ld1 { v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}] - %tmp = load <16 x i8>* %ptr - store <16 x i8> %tmp, <16 x i8>* %ptr2 - ret void -} - -define void @test_ldst1_v8i16(<8 x i16>* %ptr, <8 x i16>* %ptr2) { -; CHECK-LABEL: test_ldst1_v8i16: -; CHECK: ld1 { v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}] - %tmp = load <8 x i16>* %ptr - store <8 x i16> %tmp, <8 x i16>* %ptr2 - ret void -} - -define void @test_ldst1_v4i32(<4 x i32>* %ptr, <4 x i32>* %ptr2) { -; CHECK-LABEL: test_ldst1_v4i32: -; CHECK: ld1 { v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %tmp = load <4 x i32>* %ptr - store <4 x i32> %tmp, <4 x i32>* %ptr2 - ret void -} - -define void @test_ldst1_v2i64(<2 x i64>* %ptr, <2 x i64>* %ptr2) { -; CHECK-LABEL: test_ldst1_v2i64: -; CHECK: ld1 { v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %tmp = load <2 x i64>* %ptr - store <2 x i64> %tmp, <2 x i64>* %ptr2 - ret void -} - -define void @test_ldst1_v8i8(<8 x i8>* %ptr, <8 x i8>* %ptr2) { -; CHECK-LABEL: test_ldst1_v8i8: -; CHECK: ld1 { v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}] - %tmp = load <8 x i8>* %ptr - store <8 x i8> %tmp, <8 x i8>* %ptr2 - ret void -} - -define void @test_ldst1_v4i16(<4 x i16>* %ptr, <4 x i16>* %ptr2) { -; CHECK-LABEL: test_ldst1_v4i16: -; CHECK: ld1 { v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}] - %tmp = load <4 x i16>* %ptr - store <4 x i16> %tmp, <4 x i16>* %ptr2 - ret void -} - -define void @test_ldst1_v2i32(<2 x i32>* %ptr, <2 x i32>* %ptr2) { -; CHECK-LABEL: test_ldst1_v2i32: -; CHECK: ld1 { v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %tmp = load <2 x i32>* %ptr - store <2 x i32> %tmp, <2 x i32>* %ptr2 - ret void -} - -define void @test_ldst1_v1i64(<1 x i64>* %ptr, <1 x i64>* %ptr2) { -; CHECK-LABEL: test_ldst1_v1i64: -; CHECK: ld1 { v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %tmp = load <1 x i64>* %ptr - store <1 x i64> %tmp, <1 x i64>* %ptr2 - ret void -} - -%struct.int8x16x2_t = type { [2 x <16 x i8>] } -%struct.int16x8x2_t = type { [2 x <8 x i16>] } -%struct.int32x4x2_t = type { [2 x <4 x i32>] } -%struct.int64x2x2_t = type { [2 x <2 x i64>] } -%struct.float32x4x2_t = type { [2 x <4 x float>] } -%struct.float64x2x2_t = type { [2 x <2 x double>] } -%struct.int8x8x2_t = type { [2 x <8 x i8>] } -%struct.int16x4x2_t = type { [2 x <4 x i16>] } -%struct.int32x2x2_t = type { [2 x <2 x i32>] } -%struct.int64x1x2_t = type { [2 x <1 x i64>] } -%struct.float32x2x2_t = type { [2 x <2 x float>] } -%struct.float64x1x2_t = type { [2 x <1 x double>] } -%struct.int8x16x3_t = type { [3 x <16 x i8>] } -%struct.int16x8x3_t = type { [3 x <8 x i16>] } -%struct.int32x4x3_t = type { [3 x <4 x i32>] } -%struct.int64x2x3_t = type { [3 x <2 x i64>] } -%struct.float32x4x3_t = type { [3 x <4 x float>] } -%struct.float64x2x3_t = type { [3 x <2 x double>] } -%struct.int8x8x3_t = type { [3 x <8 x i8>] } -%struct.int16x4x3_t = type { [3 x <4 x i16>] } -%struct.int32x2x3_t = type { [3 x <2 x i32>] } -%struct.int64x1x3_t = type { [3 x <1 x i64>] } -%struct.float32x2x3_t = type { [3 x <2 x float>] } -%struct.float64x1x3_t = type { 
[3 x <1 x double>] } -%struct.int8x16x4_t = type { [4 x <16 x i8>] } -%struct.int16x8x4_t = type { [4 x <8 x i16>] } -%struct.int32x4x4_t = type { [4 x <4 x i32>] } -%struct.int64x2x4_t = type { [4 x <2 x i64>] } -%struct.float32x4x4_t = type { [4 x <4 x float>] } -%struct.float64x2x4_t = type { [4 x <2 x double>] } -%struct.int8x8x4_t = type { [4 x <8 x i8>] } -%struct.int16x4x4_t = type { [4 x <4 x i16>] } -%struct.int32x2x4_t = type { [4 x <2 x i32>] } -%struct.int64x1x4_t = type { [4 x <1 x i64>] } -%struct.float32x2x4_t = type { [4 x <2 x float>] } -%struct.float64x1x4_t = type { [4 x <1 x double>] } - - -define <16 x i8> @test_vld1q_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld1q_s8 -; CHECK: ld1 { v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}] - %vld1 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %a, i32 1) - ret <16 x i8> %vld1 -} - -define <8 x i16> @test_vld1q_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld1q_s16 -; CHECK: ld1 { v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld1 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %1, i32 2) - ret <8 x i16> %vld1 -} - -define <4 x i32> @test_vld1q_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld1q_s32 -; CHECK: ld1 { v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld1 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %1, i32 4) - ret <4 x i32> %vld1 -} - -define <2 x i64> @test_vld1q_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld1q_s64 -; CHECK: ld1 { v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld1 = tail call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %1, i32 8) - ret <2 x i64> %vld1 -} - -define <4 x float> @test_vld1q_f32(float* readonly %a) { -; CHECK-LABEL: test_vld1q_f32 -; CHECK: ld1 { v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %1, i32 4) - ret <4 x float> %vld1 -} - -define <2 x double> @test_vld1q_f64(double* readonly %a) { -; CHECK-LABEL: test_vld1q_f64 -; CHECK: ld1 { v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld1 = tail call <2 x double> @llvm.arm.neon.vld1.v2f64(i8* %1, i32 8) - ret <2 x double> %vld1 -} - -define <8 x i8> @test_vld1_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld1_s8 -; CHECK: ld1 { v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}] - %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1) - ret <8 x i8> %vld1 -} - -define <4 x i16> @test_vld1_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld1_s16 -; CHECK: ld1 { v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2) - ret <4 x i16> %vld1 -} - -define <2 x i32> @test_vld1_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld1_s32 -; CHECK: ld1 { v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld1 = tail call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %1, i32 4) - ret <2 x i32> %vld1 -} - -define <1 x i64> @test_vld1_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld1_s64 -; CHECK: ld1 { v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld1 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %1, i32 8) - ret <1 x i64> %vld1 -} - -define <2 x float> @test_vld1_f32(float* readonly %a) { -; CHECK-LABEL: test_vld1_f32 -; CHECK: ld1 { v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %1, i32 4) - ret <2 x float> %vld1 -} - -define <1 x double> @test_vld1_f64(double* readonly %a) { -; CHECK-LABEL: 
test_vld1_f64 -; CHECK: ld1 { v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld1 = tail call <1 x double> @llvm.arm.neon.vld1.v1f64(i8* %1, i32 8) - ret <1 x double> %vld1 -} - -define <8 x i8> @test_vld1_p8(i8* readonly %a) { -; CHECK-LABEL: test_vld1_p8 -; CHECK: ld1 { v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}] - %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1) - ret <8 x i8> %vld1 -} - -define <4 x i16> @test_vld1_p16(i16* readonly %a) { -; CHECK-LABEL: test_vld1_p16 -; CHECK: ld1 { v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2) - ret <4 x i16> %vld1 -} - -define %struct.int8x16x2_t @test_vld2q_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld2q_s8 -; CHECK: ld2 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}] - %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %a, i32 1) - %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2.fca.1.extract, 0, 1 - ret %struct.int8x16x2_t %.fca.0.1.insert -} - -define %struct.int16x8x2_t @test_vld2q_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld2q_s16 -; CHECK: ld2 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8* %1, i32 2) - %vld2.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2.fca.1.extract, 0, 1 - ret %struct.int16x8x2_t %.fca.0.1.insert -} - -define %struct.int32x4x2_t @test_vld2q_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld2q_s32 -; CHECK: ld2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %1, i32 4) - %vld2.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vld2.fca.1.extract, 0, 1 - ret %struct.int32x4x2_t %.fca.0.1.insert -} - -define %struct.int64x2x2_t @test_vld2q_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld2q_s64 -; CHECK: ld2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8* %1, i32 8) - %vld2.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2.fca.1.extract, 0, 1 - ret %struct.int64x2x2_t %.fca.0.1.insert -} - -define %struct.float32x4x2_t @test_vld2q_f32(float* readonly %a) { -; CHECK-LABEL: test_vld2q_f32 -; CHECK: ld2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld2 = tail 
call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4) - %vld2.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2.fca.1.extract, 0, 1 - ret %struct.float32x4x2_t %.fca.0.1.insert -} - -define %struct.float64x2x2_t @test_vld2q_f64(double* readonly %a) { -; CHECK-LABEL: test_vld2q_f64 -; CHECK: ld2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld2 = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8* %1, i32 8) - %vld2.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2.fca.1.extract, 0, 1 - ret %struct.float64x2x2_t %.fca.0.1.insert -} - -define %struct.int8x8x2_t @test_vld2_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld2_s8 -; CHECK: ld2 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}] - %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %a, i32 1) - %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2.fca.1.extract, 0, 1 - ret %struct.int8x8x2_t %.fca.0.1.insert -} - -define %struct.int16x4x2_t @test_vld2_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld2_s16 -; CHECK: ld2 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8* %1, i32 2) - %vld2.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2.fca.1.extract, 0, 1 - ret %struct.int16x4x2_t %.fca.0.1.insert -} - -define %struct.int32x2x2_t @test_vld2_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld2_s32 -; CHECK: ld2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %1, i32 4) - %vld2.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2.fca.1.extract, 0, 1 - ret %struct.int32x2x2_t %.fca.0.1.insert -} - -define %struct.int64x1x2_t @test_vld2_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld2_s64 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %1, i32 8) - %vld2.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <1 x i64>, 
<1 x i64> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld2.fca.1.extract, 0, 1 - ret %struct.int64x1x2_t %.fca.0.1.insert -} - -define %struct.float32x2x2_t @test_vld2_f32(float* readonly %a) { -; CHECK-LABEL: test_vld2_f32 -; CHECK: ld2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld2 = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %1, i32 4) - %vld2.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2.fca.1.extract, 0, 1 - ret %struct.float32x2x2_t %.fca.0.1.insert -} - -define %struct.float64x1x2_t @test_vld2_f64(double* readonly %a) { -; CHECK-LABEL: test_vld2_f64 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld2 = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %1, i32 8) - %vld2.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2.fca.1.extract, 0, 1 - ret %struct.float64x1x2_t %.fca.0.1.insert -} - -define %struct.int8x16x3_t @test_vld3q_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld3q_s8 -; CHECK: ld3 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}] - %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %a, i32 1) - %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3.fca.2.extract, 0, 2 - ret %struct.int8x16x3_t %.fca.0.2.insert -} - -define %struct.int16x8x3_t @test_vld3q_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld3q_s16 -; CHECK: ld3 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8* %1, i32 2) - %vld3.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3.fca.2.extract, 0, 2 - ret %struct.int16x8x3_t %.fca.0.2.insert -} - -define %struct.int32x4x3_t @test_vld3q_s32(i32* readonly %a) { -; CHECK-LABEL: 
test_vld3q_s32 -; CHECK: ld3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %1, i32 4) - %vld3.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3.fca.2.extract, 0, 2 - ret %struct.int32x4x3_t %.fca.0.2.insert -} - -define %struct.int64x2x3_t @test_vld3q_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld3q_s64 -; CHECK: ld3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8* %1, i32 8) - %vld3.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3.fca.2.extract, 0, 2 - ret %struct.int64x2x3_t %.fca.0.2.insert -} - -define %struct.float32x4x3_t @test_vld3q_f32(float* readonly %a) { -; CHECK-LABEL: test_vld3q_f32 -; CHECK: ld3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %1, i32 4) - %vld3.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3.fca.2.extract, 0, 2 - ret %struct.float32x4x3_t %.fca.0.2.insert -} - -define %struct.float64x2x3_t @test_vld3q_f64(double* readonly %a) { -; CHECK-LABEL: test_vld3q_f64 -; CHECK: ld3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8* %1, i32 8) - %vld3.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x2x3_t 
%.fca.0.1.insert, <2 x double> %vld3.fca.2.extract, 0, 2 - ret %struct.float64x2x3_t %.fca.0.2.insert -} - -define %struct.int8x8x3_t @test_vld3_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld3_s8 -; CHECK: ld3 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}] - %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %a, i32 1) - %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %vld3.fca.2.extract, 0, 2 - ret %struct.int8x8x3_t %.fca.0.2.insert -} - -define %struct.int16x4x3_t @test_vld3_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld3_s16 -; CHECK: ld3 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %1, i32 2) - %vld3.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3.fca.2.extract, 0, 2 - ret %struct.int16x4x3_t %.fca.0.2.insert -} - -define %struct.int32x2x3_t @test_vld3_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld3_s32 -; CHECK: ld3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8* %1, i32 4) - %vld3.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3.fca.2.extract, 0, 2 - ret %struct.int32x2x3_t %.fca.0.2.insert -} - -define %struct.int64x1x3_t @test_vld3_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld3_s64 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %1, i32 8) - %vld3.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = 
insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3.fca.2.extract, 0, 2 - ret %struct.int64x1x3_t %.fca.0.2.insert -} - -define %struct.float32x2x3_t @test_vld3_f32(float* readonly %a) { -; CHECK-LABEL: test_vld3_f32 -; CHECK: ld3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8* %1, i32 4) - %vld3.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3.fca.2.extract, 0, 2 - ret %struct.float32x2x3_t %.fca.0.2.insert -} - -define %struct.float64x1x3_t @test_vld3_f64(double* readonly %a) { -; CHECK-LABEL: test_vld3_f64 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %1, i32 8) - %vld3.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3.fca.2.extract, 0, 2 - ret %struct.float64x1x3_t %.fca.0.2.insert -} - -define %struct.int8x16x4_t @test_vld4q_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld4q_s8 -; CHECK: ld4 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}] - %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %a, i32 1) - %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld4.fca.3.extract, 0, 3 - ret %struct.int8x16x4_t %.fca.0.3.insert -} - -define %struct.int16x8x4_t @test_vld4q_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld4q_s16 -; CHECK: ld4 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %1, i32 2) - %vld4.fca.0.extract = 
extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld4.fca.3.extract, 0, 3 - ret %struct.int16x8x4_t %.fca.0.3.insert -} - -define %struct.int32x4x4_t @test_vld4q_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld4q_s32 -; CHECK: ld4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8* %1, i32 4) - %vld4.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld4.fca.3.extract, 0, 3 - ret %struct.int32x4x4_t %.fca.0.3.insert -} - -define %struct.int64x2x4_t @test_vld4q_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld4q_s64 -; CHECK: ld4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8* %1, i32 8) - %vld4.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld4.fca.3.extract, 0, 3 - ret %struct.int64x2x4_t %.fca.0.3.insert -} - -define %struct.float32x4x4_t @test_vld4q_f32(float* readonly %a) { -; CHECK-LABEL: test_vld4q_f32 -; CHECK: ld4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8* %1, i32 4) - %vld4.fca.0.extract = extractvalue { <4 x float>, <4 x 
float>, <4 x float>, <4 x float> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld4.fca.3.extract, 0, 3 - ret %struct.float32x4x4_t %.fca.0.3.insert -} - -define %struct.float64x2x4_t @test_vld4q_f64(double* readonly %a) { -; CHECK-LABEL: test_vld4q_f64 -; CHECK: ld4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8* %1, i32 8) - %vld4.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld4.fca.3.extract, 0, 3 - ret %struct.float64x2x4_t %.fca.0.3.insert -} - -define %struct.int8x8x4_t @test_vld4_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld4_s8 -; CHECK: ld4 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}] - %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %a, i32 1) - %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld4.fca.3.extract, 0, 3 - ret %struct.int8x8x4_t %.fca.0.3.insert -} - -define %struct.int16x4x4_t @test_vld4_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld4_s16 -; CHECK: ld4 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %1, i32 2) - %vld4.fca.0.extract = 
extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld4.fca.3.extract, 0, 3 - ret %struct.int16x4x4_t %.fca.0.3.insert -} - -define %struct.int32x2x4_t @test_vld4_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld4_s32 -; CHECK: ld4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8* %1, i32 4) - %vld4.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld4.fca.3.extract, 0, 3 - ret %struct.int32x2x4_t %.fca.0.3.insert -} - -define %struct.int64x1x4_t @test_vld4_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld4_s64 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %1, i32 8) - %vld4.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld4.fca.3.extract, 0, 3 - ret %struct.int64x1x4_t %.fca.0.3.insert -} - -define %struct.float32x2x4_t @test_vld4_f32(float* readonly %a) { -; CHECK-LABEL: test_vld4_f32 -; CHECK: ld4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8* %1, i32 4) - %vld4.fca.0.extract = extractvalue { <2 x float>, <2 x 
float>, <2 x float>, <2 x float> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld4.fca.3.extract, 0, 3 - ret %struct.float32x2x4_t %.fca.0.3.insert -} - -define %struct.float64x1x4_t @test_vld4_f64(double* readonly %a) { -; CHECK-LABEL: test_vld4_f64 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %1, i32 8) - %vld4.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld4.fca.3.extract, 0, 3 - ret %struct.float64x1x4_t %.fca.0.3.insert -} - -declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) -declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) -declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) -declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) -declare <2 x double> @llvm.arm.neon.vld1.v2f64(i8*, i32) -declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) -declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) -declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) -declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) -declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) -declare <1 x double> @llvm.arm.neon.vld1.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8*, i32) -declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32) -declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8*, i32) -declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32) -declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8*, i32) -declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32) -declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8*, i32) -declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32) -declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32) -declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32) -declare { <1 x 
double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8*, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32) -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8*, i32) -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32) -declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8*, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32) -declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8*, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32) -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8*, i32) -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8*, i32) -declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8*, i32) -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8*, i32) -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8*, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32) -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8*, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32) -declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8*, i32) -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32) - -define void @test_vst1q_s8(i8* %a, <16 x i8> %b) { -; CHECK-LABEL: test_vst1q_s8 -; CHECK: st1 { v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - tail call void @llvm.arm.neon.vst1.v16i8(i8* %a, <16 x i8> %b, i32 1) - ret void -} - -define void @test_vst1q_s16(i16* %a, <8 x i16> %b) { -; CHECK-LABEL: test_vst1q_s16 -; CHECK: st1 { v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst1.v8i16(i8* %1, <8 x i16> %b, i32 2) - ret void -} - -define void @test_vst1q_s32(i32* %a, <4 x i32> %b) { -; CHECK-LABEL: test_vst1q_s32 -; CHECK: st1 { v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst1.v4i32(i8* %1, <4 x i32> %b, i32 4) - ret void -} - -define void @test_vst1q_s64(i64* %a, <2 x i64> %b) { -; CHECK-LABEL: test_vst1q_s64 -; CHECK: st1 { v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst1.v2i64(i8* %1, <2 x i64> %b, i32 8) - ret void -} - -define void @test_vst1q_f32(float* %a, <4 x float> %b) { -; CHECK-LABEL: test_vst1q_f32 -; CHECK: st1 { v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst1.v4f32(i8* %1, <4 x float> %b, i32 4) - ret void -} - -define void @test_vst1q_f64(double* %a, <2 x double> %b) { -; CHECK-LABEL: test_vst1q_f64 -; CHECK: st1 { 
v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst1.v2f64(i8* %1, <2 x double> %b, i32 8) - ret void -} - -define void @test_vst1_s8(i8* %a, <8 x i8> %b) { -; CHECK-LABEL: test_vst1_s8 -; CHECK: st1 { v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - tail call void @llvm.arm.neon.vst1.v8i8(i8* %a, <8 x i8> %b, i32 1) - ret void -} - -define void @test_vst1_s16(i16* %a, <4 x i16> %b) { -; CHECK-LABEL: test_vst1_s16 -; CHECK: st1 { v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst1.v4i16(i8* %1, <4 x i16> %b, i32 2) - ret void -} - -define void @test_vst1_s32(i32* %a, <2 x i32> %b) { -; CHECK-LABEL: test_vst1_s32 -; CHECK: st1 { v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst1.v2i32(i8* %1, <2 x i32> %b, i32 4) - ret void -} - -define void @test_vst1_s64(i64* %a, <1 x i64> %b) { -; CHECK-LABEL: test_vst1_s64 -; CHECK: st1 { v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst1.v1i64(i8* %1, <1 x i64> %b, i32 8) - ret void -} - -define void @test_vst1_f32(float* %a, <2 x float> %b) { -; CHECK-LABEL: test_vst1_f32 -; CHECK: st1 { v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst1.v2f32(i8* %1, <2 x float> %b, i32 4) - ret void -} - -define void @test_vst1_f64(double* %a, <1 x double> %b) { -; CHECK-LABEL: test_vst1_f64 -; CHECK: st1 { v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst1.v1f64(i8* %1, <1 x double> %b, i32 8) - ret void -} - -define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst2q_s8 -; CHECK: st2 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %b.coerce, 1 - tail call void @llvm.arm.neon.vst2.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, i32 1) - ret void -} - -define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst2q_s16 -; CHECK: st2 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1 - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst2.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 2) - ret void -} - -define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst2q_s32 -; CHECK: st2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1 - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst2.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 4) - ret void -} - -define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst2q_s64 -; CHECK: st2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1 - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst2.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 8) - ret void -} - -define void 
@test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vst2q_f32 -; CHECK: st2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1 - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 4) - ret void -} - -define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vst2q_f64 -; CHECK: st2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1 - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst2.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 8) - ret void -} - -define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst2_s8 -; CHECK: st2 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1 - tail call void @llvm.arm.neon.vst2.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 1) - ret void -} - -define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst2_s16 -; CHECK: st2 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1 - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst2.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 2) - ret void -} - -define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst2_s32 -; CHECK: st2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1 - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst2.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 4) - ret void -} - -define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst2_s64 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1 - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst2.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 8) - ret void -} - -define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vst2_f32 -; CHECK: st2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1 - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst2.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 4) - ret void -} - -define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vst2_f64 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x 
<1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1 - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst2.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 8) - ret void -} - -define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst3q_s8 -; CHECK: st3 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %b.coerce, 2 - tail call void @llvm.arm.neon.vst3.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, i32 1) - ret void -} - -define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst3q_s16 -; CHECK: st3 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2 - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst3.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 2) - ret void -} - -define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst3q_s32 -; CHECK: st3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2 - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst3.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 4) - ret void -} - -define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst3q_s64 -; CHECK: st3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2 - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst3.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 8) - ret void -} - -define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vst3q_f32 -; CHECK: st3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2 - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst3.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 4) - ret void -} - -define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vst3q_f64 -; CHECK: st3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0 - 
%b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2 - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst3.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 8) - ret void -} - -define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst3_s8 -; CHECK: st3 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2 - tail call void @llvm.arm.neon.vst3.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 1) - ret void -} - -define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst3_s16 -; CHECK: st3 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2 - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst3.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 2) - ret void -} - -define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst3_s32 -; CHECK: st3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2 - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst3.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 4) - ret void -} - -define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst3_s64 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2 - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst3.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 8) - ret void -} - -define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vst3_f32 -; CHECK: st3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2 - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst3.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 4) - ret void -} - -define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vst3_f64 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = 
extractvalue [3 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2 - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst3.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 8) - ret void -} - -define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst4q_s8 -; CHECK: st4 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3 - tail call void @llvm.arm.neon.vst4.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 1) - ret void -} - -define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst4q_s16 -; CHECK: st4 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3 - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst4.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 2) - ret void -} - -define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst4q_s32 -; CHECK: st4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3 - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst4.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 4) - ret void -} - -define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst4q_s64 -; CHECK: st4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3 - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst4.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 8) - ret void -} - -define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vst4q_f32 -; CHECK: st4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0 - 
%b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3 - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst4.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 4) - ret void -} - -define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vst4q_f64 -; CHECK: st4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3 - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst4.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 8) - ret void -} - -define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst4_s8 -; CHECK: st4 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3 - tail call void @llvm.arm.neon.vst4.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 1) - ret void -} - -define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst4_s16 -; CHECK: st4 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3 - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst4.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 2) - ret void -} - -define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst4_s32 -; CHECK: st4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3 - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst4.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 4) - ret void -} - -define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst4_s64 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - 
%b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3 - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst4.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 8) - ret void -} - -define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vst4_f32 -; CHECK: st4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3 - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst4.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 4) - ret void -} - -define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vst4_f64 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3 - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst4.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 8) - ret void -} - -declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) -declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) -declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) -declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32) -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) -declare void @llvm.arm.neon.vst1.v2f64(i8*, <2 x double>, i32) -declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) -declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) -declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) -declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32) -declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) -declare void @llvm.arm.neon.vst1.v1f64(i8*, <1 x double>, i32) -declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) -declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) -declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) -declare void @llvm.arm.neon.vst2.v2i64(i8*, <2 x i64>, <2 x i64>, i32) -declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) -declare void @llvm.arm.neon.vst2.v2f64(i8*, <2 x double>, <2 x double>, i32) -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) -declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) -declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) -declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) -declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, 
i32) -declare void @llvm.arm.neon.vst2.v1f64(i8*, <1 x double>, <1 x double>, i32) -declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) -declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) -declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) -declare void @llvm.arm.neon.vst3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32) -declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) -declare void @llvm.arm.neon.vst3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32) -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) -declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) -declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) -declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) -declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) -declare void @llvm.arm.neon.vst3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32) -declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) -declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) -declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) -declare void @llvm.arm.neon.vst4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32) -declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) -declare void @llvm.arm.neon.vst4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32) -declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) -declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) -declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) -declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) -declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) -declare void @llvm.arm.neon.vst4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32) - -define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a) { -; CHECK-LABEL: test_vld1q_s8_x2 -; CHECK: ld1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - %1 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8* %a, i32 1) - %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0 - %3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1 - %4 = insertvalue %struct.int8x16x2_t undef, <16 x i8> %2, 0, 0 - %5 = insertvalue %struct.int8x16x2_t %4, <16 x i8> %3, 0, 1 - ret %struct.int8x16x2_t %5 -} - -define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a) { -; CHECK-LABEL: test_vld1q_s16_x2 -; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8* %1, i32 2) - %3 = extractvalue { <8 x i16>, <8 x i16> } %2, 0 - %4 = extractvalue { <8 x i16>, <8 x i16> } %2, 1 - %5 = insertvalue %struct.int16x8x2_t undef, <8 x i16> %3, 0, 0 - %6 = insertvalue %struct.int16x8x2_t %5, <8 x i16> %4, 0, 1 - ret %struct.int16x8x2_t %6 -} - -define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a) { -; CHECK-LABEL: test_vld1q_s32_x2 -; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %2 = tail call { <4 x i32>, <4 x i32> } 
@llvm.aarch64.neon.vld1x2.v4i32(i8* %1, i32 4) - %3 = extractvalue { <4 x i32>, <4 x i32> } %2, 0 - %4 = extractvalue { <4 x i32>, <4 x i32> } %2, 1 - %5 = insertvalue %struct.int32x4x2_t undef, <4 x i32> %3, 0, 0 - %6 = insertvalue %struct.int32x4x2_t %5, <4 x i32> %4, 0, 1 - ret %struct.int32x4x2_t %6 -} - -define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a) { -; CHECK-LABEL: test_vld1q_s64_x2 -; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8* %1, i32 8) - %3 = extractvalue { <2 x i64>, <2 x i64> } %2, 0 - %4 = extractvalue { <2 x i64>, <2 x i64> } %2, 1 - %5 = insertvalue %struct.int64x2x2_t undef, <2 x i64> %3, 0, 0 - %6 = insertvalue %struct.int64x2x2_t %5, <2 x i64> %4, 0, 1 - ret %struct.int64x2x2_t %6 -} - -define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a) { -; CHECK-LABEL: test_vld1q_f32_x2 -; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8* %1, i32 4) - %3 = extractvalue { <4 x float>, <4 x float> } %2, 0 - %4 = extractvalue { <4 x float>, <4 x float> } %2, 1 - %5 = insertvalue %struct.float32x4x2_t undef, <4 x float> %3, 0, 0 - %6 = insertvalue %struct.float32x4x2_t %5, <4 x float> %4, 0, 1 - ret %struct.float32x4x2_t %6 -} - - -define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a) { -; CHECK-LABEL: test_vld1q_f64_x2 -; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8* %1, i32 8) - %3 = extractvalue { <2 x double>, <2 x double> } %2, 0 - %4 = extractvalue { <2 x double>, <2 x double> } %2, 1 - %5 = insertvalue %struct.float64x2x2_t undef, <2 x double> %3, 0, 0 - %6 = insertvalue %struct.float64x2x2_t %5, <2 x double> %4, 0, 1 - ret %struct.float64x2x2_t %6 -} - -define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a) { -; CHECK-LABEL: test_vld1_s8_x2 -; CHECK: ld1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - %1 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8* %a, i32 1) - %2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0 - %3 = extractvalue { <8 x i8>, <8 x i8> } %1, 1 - %4 = insertvalue %struct.int8x8x2_t undef, <8 x i8> %2, 0, 0 - %5 = insertvalue %struct.int8x8x2_t %4, <8 x i8> %3, 0, 1 - ret %struct.int8x8x2_t %5 -} - -define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a) { -; CHECK-LABEL: test_vld1_s16_x2 -; CHECK: ld1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8* %1, i32 2) - %3 = extractvalue { <4 x i16>, <4 x i16> } %2, 0 - %4 = extractvalue { <4 x i16>, <4 x i16> } %2, 1 - %5 = insertvalue %struct.int16x4x2_t undef, <4 x i16> %3, 0, 0 - %6 = insertvalue %struct.int16x4x2_t %5, <4 x i16> %4, 0, 1 - ret %struct.int16x4x2_t %6 -} - -define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a) { -; CHECK-LABEL: test_vld1_s32_x2 -; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8* %1, i32 4) - %3 = extractvalue { <2 x i32>, <2 x i32> } %2, 0 - %4 = extractvalue { <2 x i32>, <2 x i32> } %2, 1 - %5 = insertvalue %struct.int32x2x2_t undef, <2 x i32> %3, 0, 0 - %6 = insertvalue %struct.int32x2x2_t %5, <2 x i32> %4, 0, 1 - ret 
%struct.int32x2x2_t %6 -} - -define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a) { -; CHECK-LABEL: test_vld1_s64_x2 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8* %1, i32 8) - %3 = extractvalue { <1 x i64>, <1 x i64> } %2, 0 - %4 = extractvalue { <1 x i64>, <1 x i64> } %2, 1 - %5 = insertvalue %struct.int64x1x2_t undef, <1 x i64> %3, 0, 0 - %6 = insertvalue %struct.int64x1x2_t %5, <1 x i64> %4, 0, 1 - ret %struct.int64x1x2_t %6 -} - -define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a) { -; CHECK-LABEL: test_vld1_f32_x2 -; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8* %1, i32 4) - %3 = extractvalue { <2 x float>, <2 x float> } %2, 0 - %4 = extractvalue { <2 x float>, <2 x float> } %2, 1 - %5 = insertvalue %struct.float32x2x2_t undef, <2 x float> %3, 0, 0 - %6 = insertvalue %struct.float32x2x2_t %5, <2 x float> %4, 0, 1 - ret %struct.float32x2x2_t %6 -} - -define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a) { -; CHECK-LABEL: test_vld1_f64_x2 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8* %1, i32 8) - %3 = extractvalue { <1 x double>, <1 x double> } %2, 0 - %4 = extractvalue { <1 x double>, <1 x double> } %2, 1 - %5 = insertvalue %struct.float64x1x2_t undef, <1 x double> %3, 0, 0 - %6 = insertvalue %struct.float64x1x2_t %5, <1 x double> %4, 0, 1 - ret %struct.float64x1x2_t %6 -} - -define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a) { -; CHECK-LABEL: test_vld1q_s8_x3 -; CHECK: ld1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, -; [{{x[0-9]+|sp}}] - %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8* %a, i32 1) - %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 0 - %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 1 - %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 2 - %5 = insertvalue %struct.int8x16x3_t undef, <16 x i8> %2, 0, 0 - %6 = insertvalue %struct.int8x16x3_t %5, <16 x i8> %3, 0, 1 - %7 = insertvalue %struct.int8x16x3_t %6, <16 x i8> %4, 0, 2 - ret %struct.int8x16x3_t %7 -} - -define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a) { -; CHECK-LABEL: test_vld1q_s16_x3 -; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, -; [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8* %1, i32 2) - %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 0 - %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 1 - %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 2 - %6 = insertvalue %struct.int16x8x3_t undef, <8 x i16> %3, 0, 0 - %7 = insertvalue %struct.int16x8x3_t %6, <8 x i16> %4, 0, 1 - %8 = insertvalue %struct.int16x8x3_t %7, <8 x i16> %5, 0, 2 - ret %struct.int16x8x3_t %8 -} - -define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a) { -; CHECK-LABEL: test_vld1q_s32_x3 -; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, -; [{{x[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8* %1, i32 4) - %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 0 - %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 1 - %5 = 
extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 2 - %6 = insertvalue %struct.int32x4x3_t undef, <4 x i32> %3, 0, 0 - %7 = insertvalue %struct.int32x4x3_t %6, <4 x i32> %4, 0, 1 - %8 = insertvalue %struct.int32x4x3_t %7, <4 x i32> %5, 0, 2 - ret %struct.int32x4x3_t %8 -} - -define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a) { -; CHECK-LABEL: test_vld1q_s64_x3 -; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, -; [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8* %1, i32 8) - %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 0 - %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 1 - %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 2 - %6 = insertvalue %struct.int64x2x3_t undef, <2 x i64> %3, 0, 0 - %7 = insertvalue %struct.int64x2x3_t %6, <2 x i64> %4, 0, 1 - %8 = insertvalue %struct.int64x2x3_t %7, <2 x i64> %5, 0, 2 - ret %struct.int64x2x3_t %8 -} - -define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a) { -; CHECK-LABEL: test_vld1q_f32_x3 -; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, -; [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %2 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8* %1, i32 4) - %3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 0 - %4 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 1 - %5 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 2 - %6 = insertvalue %struct.float32x4x3_t undef, <4 x float> %3, 0, 0 - %7 = insertvalue %struct.float32x4x3_t %6, <4 x float> %4, 0, 1 - %8 = insertvalue %struct.float32x4x3_t %7, <4 x float> %5, 0, 2 - ret %struct.float32x4x3_t %8 -} - - -define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a) { -; CHECK-LABEL: test_vld1q_f64_x3 -; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, -; [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %2 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8* %1, i32 8) - %3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 0 - %4 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 1 - %5 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 2 - %6 = insertvalue %struct.float64x2x3_t undef, <2 x double> %3, 0, 0 - %7 = insertvalue %struct.float64x2x3_t %6, <2 x double> %4, 0, 1 - %8 = insertvalue %struct.float64x2x3_t %7, <2 x double> %5, 0, 2 - ret %struct.float64x2x3_t %8 -} - -define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a) { -; CHECK-LABEL: test_vld1_s8_x3 -; CHECK: ld1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, -; [{{x[0-9]+|sp}}] - %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8* %a, i32 1) - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 - %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 - %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 - %5 = insertvalue %struct.int8x8x3_t undef, <8 x i8> %2, 0, 0 - %6 = insertvalue %struct.int8x8x3_t %5, <8 x i8> %3, 0, 1 - %7 = insertvalue %struct.int8x8x3_t %6, <8 x i8> %4, 0, 2 - ret %struct.int8x8x3_t %7 -} - -define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a) { -; CHECK-LABEL: test_vld1_s16_x3 -; CHECK: ld1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, -; [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8* %1, i32 2) - %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x 
i16> } %2, 0 - %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1 - %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2 - %6 = insertvalue %struct.int16x4x3_t undef, <4 x i16> %3, 0, 0 - %7 = insertvalue %struct.int16x4x3_t %6, <4 x i16> %4, 0, 1 - %8 = insertvalue %struct.int16x4x3_t %7, <4 x i16> %5, 0, 2 - ret %struct.int16x4x3_t %8 -} - -define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a) { - %1 = bitcast i32* %a to i8* -; CHECK-LABEL: test_vld1_s32_x3 -; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, -; [{{x[0-9]+|sp}}] - %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8* %1, i32 4) - %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 0 - %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 1 - %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 2 - %6 = insertvalue %struct.int32x2x3_t undef, <2 x i32> %3, 0, 0 - %7 = insertvalue %struct.int32x2x3_t %6, <2 x i32> %4, 0, 1 - %8 = insertvalue %struct.int32x2x3_t %7, <2 x i32> %5, 0, 2 - ret %struct.int32x2x3_t %8 -} - -define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a) { -; CHECK-LABEL: test_vld1_s64_x3 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, -; [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8* %1, i32 8) - %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 0 - %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 1 - %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 2 - %6 = insertvalue %struct.int64x1x3_t undef, <1 x i64> %3, 0, 0 - %7 = insertvalue %struct.int64x1x3_t %6, <1 x i64> %4, 0, 1 - %8 = insertvalue %struct.int64x1x3_t %7, <1 x i64> %5, 0, 2 - ret %struct.int64x1x3_t %8 -} - -define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a) { -; CHECK-LABEL: test_vld1_f32_x3 -; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, -; [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %2 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8* %1, i32 4) - %3 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 0 - %4 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 1 - %5 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 2 - %6 = insertvalue %struct.float32x2x3_t undef, <2 x float> %3, 0, 0 - %7 = insertvalue %struct.float32x2x3_t %6, <2 x float> %4, 0, 1 - %8 = insertvalue %struct.float32x2x3_t %7, <2 x float> %5, 0, 2 - ret %struct.float32x2x3_t %8 -} - - -define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a) { -; CHECK-LABEL: test_vld1_f64_x3 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, -; [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %2 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8* %1, i32 8) - %3 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 0 - %4 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 1 - %5 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 2 - %6 = insertvalue %struct.float64x1x3_t undef, <1 x double> %3, 0, 0 - %7 = insertvalue %struct.float64x1x3_t %6, <1 x double> %4, 0, 1 - %8 = insertvalue %struct.float64x1x3_t %7, <1 x double> %5, 0, 2 - ret %struct.float64x1x3_t %8 -} - -define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a) { -; CHECK-LABEL: test_vld1q_s8_x4 -; CHECK: ld1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, -; v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - %1 = tail call { <16 x i8>, 
<16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8* %a, i32 1) - %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0 - %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1 - %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2 - %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3 - %6 = insertvalue %struct.int8x16x4_t undef, <16 x i8> %2, 0, 0 - %7 = insertvalue %struct.int8x16x4_t %6, <16 x i8> %3, 0, 1 - %8 = insertvalue %struct.int8x16x4_t %7, <16 x i8> %4, 0, 2 - %9 = insertvalue %struct.int8x16x4_t %8, <16 x i8> %5, 0, 3 - ret %struct.int8x16x4_t %9 -} - -define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a) { -; CHECK-LABEL: test_vld1q_s16_x4 -; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, -; v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8* %1, i32 2) - %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 0 - %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 1 - %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 2 - %6 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 3 - %7 = insertvalue %struct.int16x8x4_t undef, <8 x i16> %3, 0, 0 - %8 = insertvalue %struct.int16x8x4_t %7, <8 x i16> %4, 0, 1 - %9 = insertvalue %struct.int16x8x4_t %8, <8 x i16> %5, 0, 2 - %10 = insertvalue %struct.int16x8x4_t %9, <8 x i16> %6, 0, 3 - ret %struct.int16x8x4_t %10 -} - -define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a) { -; CHECK-LABEL: test_vld1q_s32_x4 -; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, -; v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8* %1, i32 4) - %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 0 - %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 1 - %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 2 - %6 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 3 - %7 = insertvalue %struct.int32x4x4_t undef, <4 x i32> %3, 0, 0 - %8 = insertvalue %struct.int32x4x4_t %7, <4 x i32> %4, 0, 1 - %9 = insertvalue %struct.int32x4x4_t %8, <4 x i32> %5, 0, 2 - %10 = insertvalue %struct.int32x4x4_t %9, <4 x i32> %6, 0, 3 - ret %struct.int32x4x4_t %10 -} - -define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a) { -; CHECK-LABEL: test_vld1q_s64_x4 -; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, -; v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8* %1, i32 8) - %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 0 - %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 1 - %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 2 - %6 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 3 - %7 = insertvalue %struct.int64x2x4_t undef, <2 x i64> %3, 0, 0 - %8 = insertvalue %struct.int64x2x4_t %7, <2 x i64> %4, 0, 1 - %9 = insertvalue %struct.int64x2x4_t %8, <2 x i64> %5, 0, 2 - %10 = insertvalue %struct.int64x2x4_t %9, <2 x i64> %6, 0, 3 - ret %struct.int64x2x4_t %10 -} - -define %struct.float32x4x4_t @test_vld1q_f32_x4(float* %a) { -; CHECK-LABEL: test_vld1q_f32_x4 -; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, 
v{{[0-9]+}}.4s, -; v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8* %1, i32 4) - %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0 - %4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 1 - %5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 2 - %6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 3 - %7 = insertvalue %struct.float32x4x4_t undef, <4 x float> %3, 0, 0 - %8 = insertvalue %struct.float32x4x4_t %7, <4 x float> %4, 0, 1 - %9 = insertvalue %struct.float32x4x4_t %8, <4 x float> %5, 0, 2 - %10 = insertvalue %struct.float32x4x4_t %9, <4 x float> %6, 0, 3 - ret %struct.float32x4x4_t %10 -} - -define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a) { -; CHECK-LABEL: test_vld1q_f64_x4 -; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, -; v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %2 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8* %1, i32 8) - %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0 - %4 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1 - %5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2 - %6 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3 - %7 = insertvalue %struct.float64x2x4_t undef, <2 x double> %3, 0, 0 - %8 = insertvalue %struct.float64x2x4_t %7, <2 x double> %4, 0, 1 - %9 = insertvalue %struct.float64x2x4_t %8, <2 x double> %5, 0, 2 - %10 = insertvalue %struct.float64x2x4_t %9, <2 x double> %6, 0, 3 - ret %struct.float64x2x4_t %10 -} - -define %struct.int8x8x4_t @test_vld1_s8_x4(i8* %a) { -; CHECK-LABEL: test_vld1_s8_x4 -; CHECK: ld1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, -; v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8* %a, i32 1) - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 - %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 - %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 - %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 3 - %6 = insertvalue %struct.int8x8x4_t undef, <8 x i8> %2, 0, 0 - %7 = insertvalue %struct.int8x8x4_t %6, <8 x i8> %3, 0, 1 - %8 = insertvalue %struct.int8x8x4_t %7, <8 x i8> %4, 0, 2 - %9 = insertvalue %struct.int8x8x4_t %8, <8 x i8> %5, 0, 3 - ret %struct.int8x8x4_t %9 -} - -define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a) { -; CHECK-LABEL: test_vld1_s16_x4 -; CHECK: ld1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, -; v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8* %1, i32 2) - %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 0 - %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 1 - %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 2 - %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 3 - %7 = insertvalue %struct.int16x4x4_t undef, <4 x i16> %3, 0, 0 - %8 = insertvalue %struct.int16x4x4_t %7, <4 x i16> %4, 0, 1 - %9 = insertvalue %struct.int16x4x4_t %8, <4 x i16> %5, 0, 2 - %10 = insertvalue %struct.int16x4x4_t %9, <4 x i16> 
%6, 0, 3 - ret %struct.int16x4x4_t %10 -} - -define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a) { -; CHECK-LABEL: test_vld1_s32_x4 -; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, -; v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8* %1, i32 4) - %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0 - %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1 - %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2 - %6 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3 - %7 = insertvalue %struct.int32x2x4_t undef, <2 x i32> %3, 0, 0 - %8 = insertvalue %struct.int32x2x4_t %7, <2 x i32> %4, 0, 1 - %9 = insertvalue %struct.int32x2x4_t %8, <2 x i32> %5, 0, 2 - %10 = insertvalue %struct.int32x2x4_t %9, <2 x i32> %6, 0, 3 - ret %struct.int32x2x4_t %10 -} - -define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a) { -; CHECK-LABEL: test_vld1_s64_x4 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, -; v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8* %1, i32 8) - %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 0 - %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 1 - %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 2 - %6 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 3 - %7 = insertvalue %struct.int64x1x4_t undef, <1 x i64> %3, 0, 0 - %8 = insertvalue %struct.int64x1x4_t %7, <1 x i64> %4, 0, 1 - %9 = insertvalue %struct.int64x1x4_t %8, <1 x i64> %5, 0, 2 - %10 = insertvalue %struct.int64x1x4_t %9, <1 x i64> %6, 0, 3 - ret %struct.int64x1x4_t %10 -} - -define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a) { -; CHECK-LABEL: test_vld1_f32_x4 -; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, -; v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %2 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8* %1, i32 4) - %3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 0 - %4 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 1 - %5 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 2 - %6 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 3 - %7 = insertvalue %struct.float32x2x4_t undef, <2 x float> %3, 0, 0 - %8 = insertvalue %struct.float32x2x4_t %7, <2 x float> %4, 0, 1 - %9 = insertvalue %struct.float32x2x4_t %8, <2 x float> %5, 0, 2 - %10 = insertvalue %struct.float32x2x4_t %9, <2 x float> %6, 0, 3 - ret %struct.float32x2x4_t %10 -} - - -define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a) { -; CHECK-LABEL: test_vld1_f64_x4 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, -; v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %2 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8* %1, i32 8) - %3 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 0 - %4 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 1 - %5 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 2 - %6 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 3 - %7 = 
insertvalue %struct.float64x1x4_t undef, <1 x double> %3, 0, 0 - %8 = insertvalue %struct.float64x1x4_t %7, <1 x double> %4, 0, 1 - %9 = insertvalue %struct.float64x1x4_t %8, <1 x double> %5, 0, 2 - %10 = insertvalue %struct.float64x1x4_t %9, <1 x double> %6, 0, 3 - ret %struct.float64x1x4_t %10 -} - -define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b) { -; CHECK-LABEL: test_vst1q_s8_x2 -; CHECK: st1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <16 x i8>] %b, 0 - %2 = extractvalue [2 x <16 x i8>] %b, 1 - tail call void @llvm.aarch64.neon.vst1x2.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, i32 1) - ret void -} - -define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b) { -; CHECK-LABEL: test_vst1q_s16_x2 -; CHECK: st1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <8 x i16>] %b, 0 - %2 = extractvalue [2 x <8 x i16>] %b, 1 - %3 = bitcast i16* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v8i16(i8* %3, <8 x i16> %1, <8 x i16> %2, i32 2) - ret void -} - -define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b) { -; CHECK-LABEL: test_vst1q_s32_x2 -; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <4 x i32>] %b, 0 - %2 = extractvalue [2 x <4 x i32>] %b, 1 - %3 = bitcast i32* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v4i32(i8* %3, <4 x i32> %1, <4 x i32> %2, i32 4) - ret void -} - -define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b) { -; CHECK-LABEL: test_vst1q_s64_x2 -; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <2 x i64>] %b, 0 - %2 = extractvalue [2 x <2 x i64>] %b, 1 - %3 = bitcast i64* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v2i64(i8* %3, <2 x i64> %1, <2 x i64> %2, i32 8) - ret void -} - -define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b) { -; CHECK-LABEL: test_vst1q_f32_x2 -; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <4 x float>] %b, 0 - %2 = extractvalue [2 x <4 x float>] %b, 1 - %3 = bitcast float* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v4f32(i8* %3, <4 x float> %1, <4 x float> %2, i32 4) - ret void -} - - -define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b) { -; CHECK-LABEL: test_vst1q_f64_x2 -; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <2 x double>] %b, 0 - %2 = extractvalue [2 x <2 x double>] %b, 1 - %3 = bitcast double* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v2f64(i8* %3, <2 x double> %1, <2 x double> %2, i32 8) - ret void -} - -define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b) { -; CHECK-LABEL: test_vst1_s8_x2 -; CHECK: st1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <8 x i8>] %b, 0 - %2 = extractvalue [2 x <8 x i8>] %b, 1 - tail call void @llvm.aarch64.neon.vst1x2.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 1) - ret void -} - -define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b) { -; CHECK-LABEL: test_vst1_s16_x2 -; CHECK: st1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <4 x i16>] %b, 0 - %2 = extractvalue [2 x <4 x i16>] %b, 1 - %3 = bitcast i16* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v4i16(i8* %3, <4 x i16> %1, <4 x i16> %2, i32 2) - ret void -} - -define void @test_vst1_s32_x2(i32* %a, [2 x <2 x i32>] %b) { -; CHECK-LABEL: test_vst1_s32_x2 -; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <2 x i32>] %b, 
0 - %2 = extractvalue [2 x <2 x i32>] %b, 1 - %3 = bitcast i32* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v2i32(i8* %3, <2 x i32> %1, <2 x i32> %2, i32 4) - ret void -} - -define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b) { -; CHECK-LABEL: test_vst1_s64_x2 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <1 x i64>] %b, 0 - %2 = extractvalue [2 x <1 x i64>] %b, 1 - %3 = bitcast i64* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v1i64(i8* %3, <1 x i64> %1, <1 x i64> %2, i32 8) - ret void -} - -define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b) { -; CHECK-LABEL: test_vst1_f32_x2 -; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <2 x float>] %b, 0 - %2 = extractvalue [2 x <2 x float>] %b, 1 - %3 = bitcast float* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v2f32(i8* %3, <2 x float> %1, <2 x float> %2, i32 4) - ret void -} - -define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b) { -; CHECK-LABEL: test_vst1_f64_x2 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <1 x double>] %b, 0 - %2 = extractvalue [2 x <1 x double>] %b, 1 - %3 = bitcast double* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v1f64(i8* %3, <1 x double> %1, <1 x double> %2, i32 8) - ret void -} - -define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b) { -; CHECK-LABEL: test_vst1q_s8_x3 -; CHECK: st1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <16 x i8>] %b, 0 - %2 = extractvalue [3 x <16 x i8>] %b, 1 - %3 = extractvalue [3 x <16 x i8>] %b, 2 - tail call void @llvm.aarch64.neon.vst1x3.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, i32 1) - ret void -} - -define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b) { -; CHECK-LABEL: test_vst1q_s16_x3 -; CHECK: st1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <8 x i16>] %b, 0 - %2 = extractvalue [3 x <8 x i16>] %b, 1 - %3 = extractvalue [3 x <8 x i16>] %b, 2 - %4 = bitcast i16* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v8i16(i8* %4, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, i32 2) - ret void -} - -define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b) { -; CHECK-LABEL: test_vst1q_s32_x3 -; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <4 x i32>] %b, 0 - %2 = extractvalue [3 x <4 x i32>] %b, 1 - %3 = extractvalue [3 x <4 x i32>] %b, 2 - %4 = bitcast i32* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v4i32(i8* %4, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, i32 4) - ret void -} - -define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b) { -; CHECK-LABEL: test_vst1q_s64_x3 -; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <2 x i64>] %b, 0 - %2 = extractvalue [3 x <2 x i64>] %b, 1 - %3 = extractvalue [3 x <2 x i64>] %b, 2 - %4 = bitcast i64* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v2i64(i8* %4, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, i32 8) - ret void -} - -define void @test_vst1q_f32_x3(float* %a, [3 x <4 x float>] %b) { -; CHECK-LABEL: test_vst1q_f32_x3 -; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <4 x float>] %b, 0 - %2 = extractvalue [3 x <4 x float>] %b, 1 - %3 = extractvalue [3 x <4 x float>] %b, 2 - %4 = bitcast float* %a to i8* - tail call void 
@llvm.aarch64.neon.vst1x3.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 4) - ret void -} - -define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b) { -; CHECK-LABEL: test_vst1q_f64_x3 -; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <2 x double>] %b, 0 - %2 = extractvalue [3 x <2 x double>] %b, 1 - %3 = extractvalue [3 x <2 x double>] %b, 2 - %4 = bitcast double* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v2f64(i8* %4, <2 x double> %1, <2 x double> %2, <2 x double> %3, i32 8) - ret void -} - -define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b) { -; CHECK-LABEL: test_vst1_s8_x3 -; CHECK: st1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <8 x i8>] %b, 0 - %2 = extractvalue [3 x <8 x i8>] %b, 1 - %3 = extractvalue [3 x <8 x i8>] %b, 2 - tail call void @llvm.aarch64.neon.vst1x3.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 1) - ret void -} - -define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b) { -; CHECK-LABEL: test_vst1_s16_x3 -; CHECK: st1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <4 x i16>] %b, 0 - %2 = extractvalue [3 x <4 x i16>] %b, 1 - %3 = extractvalue [3 x <4 x i16>] %b, 2 - %4 = bitcast i16* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 2) - ret void -} - -define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b) { -; CHECK-LABEL: test_vst1_s32_x3 -; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <2 x i32>] %b, 0 - %2 = extractvalue [3 x <2 x i32>] %b, 1 - %3 = extractvalue [3 x <2 x i32>] %b, 2 - %4 = bitcast i32* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v2i32(i8* %4, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4) - ret void -} - -define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b) { -; CHECK-LABEL: test_vst1_s64_x3 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <1 x i64>] %b, 0 - %2 = extractvalue [3 x <1 x i64>] %b, 1 - %3 = extractvalue [3 x <1 x i64>] %b, 2 - %4 = bitcast i64* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v1i64(i8* %4, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8) - ret void -} - -define void @test_vst1_f32_x3(float* %a, [3 x <2 x float>] %b) { -; CHECK-LABEL: test_vst1_f32_x3 -; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <2 x float>] %b, 0 - %2 = extractvalue [3 x <2 x float>] %b, 1 - %3 = extractvalue [3 x <2 x float>] %b, 2 - %4 = bitcast float* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 4) - ret void -} - -define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b) { -; CHECK-LABEL: test_vst1_f64_x3 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <1 x double>] %b, 0 - %2 = extractvalue [3 x <1 x double>] %b, 1 - %3 = extractvalue [3 x <1 x double>] %b, 2 - %4 = bitcast double* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v1f64(i8* %4, <1 x double> %1, <1 x double> %2, <1 x double> %3, i32 8) - ret void -} - -define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b) { -; CHECK-LABEL: test_vst1q_s8_x4 -; CHECK: st1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, -; v{{[0-9]+}}.16b }, 
[{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <16 x i8>] %b, 0 - %2 = extractvalue [4 x <16 x i8>] %b, 1 - %3 = extractvalue [4 x <16 x i8>] %b, 2 - %4 = extractvalue [4 x <16 x i8>] %b, 3 - tail call void @llvm.aarch64.neon.vst1x4.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, <16 x i8> %4, i32 1) - ret void -} - -define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b) { -; CHECK-LABEL: test_vst1q_s16_x4 -; CHECK: st1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, -; v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <8 x i16>] %b, 0 - %2 = extractvalue [4 x <8 x i16>] %b, 1 - %3 = extractvalue [4 x <8 x i16>] %b, 2 - %4 = extractvalue [4 x <8 x i16>] %b, 3 - %5 = bitcast i16* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v8i16(i8* %5, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, <8 x i16> %4, i32 2) - ret void -} - -define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b) { -; CHECK-LABEL: test_vst1q_s32_x4 -; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, -; v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <4 x i32>] %b, 0 - %2 = extractvalue [4 x <4 x i32>] %b, 1 - %3 = extractvalue [4 x <4 x i32>] %b, 2 - %4 = extractvalue [4 x <4 x i32>] %b, 3 - %5 = bitcast i32* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v4i32(i8* %5, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, i32 4) - ret void -} - -define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b) { -; CHECK-LABEL: test_vst1q_s64_x4 -; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, -; v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <2 x i64>] %b, 0 - %2 = extractvalue [4 x <2 x i64>] %b, 1 - %3 = extractvalue [4 x <2 x i64>] %b, 2 - %4 = extractvalue [4 x <2 x i64>] %b, 3 - %5 = bitcast i64* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v2i64(i8* %5, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, <2 x i64> %4, i32 8) - ret void -} - -define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b) { -; CHECK-LABEL: test_vst1q_f32_x4 -; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, -; v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <4 x float>] %b, 0 - %2 = extractvalue [4 x <4 x float>] %b, 1 - %3 = extractvalue [4 x <4 x float>] %b, 2 - %4 = extractvalue [4 x <4 x float>] %b, 3 - %5 = bitcast float* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v4f32(i8* %5, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, i32 4) - ret void -} - -define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b) { -; CHECK-LABEL: test_vst1q_f64_x4 -; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, -; v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <2 x double>] %b, 0 - %2 = extractvalue [4 x <2 x double>] %b, 1 - %3 = extractvalue [4 x <2 x double>] %b, 2 - %4 = extractvalue [4 x <2 x double>] %b, 3 - %5 = bitcast double* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 8) - ret void -} - -define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b) { -; CHECK-LABEL: test_vst1_s8_x4 -; CHECK: st1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, -; v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <8 x i8>] %b, 0 - %2 = extractvalue [4 x <8 x i8>] %b, 1 - %3 = extractvalue [4 x <8 x i8>] %b, 2 - %4 = extractvalue [4 x <8 x i8>] %b, 3 - tail call void @llvm.aarch64.neon.vst1x4.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %4, i32 1) - ret void -} - -define void 
@test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b) { -; CHECK-LABEL: test_vst1_s16_x4 -; CHECK: st1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, -; v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <4 x i16>] %b, 0 - %2 = extractvalue [4 x <4 x i16>] %b, 1 - %3 = extractvalue [4 x <4 x i16>] %b, 2 - %4 = extractvalue [4 x <4 x i16>] %b, 3 - %5 = bitcast i16* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v4i16(i8* %5, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, <4 x i16> %4, i32 2) - ret void -} - -define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b) { -; CHECK-LABEL: test_vst1_s32_x4 -; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, -; v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <2 x i32>] %b, 0 - %2 = extractvalue [4 x <2 x i32>] %b, 1 - %3 = extractvalue [4 x <2 x i32>] %b, 2 - %4 = extractvalue [4 x <2 x i32>] %b, 3 - %5 = bitcast i32* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 4) - ret void -} - -define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b) { -; CHECK-LABEL: test_vst1_s64_x4 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, -; v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <1 x i64>] %b, 0 - %2 = extractvalue [4 x <1 x i64>] %b, 1 - %3 = extractvalue [4 x <1 x i64>] %b, 2 - %4 = extractvalue [4 x <1 x i64>] %b, 3 - %5 = bitcast i64* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v1i64(i8* %5, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, <1 x i64> %4, i32 8) - ret void -} - -define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b) { -; CHECK-LABEL: test_vst1_f32_x4 -; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, -; v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <2 x float>] %b, 0 - %2 = extractvalue [4 x <2 x float>] %b, 1 - %3 = extractvalue [4 x <2 x float>] %b, 2 - %4 = extractvalue [4 x <2 x float>] %b, 3 - %5 = bitcast float* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 4) - ret void -} - -define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b) { -; CHECK-LABEL: test_vst1_f64_x4 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, -; v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <1 x double>] %b, 0 - %2 = extractvalue [4 x <1 x double>] %b, 1 - %3 = extractvalue [4 x <1 x double>] %b, 2 - %4 = extractvalue [4 x <1 x double>] %b, 3 - %5 = bitcast double* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v1f64(i8* %5, <1 x double> %1, <1 x double> %2, <1 x double> %3, <1 x double> %4, i32 8) - ret void -} - -declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8*, i32) -declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8*, i32) -declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8*, i32) -declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8*, i32) -declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8*, i32) -declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8*, i32) -declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8*, i32) -declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8*, i32) -declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8*, i32) -declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8*, i32) -declare { 
<1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8*, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8*, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8*, i32)
-declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8*, i32)
-declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8*, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8*, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8*, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8*, i32)
-declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8*, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8*, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8*, i32)
-declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8*, i32)
-declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8*, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8*, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8*, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8*, i32)
-declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8*, i32)
-declare void @llvm.aarch64.neon.vst1x2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v4i32(i8*, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v2i64(i8*, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v4f32(i8*, <4 x float>, <4 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v2f64(i8*, <2 x double>, <2 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v4i16(i8*, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v2i32(i8*, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v2f32(i8*, <2 x float>, <2 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v1f64(i8*, <1 x double>, <1 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32)
diff --git a/test/CodeGen/AArch64/neon-simd-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-ldst-one.ll
deleted file mode 100644
index 75c2a82ab57e..000000000000
--- a/test/CodeGen/AArch64/neon-simd-ldst-one.ll
+++ /dev/null
@@ -1,2300 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; interesting parts copied into arm64 directory as aarch64-neon-simd-ldst-one.ll
-
-%struct.uint8x16x2_t = type { [2 x <16 x i8>] }
-%struct.poly8x16x2_t = type { [2 x <16 x i8>] }
-%struct.uint8x16x3_t = type { [3 x <16 x i8>] }
-%struct.int8x16x2_t = type { [2 x <16 x i8>] }
-%struct.int16x8x2_t = type { [2 x <8 x i16>] }
-%struct.int32x4x2_t = type { [2 x <4 x i32>] }
-%struct.int64x2x2_t = type { [2 x <2 x i64>] }
-%struct.float32x4x2_t = type { [2 x <4 x float>] }
-%struct.float64x2x2_t = type { [2 x <2 x double>] }
-%struct.int8x8x2_t = type { [2 x <8 x i8>] }
-%struct.int16x4x2_t = type { [2 x <4 x i16>] }
-%struct.int32x2x2_t = type { [2 x <2 x i32>] }
-%struct.int64x1x2_t = type { [2 x <1 x i64>] }
-%struct.float32x2x2_t = type { [2 x <2 x float>] }
-%struct.float64x1x2_t = type { [2 x <1 x double>] }
-%struct.int8x16x3_t = type { [3 x <16 x i8>] }
-%struct.int16x8x3_t = type { [3 x <8 x i16>] }
-%struct.int32x4x3_t = type { [3 x <4 x i32>] }
-%struct.int64x2x3_t = type { [3 x <2 x i64>] }
-%struct.float32x4x3_t = type { [3 x <4 x float>] }
-%struct.float64x2x3_t = type { [3 x <2 x double>] }
-%struct.int8x8x3_t = type { [3 x <8 x i8>] } -%struct.int16x4x3_t = type { [3 x <4 x i16>] } -%struct.int32x2x3_t = type { [3 x <2 x i32>] } -%struct.int64x1x3_t = type { [3 x <1 x i64>] } -%struct.float32x2x3_t = type { [3 x <2 x float>] } -%struct.float64x1x3_t = type { [3 x <1 x double>] } -%struct.int8x16x4_t = type { [4 x <16 x i8>] } -%struct.int16x8x4_t = type { [4 x <8 x i16>] } -%struct.int32x4x4_t = type { [4 x <4 x i32>] } -%struct.int64x2x4_t = type { [4 x <2 x i64>] } -%struct.float32x4x4_t = type { [4 x <4 x float>] } -%struct.float64x2x4_t = type { [4 x <2 x double>] } -%struct.int8x8x4_t = type { [4 x <8 x i8>] } -%struct.int16x4x4_t = type { [4 x <4 x i16>] } -%struct.int32x2x4_t = type { [4 x <2 x i32>] } -%struct.int64x1x4_t = type { [4 x <1 x i64>] } -%struct.float32x2x4_t = type { [4 x <2 x float>] } -%struct.float64x1x4_t = type { [4 x <1 x double>] } - -define <16 x i8> @test_ld_from_poll_v16i8(<16 x i8> %a) { -; CHECK-LABEL: test_ld_from_poll_v16i8 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = add <16 x i8> %a, - ret <16 x i8> %b -} - -define <8 x i16> @test_ld_from_poll_v8i16(<8 x i16> %a) { -; CHECK-LABEL: test_ld_from_poll_v8i16 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = add <8 x i16> %a, - ret <8 x i16> %b -} - -define <4 x i32> @test_ld_from_poll_v4i32(<4 x i32> %a) { -; CHECK-LABEL: test_ld_from_poll_v4i32 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = add <4 x i32> %a, - ret <4 x i32> %b -} - -define <2 x i64> @test_ld_from_poll_v2i64(<2 x i64> %a) { -; CHECK-LABEL: test_ld_from_poll_v2i64 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = add <2 x i64> %a, - ret <2 x i64> %b -} - -define <4 x float> @test_ld_from_poll_v4f32(<4 x float> %a) { -; CHECK-LABEL: test_ld_from_poll_v4f32 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = fadd <4 x float> %a, - ret <4 x float> %b -} - -define <2 x double> @test_ld_from_poll_v2f64(<2 x double> %a) { -; CHECK-LABEL: test_ld_from_poll_v2f64 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = fadd <2 x double> %a, - ret <2 x double> %b -} - -define <8 x i8> @test_ld_from_poll_v8i8(<8 x i8> %a) { -; CHECK-LABEL: test_ld_from_poll_v8i8 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = add <8 x i8> %a, - ret <8 x i8> %b -} - -define <4 x i16> @test_ld_from_poll_v4i16(<4 x i16> %a) { -; CHECK-LABEL: test_ld_from_poll_v4i16 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = add <4 x i16> %a, - ret <4 x i16> %b -} - -define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) { -; CHECK-LABEL: test_ld_from_poll_v2i32 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = add <2 x i32> %a, - ret <2 x i32> %b -} - -define <16 x i8> @test_vld1q_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld1q_dup_s8 -; CHECK: ld1r { {{v[0-9]+}}.16b }, [x0] -entry: - %0 = load i8* %a, align 1 - %1 = insertelement <16 x i8> undef, i8 %0, i32 0 - %lane = 
shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer - ret <16 x i8> %lane -} - -define <8 x i16> @test_vld1q_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld1q_dup_s16 -; CHECK: ld1r { {{v[0-9]+}}.8h }, [x0] -entry: - %0 = load i16* %a, align 2 - %1 = insertelement <8 x i16> undef, i16 %0, i32 0 - %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer - ret <8 x i16> %lane -} - -define <4 x i32> @test_vld1q_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld1q_dup_s32 -; CHECK: ld1r { {{v[0-9]+}}.4s }, [x0] -entry: - %0 = load i32* %a, align 4 - %1 = insertelement <4 x i32> undef, i32 %0, i32 0 - %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer - ret <4 x i32> %lane -} - -define <2 x i64> @test_vld1q_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld1q_dup_s64 -; CHECK: ld1r { {{v[0-9]+}}.2d }, [x0] -entry: - %0 = load i64* %a, align 8 - %1 = insertelement <2 x i64> undef, i64 %0, i32 0 - %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer - ret <2 x i64> %lane -} - -define <4 x float> @test_vld1q_dup_f32(float* %a) { -; CHECK-LABEL: test_vld1q_dup_f32 -; CHECK: ld1r { {{v[0-9]+}}.4s }, [x0] -entry: - %0 = load float* %a, align 4 - %1 = insertelement <4 x float> undef, float %0, i32 0 - %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer - ret <4 x float> %lane -} - -define <2 x double> @test_vld1q_dup_f64(double* %a) { -; CHECK-LABEL: test_vld1q_dup_f64 -; CHECK: ld1r { {{v[0-9]+}}.2d }, [x0] -entry: - %0 = load double* %a, align 8 - %1 = insertelement <2 x double> undef, double %0, i32 0 - %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer - ret <2 x double> %lane -} - -define <8 x i8> @test_vld1_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld1_dup_s8 -; CHECK: ld1r { {{v[0-9]+}}.8b }, [x0] -entry: - %0 = load i8* %a, align 1 - %1 = insertelement <8 x i8> undef, i8 %0, i32 0 - %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer - ret <8 x i8> %lane -} - -define <4 x i16> @test_vld1_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld1_dup_s16 -; CHECK: ld1r { {{v[0-9]+}}.4h }, [x0] -entry: - %0 = load i16* %a, align 2 - %1 = insertelement <4 x i16> undef, i16 %0, i32 0 - %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer - ret <4 x i16> %lane -} - -define <2 x i32> @test_vld1_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld1_dup_s32 -; CHECK: ld1r { {{v[0-9]+}}.2s }, [x0] -entry: - %0 = load i32* %a, align 4 - %1 = insertelement <2 x i32> undef, i32 %0, i32 0 - %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer - ret <2 x i32> %lane -} - -define <1 x i64> @test_vld1_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld1_dup_s64 -; CHECK: ld1r { {{v[0-9]+}}.1d }, [x0] -entry: - %0 = load i64* %a, align 8 - %1 = insertelement <1 x i64> undef, i64 %0, i32 0 - ret <1 x i64> %1 -} - -define <2 x float> @test_vld1_dup_f32(float* %a) { -; CHECK-LABEL: test_vld1_dup_f32 -; CHECK: ld1r { {{v[0-9]+}}.2s }, [x0] -entry: - %0 = load float* %a, align 4 - %1 = insertelement <2 x float> undef, float %0, i32 0 - %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer - ret <2 x float> %lane -} - -define <1 x double> @test_vld1_dup_f64(double* %a) { -; CHECK-LABEL: test_vld1_dup_f64 -; CHECK: ld1r { {{v[0-9]+}}.1d }, [x0] -entry: - %0 = load double* %a, align 8 - %1 = insertelement <1 x double> undef, double %0, i32 0 - ret <1 x double> %1 -} - -define <1 x i64> @testDUP.v1i64(i64* 
%a, i64* %b) #0 { -; As there is a store operation depending on %1, LD1R pattern can't be selected. -; So LDR and FMOV should be emitted. -; CHECK-LABEL: testDUP.v1i64 -; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}] -; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}] - %1 = load i64* %a, align 8 - store i64 %1, i64* %b, align 8 - %vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0 - ret <1 x i64> %vecinit.i -} - -define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 { -; As there is a store operation depending on %1, LD1R pattern can't be selected. -; So LDR and FMOV should be emitted. -; CHECK-LABEL: testDUP.v1f64 -; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}] -; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}] - %1 = load double* %a, align 8 - store double %1, double* %b, align 8 - %vecinit.i = insertelement <1 x double> undef, double %1, i32 0 - ret <1 x double> %vecinit.i -} - -define %struct.int8x16x2_t @test_vld2q_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld2q_dup_s8 -; CHECK: ld2r { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, [x0] -entry: - %vld_dup = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1) - %0 = extractvalue { <16 x i8>, <16 x i8> } %vld_dup, 0 - %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer - %1 = extractvalue { <16 x i8>, <16 x i8> } %vld_dup, 1 - %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1 - ret %struct.int8x16x2_t %.fca.0.1.insert -} - -define %struct.int16x8x2_t @test_vld2q_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld2q_dup_s16 -; CHECK: ld2r { {{v[0-9]+}}.8h, {{v[0-9]+}}.8h }, [x0] -entry: - %0 = bitcast i16* %a to i8* - %vld_dup = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2) - %1 = extractvalue { <8 x i16>, <8 x i16> } %vld_dup, 0 - %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer - %2 = extractvalue { <8 x i16>, <8 x i16> } %vld_dup, 1 - %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1 - ret %struct.int16x8x2_t %.fca.0.1.insert -} - -define %struct.int32x4x2_t @test_vld2q_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld2q_dup_s32 -; CHECK: ld2r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0] -entry: - %0 = bitcast i32* %a to i8* - %vld_dup = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4) - %1 = extractvalue { <4 x i32>, <4 x i32> } %vld_dup, 0 - %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x i32>, <4 x i32> } %vld_dup, 1 - %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1 - ret %struct.int32x4x2_t %.fca.0.1.insert -} - -define %struct.int64x2x2_t @test_vld2q_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld2q_dup_s64 -; CHECK: ld2r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0] -entry: - %0 = bitcast i64* %a to i8* - %vld_dup = 
tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8) - %1 = extractvalue { <2 x i64>, <2 x i64> } %vld_dup, 0 - %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x i64>, <2 x i64> } %vld_dup, 1 - %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1 - ret %struct.int64x2x2_t %.fca.0.1.insert -} - -define %struct.float32x4x2_t @test_vld2q_dup_f32(float* %a) { -; CHECK-LABEL: test_vld2q_dup_f32 -; CHECK: ld2r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0] -entry: - %0 = bitcast float* %a to i8* - %vld_dup = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, i32 0, i32 4) - %1 = extractvalue { <4 x float>, <4 x float> } %vld_dup, 0 - %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x float>, <4 x float> } %vld_dup, 1 - %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1 - ret %struct.float32x4x2_t %.fca.0.1.insert -} - -define %struct.float64x2x2_t @test_vld2q_dup_f64(double* %a) { -; CHECK-LABEL: test_vld2q_dup_f64 -; CHECK: ld2r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0] -entry: - %0 = bitcast double* %a to i8* - %vld_dup = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, i32 0, i32 8) - %1 = extractvalue { <2 x double>, <2 x double> } %vld_dup, 0 - %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x double>, <2 x double> } %vld_dup, 1 - %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1 - ret %struct.float64x2x2_t %.fca.0.1.insert -} - -define %struct.int8x8x2_t @test_vld2_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld2_dup_s8 -; CHECK: ld2r { {{v[0-9]+}}.8b, {{v[0-9]+}}.8b }, [x0] -entry: - %vld_dup = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) - %0 = extractvalue { <8 x i8>, <8 x i8> } %vld_dup, 0 - %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer - %1 = extractvalue { <8 x i8>, <8 x i8> } %vld_dup, 1 - %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1 - ret %struct.int8x8x2_t %.fca.0.1.insert -} - -define %struct.int16x4x2_t @test_vld2_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld2_dup_s16 -; CHECK: ld2r { {{v[0-9]+}}.4h, {{v[0-9]+}}.4h }, [x0] -entry: - %0 = bitcast i16* %a to i8* - %vld_dup = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) - %1 = extractvalue { <4 x i16>, <4 x i16> } %vld_dup, 0 - %lane = shufflevector <4 x i16> %1, <4 x 
i16> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x i16>, <4 x i16> } %vld_dup, 1 - %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1 - ret %struct.int16x4x2_t %.fca.0.1.insert -} - -define %struct.int32x2x2_t @test_vld2_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld2_dup_s32 -; CHECK: ld2r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0] -entry: - %0 = bitcast i32* %a to i8* - %vld_dup = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) - %1 = extractvalue { <2 x i32>, <2 x i32> } %vld_dup, 0 - %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x i32>, <2 x i32> } %vld_dup, 1 - %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1 - ret %struct.int32x2x2_t %.fca.0.1.insert -} - -define %struct.int64x1x2_t @test_vld2_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld2_dup_s64 -; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0] -entry: - %0 = bitcast i64* %a to i8* - %vld_dup = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %0, i32 8) - %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld_dup, 0 - %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld_dup, 1 - %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1 - ret %struct.int64x1x2_t %.fca.0.1.insert -} - -define %struct.float32x2x2_t @test_vld2_dup_f32(float* %a) { -; CHECK-LABEL: test_vld2_dup_f32 -; CHECK: ld2r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0] -entry: - %0 = bitcast float* %a to i8* - %vld_dup = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, i32 0, i32 4) - %1 = extractvalue { <2 x float>, <2 x float> } %vld_dup, 0 - %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x float>, <2 x float> } %vld_dup, 1 - %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1 - ret %struct.float32x2x2_t %.fca.0.1.insert -} - -define %struct.float64x1x2_t @test_vld2_dup_f64(double* %a) { -; CHECK-LABEL: test_vld2_dup_f64 -; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0] -entry: - %0 = bitcast double* %a to i8* - %vld_dup = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %0, i32 8) - %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld_dup, 0 - %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld_dup, 1 - %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1 - ret %struct.float64x1x2_t %.fca.0.1.insert -} - -define %struct.int8x16x3_t @test_vld3q_dup_s8(i8* %a) { -; 
CHECK-LABEL: test_vld3q_dup_s8 -; CHECK: ld3r { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, [x0] -entry: - %vld_dup = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1) - %0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 0 - %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer - %1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 1 - %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer - %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 2 - %lane2 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %lane2, 0, 2 - ret %struct.int8x16x3_t %.fca.0.2.insert -} - -define %struct.int16x8x3_t @test_vld3q_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld3q_dup_s16 -; CHECK: ld3r { {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h }, [x0] -entry: - %0 = bitcast i16* %a to i8* - %vld_dup = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2) - %1 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 0 - %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer - %2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 1 - %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer - %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 2 - %lane2 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %lane2, 0, 2 - ret %struct.int16x8x3_t %.fca.0.2.insert -} - -define %struct.int32x4x3_t @test_vld3q_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld3q_dup_s32 -; CHECK: ld3r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0] -entry: - %0 = bitcast i32* %a to i8* - %vld_dup = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4) - %1 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 0 - %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 1 - %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer - %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 2 - %lane2 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %lane2, 0, 2 - ret %struct.int32x4x3_t %.fca.0.2.insert -} - -define %struct.int64x2x3_t @test_vld3q_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld3q_dup_s64 -; CHECK: ld3r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0] -entry: - %0 = bitcast i64* %a to i8* - %vld_dup = tail call { <2 x i64>, <2 
x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8) - %1 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 0 - %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 1 - %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 2 - %lane2 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %lane2, 0, 2 - ret %struct.int64x2x3_t %.fca.0.2.insert -} - -define %struct.float32x4x3_t @test_vld3q_dup_f32(float* %a) { -; CHECK-LABEL: test_vld3q_dup_f32 -; CHECK: ld3r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0] -entry: - %0 = bitcast float* %a to i8* - %vld_dup = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, <4 x float> undef, i32 0, i32 4) - %1 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 0 - %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 1 - %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer - %3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 2 - %lane2 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %lane2, 0, 2 - ret %struct.float32x4x3_t %.fca.0.2.insert -} - -define %struct.float64x2x3_t @test_vld3q_dup_f64(double* %a) { -; CHECK-LABEL: test_vld3q_dup_f64 -; CHECK: ld3r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0] -entry: - %0 = bitcast double* %a to i8* - %vld_dup = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8) - %1 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 0 - %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 1 - %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 2 - %lane2 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %lane2, 0, 2 - ret %struct.float64x2x3_t %.fca.0.2.insert -} - -define %struct.int8x8x3_t @test_vld3_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld3_dup_s8 -; CHECK: ld3r { {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b }, [x0] -entry: - %vld_dup = tail call { <8 x i8>, <8 x i8>, <8 x i8> } 
@llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) - %0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 0 - %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer - %1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 1 - %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 2 - %lane2 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %lane2, 0, 2 - ret %struct.int8x8x3_t %.fca.0.2.insert -} - -define %struct.int16x4x3_t @test_vld3_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld3_dup_s16 -; CHECK: ld3r { {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h }, [x0] -entry: - %0 = bitcast i16* %a to i8* - %vld_dup = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) - %1 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 0 - %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 1 - %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer - %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 2 - %lane2 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %lane2, 0, 2 - ret %struct.int16x4x3_t %.fca.0.2.insert -} - -define %struct.int32x2x3_t @test_vld3_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld3_dup_s32 -; CHECK: ld3r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0] -entry: - %0 = bitcast i32* %a to i8* - %vld_dup = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) - %1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 0 - %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 1 - %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 2 - %lane2 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %lane2, 0, 2 - ret %struct.int32x2x3_t %.fca.0.2.insert -} - -define %struct.int64x1x3_t @test_vld3_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld3_dup_s64 -; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0] -entry: - %0 = bitcast i64* %a to i8* - %vld_dup = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %0, i32 8) - %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 0 - %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x 
i64>, <1 x i64> } %vld_dup, 1 - %vld_dup.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 2 - %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld_dup.fca.2.extract, 0, 2 - ret %struct.int64x1x3_t %.fca.0.2.insert -} - -define %struct.float32x2x3_t @test_vld3_dup_f32(float* %a) { -; CHECK-LABEL: test_vld3_dup_f32 -; CHECK: ld3r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0] -entry: - %0 = bitcast float* %a to i8* - %vld_dup = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4) - %1 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 0 - %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 1 - %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 2 - %lane2 = shufflevector <2 x float> %3, <2 x float> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %lane2, 0, 2 - ret %struct.float32x2x3_t %.fca.0.2.insert -} - -define %struct.float64x1x3_t @test_vld3_dup_f64(double* %a) { -; CHECK-LABEL: test_vld3_dup_f64 -; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0] -entry: - %0 = bitcast double* %a to i8* - %vld_dup = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %0, i32 8) - %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 0 - %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 1 - %vld_dup.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 2 - %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld_dup.fca.2.extract, 0, 2 - ret %struct.float64x1x3_t %.fca.0.2.insert -} - -define %struct.int8x16x4_t @test_vld4q_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld4q_dup_s8 -; CHECK: ld4r { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, [x0] -entry: - %vld_dup = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1) - %0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 0 - %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer - %1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 1 - %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer - %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 2 - %lane2 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer - %3 = 
extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 3 - %lane3 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %lane3, 0, 3 - ret %struct.int8x16x4_t %.fca.0.3.insert -} - -define %struct.int16x8x4_t @test_vld4q_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld4q_dup_s16 -; CHECK: ld4r { {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h }, [x0] -entry: - %0 = bitcast i16* %a to i8* - %vld_dup = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2) - %1 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 0 - %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer - %2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 1 - %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer - %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 2 - %lane2 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer - %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 3 - %lane3 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %lane3, 0, 3 - ret %struct.int16x8x4_t %.fca.0.3.insert -} - -define %struct.int32x4x4_t @test_vld4q_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld4q_dup_s32 -; CHECK: ld4r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0] -entry: - %0 = bitcast i32* %a to i8* - %vld_dup = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4) - %1 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 0 - %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 1 - %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer - %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 2 - %lane2 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer - %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 3 - %lane3 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %lane3, 0, 3 - ret %struct.int32x4x4_t %.fca.0.3.insert -} - -define %struct.int64x2x4_t @test_vld4q_dup_s64(i64* %a) 
{ -; CHECK-LABEL: test_vld4q_dup_s64 -; CHECK: ld4r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0] -entry: - %0 = bitcast i64* %a to i8* - %vld_dup = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8) - %1 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 0 - %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 1 - %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 2 - %lane2 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer - %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 3 - %lane3 = shufflevector <2 x i64> %4, <2 x i64> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %lane3, 0, 3 - ret %struct.int64x2x4_t %.fca.0.3.insert -} - -define %struct.float32x4x4_t @test_vld4q_dup_f32(float* %a) { -; CHECK-LABEL: test_vld4q_dup_f32 -; CHECK: ld4r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0] -entry: - %0 = bitcast float* %a to i8* - %vld_dup = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, <4 x float> undef, <4 x float> undef, i32 0, i32 4) - %1 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 0 - %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 1 - %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer - %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 2 - %lane2 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> zeroinitializer - %4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 3 - %lane3 = shufflevector <4 x float> %4, <4 x float> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %lane3, 0, 3 - ret %struct.float32x4x4_t %.fca.0.3.insert -} - -define %struct.float64x2x4_t @test_vld4q_dup_f64(double* %a) { -; CHECK-LABEL: test_vld4q_dup_f64 -; CHECK: ld4r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0] -entry: - %0 = bitcast double* %a to i8* - %vld_dup = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8) - %1 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 0 - %lane = shufflevector <2 x double> %1, <2 x double> undef, 
<2 x i32> zeroinitializer - %2 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 1 - %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 2 - %lane2 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer - %4 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 3 - %lane3 = shufflevector <2 x double> %4, <2 x double> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %lane3, 0, 3 - ret %struct.float64x2x4_t %.fca.0.3.insert -} - -define %struct.int8x8x4_t @test_vld4_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld4_dup_s8 -; CHECK: ld4r { {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b }, [x0] -entry: - %vld_dup = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) - %0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 0 - %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer - %1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 1 - %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 2 - %lane2 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer - %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 3 - %lane3 = shufflevector <8 x i8> %3, <8 x i8> undef, <8 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %lane3, 0, 3 - ret %struct.int8x8x4_t %.fca.0.3.insert -} - -define %struct.int16x4x4_t @test_vld4_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld4_dup_s16 -; CHECK: ld4r { {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h }, [x0] -entry: - %0 = bitcast i16* %a to i8* - %vld_dup = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) - %1 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 0 - %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 1 - %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer - %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 2 - %lane2 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer - %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 3 - %lane3 = shufflevector <4 x i16> %4, <4 x i16> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %lane, 0, 0 - %.fca.0.1.insert = insertvalue 
%struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %lane3, 0, 3 - ret %struct.int16x4x4_t %.fca.0.3.insert -} - -define %struct.int32x2x4_t @test_vld4_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld4_dup_s32 -; CHECK: ld4r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0] -entry: - %0 = bitcast i32* %a to i8* - %vld_dup = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) - %1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 0 - %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 1 - %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 2 - %lane2 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer - %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 3 - %lane3 = shufflevector <2 x i32> %4, <2 x i32> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %lane3, 0, 3 - ret %struct.int32x2x4_t %.fca.0.3.insert -} - -define %struct.int64x1x4_t @test_vld4_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld4_dup_s64 -; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0] -entry: - %0 = bitcast i64* %a to i8* - %vld_dup = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %0, i32 8) - %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 0 - %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 1 - %vld_dup.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 2 - %vld_dup.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 3 - %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld_dup.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld_dup.fca.3.extract, 0, 3 - ret %struct.int64x1x4_t %.fca.0.3.insert -} - -define %struct.float32x2x4_t @test_vld4_dup_f32(float* %a) { -; CHECK-LABEL: test_vld4_dup_f32 -; CHECK: ld4r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0] -entry: - %0 = bitcast float* %a to i8* - %vld_dup = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4) - %1 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 0 - %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> 
zeroinitializer - %2 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 1 - %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 2 - %lane2 = shufflevector <2 x float> %3, <2 x float> undef, <2 x i32> zeroinitializer - %4 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 3 - %lane3 = shufflevector <2 x float> %4, <2 x float> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %lane3, 0, 3 - ret %struct.float32x2x4_t %.fca.0.3.insert -} - -define %struct.float64x1x4_t @test_vld4_dup_f64(double* %a) { -; CHECK-LABEL: test_vld4_dup_f64 -; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0] -entry: - %0 = bitcast double* %a to i8* - %vld_dup = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %0, i32 8) - %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 0 - %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 1 - %vld_dup.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 2 - %vld_dup.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 3 - %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld_dup.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld_dup.fca.3.extract, 0, 3 - ret %struct.float64x1x4_t %.fca.0.3.insert -} - -define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) { -; CHECK-LABEL: test_vld1q_lane_s8 -; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %0 = load i8* %a, align 1 - %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15 - ret <16 x i8> %vld1_lane -} - -define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) { -; CHECK-LABEL: test_vld1q_lane_s16 -; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %0 = load i16* %a, align 2 - %vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7 - ret <8 x i16> %vld1_lane -} - -define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) { -; CHECK-LABEL: test_vld1q_lane_s32 -; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = load i32* %a, align 4 - %vld1_lane = insertelement <4 x i32> %b, i32 %0, i32 3 - ret <4 x i32> %vld1_lane -} - -define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) { -; CHECK-LABEL: test_vld1q_lane_s64 -; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %0 = load i64* %a, align 8 - %vld1_lane = insertelement <2 x i64> %b, i64 %0, i32 1 - ret <2 x i64> %vld1_lane -} - -define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) { -; CHECK-LABEL: test_vld1q_lane_f32 -; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = load float* %a, align 4 - 
%vld1_lane = insertelement <4 x float> %b, float %0, i32 3 - ret <4 x float> %vld1_lane -} - -define <2 x double> @test_vld1q_lane_f64(double* %a, <2 x double> %b) { -; CHECK-LABEL: test_vld1q_lane_f64 -; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %0 = load double* %a, align 8 - %vld1_lane = insertelement <2 x double> %b, double %0, i32 1 - ret <2 x double> %vld1_lane -} - -define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) { -; CHECK-LABEL: test_vld1_lane_s8 -; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %0 = load i8* %a, align 1 - %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7 - ret <8 x i8> %vld1_lane -} - -define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) { -; CHECK-LABEL: test_vld1_lane_s16 -; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %0 = load i16* %a, align 2 - %vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3 - ret <4 x i16> %vld1_lane -} - -define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) { -; CHECK-LABEL: test_vld1_lane_s32 -; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = load i32* %a, align 4 - %vld1_lane = insertelement <2 x i32> %b, i32 %0, i32 1 - ret <2 x i32> %vld1_lane -} - -define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) { -; CHECK-LABEL: test_vld1_lane_s64 -; CHECK: ld1r { {{v[0-9]+}}.1d }, [x0] -entry: - %0 = load i64* %a, align 8 - %vld1_lane = insertelement <1 x i64> undef, i64 %0, i32 0 - ret <1 x i64> %vld1_lane -} - -define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) { -; CHECK-LABEL: test_vld1_lane_f32 -; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = load float* %a, align 4 - %vld1_lane = insertelement <2 x float> %b, float %0, i32 1 - ret <2 x float> %vld1_lane -} - -define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) { -; CHECK-LABEL: test_vld1_lane_f64 -; CHECK: ld1r { {{v[0-9]+}}.1d }, [x0] -entry: - %0 = load double* %a, align 8 - %vld1_lane = insertelement <1 x double> undef, double %0, i32 0 - ret <1 x double> %vld1_lane -} - -define %struct.int16x8x2_t @test_vld2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vld2q_lane_s16 -; CHECK: ld2 { {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1 - %0 = bitcast i16* %a to i8* - %vld2_lane = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 7, i32 2) - %vld2_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int16x8x2_t %.fca.0.1.insert -} - -define %struct.int32x4x2_t @test_vld2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vld2q_lane_s32 -; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1 - %0 = bitcast i32* %a to i8* - %vld2_lane = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 3, i32 4) - 
%vld2_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int32x4x2_t %.fca.0.1.insert -} - -define %struct.int64x2x2_t @test_vld2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vld2q_lane_s64 -; CHECK: ld2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1 - %0 = bitcast i64* %a to i8* - %vld2_lane = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 1, i32 8) - %vld2_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int64x2x2_t %.fca.0.1.insert -} - -define %struct.float32x4x2_t @test_vld2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vld2q_lane_f32 -; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1 - %0 = bitcast float* %a to i8* - %vld2_lane = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 3, i32 4) - %vld2_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.float32x4x2_t %.fca.0.1.insert -} - -define %struct.float64x2x2_t @test_vld2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vld2q_lane_f64 -; CHECK: ld2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1 - %0 = bitcast double* %a to i8* - %vld2_lane = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 1, i32 8) - %vld2_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.float64x2x2_t %.fca.0.1.insert -} - -define %struct.int8x8x2_t @test_vld2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vld2_lane_s8 -; CHECK: ld2 { {{v[0-9]+}}.b, 
{{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1 - %vld2_lane = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 7, i32 1) - %vld2_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int8x8x2_t %.fca.0.1.insert -} - -define %struct.int16x4x2_t @test_vld2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vld2_lane_s16 -; CHECK: ld2 { {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1 - %0 = bitcast i16* %a to i8* - %vld2_lane = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 3, i32 2) - %vld2_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int16x4x2_t %.fca.0.1.insert -} - -define %struct.int32x2x2_t @test_vld2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vld2_lane_s32 -; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1 - %0 = bitcast i32* %a to i8* - %vld2_lane = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 1, i32 4) - %vld2_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int32x2x2_t %.fca.0.1.insert -} - -define %struct.int64x1x2_t @test_vld2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vld2_lane_s64 -; CHECK: ld2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1 - %0 = bitcast i64* %a to i8* - %vld2_lane = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 0, i32 8) - %vld2_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x 
i64> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int64x1x2_t %.fca.0.1.insert -} - -define %struct.float32x2x2_t @test_vld2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vld2_lane_f32 -; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1 - %0 = bitcast float* %a to i8* - %vld2_lane = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 1, i32 4) - %vld2_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.float32x2x2_t %.fca.0.1.insert -} - -define %struct.float64x1x2_t @test_vld2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vld2_lane_f64 -; CHECK: ld2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1 - %0 = bitcast double* %a to i8* - %vld2_lane = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 0, i32 8) - %vld2_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.float64x1x2_t %.fca.0.1.insert -} - -define %struct.int16x8x3_t @test_vld3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vld3q_lane_s16 -; CHECK: ld3 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2 - %0 = bitcast i16* %a to i8* - %vld3_lane = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 7, i32 2) - %vld3_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int16x8x3_t %.fca.0.2.insert -} - -define %struct.int32x4x3_t @test_vld3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vld3q_lane_s32 -; CHECK: ld3 { 
{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2 - %0 = bitcast i32* %a to i8* - %vld3_lane = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 3, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int32x4x3_t %.fca.0.2.insert -} - -define %struct.int64x2x3_t @test_vld3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vld3q_lane_s64 -; CHECK: ld3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2 - %0 = bitcast i64* %a to i8* - %vld3_lane = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 1, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int64x2x3_t %.fca.0.2.insert -} - -define %struct.float32x4x3_t @test_vld3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vld3q_lane_f32 -; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2 - %0 = bitcast float* %a to i8* - %vld3_lane = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 3, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x 
float> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.float32x4x3_t %.fca.0.2.insert -} - -define %struct.float64x2x3_t @test_vld3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vld3q_lane_f64 -; CHECK: ld3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2 - %0 = bitcast double* %a to i8* - %vld3_lane = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 1, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.float64x2x3_t %.fca.0.2.insert -} - -define %struct.int8x8x3_t @test_vld3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vld3_lane_s8 -; CHECK: ld3 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2 - %vld3_lane = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 7, i32 1) - %vld3_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int8x8x3_t %.fca.0.2.insert -} - -define %struct.int16x4x3_t @test_vld3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vld3_lane_s16 -; CHECK: ld3 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2 - %0 = bitcast i16* %a to i8* - %vld3_lane = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %0, <4 x i16> 
%b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 3, i32 2) - %vld3_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int16x4x3_t %.fca.0.2.insert -} - -define %struct.int32x2x3_t @test_vld3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vld3_lane_s32 -; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2 - %0 = bitcast i32* %a to i8* - %vld3_lane = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 1, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int32x2x3_t %.fca.0.2.insert -} - -define %struct.int64x1x3_t @test_vld3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vld3_lane_s64 -; CHECK: ld3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2 - %0 = bitcast i64* %a to i8* - %vld3_lane = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 0, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int64x1x3_t %.fca.0.2.insert -} - -define %struct.float32x2x3_t @test_vld3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vld3_lane_f32 -; CHECK: ld3 { {{v[0-9]+}}.s, 
{{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2 - %0 = bitcast float* %a to i8* - %vld3_lane = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 1, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.float32x2x3_t %.fca.0.2.insert -} - -define %struct.float64x1x3_t @test_vld3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vld3_lane_f64 -; CHECK: ld3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2 - %0 = bitcast double* %a to i8* - %vld3_lane = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 0, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.float64x1x3_t %.fca.0.2.insert -} - -define %struct.int8x16x4_t @test_vld4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) { -; CHECK-LABEL: test_vld4q_lane_s8 -; CHECK: ld4 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 15, i32 1) - %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 
x i8>, <16 x i8> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int8x16x4_t %.fca.0.3.insert -} - -define %struct.int16x8x4_t @test_vld4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vld4q_lane_s16 -; CHECK: ld4 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3 - %0 = bitcast i16* %a to i8* - %vld3_lane = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 7, i32 2) - %vld3_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int16x8x4_t %.fca.0.3.insert -} - -define %struct.int32x4x4_t @test_vld4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vld4q_lane_s32 -; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3 - %0 = bitcast i32* %a to i8* - %vld3_lane = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 3, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 2 - 
%vld3_lane.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int32x4x4_t %.fca.0.3.insert -} - -define %struct.int64x2x4_t @test_vld4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vld4q_lane_s64 -; CHECK: ld4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3 - %0 = bitcast i64* %a to i8* - %vld3_lane = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 1, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int64x2x4_t %.fca.0.3.insert -} - -define %struct.float32x4x4_t @test_vld4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vld4q_lane_f32 -; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3 - %0 = bitcast float* %a to i8* - %vld3_lane = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 3, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } 
%vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.float32x4x4_t %.fca.0.3.insert -} - -define %struct.float64x2x4_t @test_vld4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vld4q_lane_f64 -; CHECK: ld4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3 - %0 = bitcast double* %a to i8* - %vld3_lane = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 1, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.float64x2x4_t %.fca.0.3.insert -} - -define %struct.int8x8x4_t @test_vld4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vld4_lane_s8 -; CHECK: ld4 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3 - %vld3_lane = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 7, i32 1) - %vld3_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> 
%vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int8x8x4_t %.fca.0.3.insert -} - -define %struct.int16x4x4_t @test_vld4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vld4_lane_s16 -; CHECK: ld4 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3 - %0 = bitcast i16* %a to i8* - %vld3_lane = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 3, i32 2) - %vld3_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int16x4x4_t %.fca.0.3.insert -} - -define %struct.int32x2x4_t @test_vld4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vld4_lane_s32 -; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3 - %0 = bitcast i32* %a to i8* - %vld3_lane = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 1, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = 
insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int32x2x4_t %.fca.0.3.insert -} - -define %struct.int64x1x4_t @test_vld4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vld4_lane_s64 -; CHECK: ld4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3 - %0 = bitcast i64* %a to i8* - %vld3_lane = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 0, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int64x1x4_t %.fca.0.3.insert -} - -define %struct.float32x2x4_t @test_vld4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vld4_lane_f32 -; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3 - %0 = bitcast float* %a to i8* - %vld3_lane = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 1, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld3_lane.fca.2.extract, 0, 
2 - %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.float32x2x4_t %.fca.0.3.insert -} - -define %struct.float64x1x4_t @test_vld4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vld4_lane_f64 -; CHECK: ld4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3 - %0 = bitcast double* %a to i8* - %vld3_lane = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 0, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.float64x1x4_t %.fca.0.3.insert -} - -define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) { -; CHECK-LABEL: test_vst1q_lane_s8 -; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <16 x i8> %b, i32 15 - store i8 %0, i8* %a, align 1 - ret void -} - -define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) { -; CHECK-LABEL: test_vst1q_lane_s16 -; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <8 x i16> %b, i32 7 - store i16 %0, i16* %a, align 2 - ret void -} - -define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) { -; CHECK-LABEL: test_vst1q_lane_s32 -; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <4 x i32> %b, i32 3 - store i32 %0, i32* %a, align 4 - ret void -} - -define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) { -; CHECK-LABEL: test_vst1q_lane_s64 -; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <2 x i64> %b, i32 1 - store i64 %0, i64* %a, align 8 - ret void -} - -define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) { -; CHECK-LABEL: test_vst1q_lane_f32 -; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <4 x float> %b, i32 3 - store float %0, float* %a, align 4 - ret void -} - -define void @test_vst1q_lane_f64(double* %a, <2 x double> %b) { -; CHECK-LABEL: test_vst1q_lane_f64 -; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <2 x double> %b, i32 1 - store double %0, double* %a, align 8 - ret void -} - -define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) { -; CHECK-LABEL: test_vst1_lane_s8 
-; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <8 x i8> %b, i32 7 - store i8 %0, i8* %a, align 1 - ret void -} - -define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) { -; CHECK-LABEL: test_vst1_lane_s16 -; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <4 x i16> %b, i32 3 - store i16 %0, i16* %a, align 2 - ret void -} - -define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) { -; CHECK-LABEL: test_vst1_lane_s32 -; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <2 x i32> %b, i32 1 - store i32 %0, i32* %a, align 4 - ret void -} - -define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) { -; CHECK-LABEL: test_vst1_lane_s64 -; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <1 x i64> %b, i32 0 - store i64 %0, i64* %a, align 8 - ret void -} - -define void @test_vst1_lane_f32(float* %a, <2 x float> %b) { -; CHECK-LABEL: test_vst1_lane_f32 -; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <2 x float> %b, i32 1 - store float %0, float* %a, align 4 - ret void -} - -define void @test_vst1_lane_f64(double* %a, <1 x double> %b) { -; CHECK-LABEL: test_vst1_lane_f64 -; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <1 x double> %b, i32 0 - store double %0, double* %a, align 8 - ret void -} - -define void @test_vst2q_lane_s8(i8* %a, [2 x <16 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst2q_lane_s8 -; CHECK: st2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %b.coerce, 1 - tail call void @llvm.arm.neon.vst2lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, i32 15, i32 1) - ret void -} - -define void @test_vst2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst2q_lane_s16 -; CHECK: st2 { {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1 - %0 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 7, i32 2) - ret void -} - -define void @test_vst2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst2q_lane_s32 -; CHECK: st2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1 - %0 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 3, i32 4) - ret void -} - -define void @test_vst2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst2q_lane_s64 -; CHECK: st2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1 - %0 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 1, i32 8) - ret void -} - -define void @test_vst2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vst2q_lane_f32 -; CHECK: st2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] 
-entry: - %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1 - %0 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 3, i32 4) - ret void -} - -define void @test_vst2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vst2q_lane_f64 -; CHECK: st2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1 - %0 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 1, i32 8) - ret void -} - -define void @test_vst2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst2_lane_s8 -; CHECK: st2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1 - tail call void @llvm.arm.neon.vst2lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 7, i32 1) - ret void -} - -define void @test_vst2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst2_lane_s16 -; CHECK: st2 { {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1 - %0 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 3, i32 2) - ret void -} - -define void @test_vst2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst2_lane_s32 -; CHECK: st2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1 - %0 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 1, i32 4) - ret void -} - -define void @test_vst2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst2_lane_s64 -; CHECK: st2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1 - %0 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 0, i32 8) - ret void -} - -define void @test_vst2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vst2_lane_f32 -; CHECK: st2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1 - %0 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 1, i32 4) - ret void -} - -define void @test_vst2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vst2_lane_f64 -; CHECK: st2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - 
%b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1 - %0 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 0, i32 8) - ret void -} - -define void @test_vst3q_lane_s8(i8* %a, [3 x <16 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst3q_lane_s8 -; CHECK: st3 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %b.coerce, 2 - tail call void @llvm.arm.neon.vst3lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, i32 15, i32 1) - ret void -} - -define void @test_vst3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst3q_lane_s16 -; CHECK: st3 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2 - %0 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 7, i32 2) - ret void -} - -define void @test_vst3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst3q_lane_s32 -; CHECK: st3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2 - %0 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 3, i32 4) - ret void -} - -define void @test_vst3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst3q_lane_s64 -; CHECK: st3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2 - %0 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 1, i32 8) - ret void -} - -define void @test_vst3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vst3q_lane_f32 -; CHECK: st3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2 - %0 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 3, i32 4) - ret void -} - -define void @test_vst3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) { -; CHECK-LABEL: 
test_vst3q_lane_f64 -; CHECK: st3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2 - %0 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 1, i32 8) - ret void -} - -define void @test_vst3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst3_lane_s8 -; CHECK: st3 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2 - tail call void @llvm.arm.neon.vst3lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 7, i32 1) - ret void -} - -define void @test_vst3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst3_lane_s16 -; CHECK: st3 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2 - %0 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 3, i32 2) - ret void -} - -define void @test_vst3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst3_lane_s32 -; CHECK: st3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2 - %0 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 1, i32 4) - ret void -} - -define void @test_vst3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst3_lane_s64 -; CHECK: st3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2 - %0 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 0, i32 8) - ret void -} - -define void @test_vst3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vst3_lane_f32 -; CHECK: st3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2 - %0 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 
x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 1, i32 4) - ret void -} - -define void @test_vst3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vst3_lane_f64 -; CHECK: st3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2 - %0 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 0, i32 8) - ret void -} - -define void @test_vst4q_lane_s8(i16* %a, [4 x <16 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst4q_lane_s8 -; CHECK: st4 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %0 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v16i8(i8* %0, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 15, i32 2) - ret void -} - -define void @test_vst4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst4q_lane_s16 -; CHECK: st4 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3 - %0 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 7, i32 2) - ret void -} - -define void @test_vst4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst4q_lane_s32 -; CHECK: st4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3 - %0 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 3, i32 4) - ret void -} - -define void @test_vst4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst4q_lane_s64 -; CHECK: st4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3 - %0 = bitcast i64* %a to i8* - tail call void 
@llvm.arm.neon.vst4lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 1, i32 8) - ret void -} - -define void @test_vst4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vst4q_lane_f32 -; CHECK: st4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3 - %0 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 3, i32 4) - ret void -} - -define void @test_vst4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vst4q_lane_f64 -; CHECK: st4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3 - %0 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 1, i32 8) - ret void -} - -define void @test_vst4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst4_lane_s8 -; CHECK: st4 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3 - tail call void @llvm.arm.neon.vst4lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 7, i32 1) - ret void -} - -define void @test_vst4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst4_lane_s16 -; CHECK: st4 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3 - %0 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 3, i32 2) - ret void -} - -define void @test_vst4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst4_lane_s32 -; CHECK: st4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1 - 
%b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3 - %0 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 1, i32 4) - ret void -} - -define void @test_vst4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst4_lane_s64 -; CHECK: st4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3 - %0 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 0, i32 8) - ret void -} - -define void @test_vst4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vst4_lane_f32 -; CHECK: st4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3 - %0 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 1, i32 4) - ret void -} - -define void @test_vst4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vst4_lane_f64 -; CHECK: st4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3 - %0 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 0, i32 8) - ret void -} - -declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32) -declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) -declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) -declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8*, <2 x i64>, <2 x i64>, i32, i32) -declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) -declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8*, <2 x double>, <2 x double>, i32, i32) -declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) -declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) -declare { <2 x i32>, <2 x i32> } 
@llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) -declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32) -declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) -declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32) -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) -declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) -declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32) -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) -declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32) -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32) -declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32) -declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32) 
-declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2lane.v1f64(i8*, <1 x double>, <1 x double>, i32, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v2i64(i8*, <2 x i64>, <2 x i64>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v2f64(i8*, <2 x double>, <2 x double>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v1f64(i8*, <1 x double>, <1 x double>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) -declare 
void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32) - -define %struct.int8x16x2_t @test_vld2q_lane_s8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) { -; CHECK-LABEL: test_vld2q_lane_s8 -; CHECK: ld2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0] -entry: - %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0 - %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1 - %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1) - %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int8x16x2_t %.fca.0.1.insert -} - -define %struct.uint8x16x2_t @test_vld2q_lane_u8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) { -; CHECK-LABEL: test_vld2q_lane_u8 -; CHECK: ld2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0] -entry: - %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0 - %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1 - %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1) - %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.uint8x16x2_t %.fca.0.1.insert -} - -define %struct.poly8x16x2_t @test_vld2q_lane_p8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) { -; CHECK-LABEL: test_vld2q_lane_p8 -; CHECK: ld2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0] -entry: - %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0 - %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1 - %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1) - %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.poly8x16x2_t %.fca.0.1.insert -} - -define %struct.int8x16x3_t @test_vld3q_lane_s8(i8* readonly %ptr, [3 x <16 x i8>] %src.coerce) { -; CHECK-LABEL: test_vld3q_lane_s8 -; CHECK: ld3 { {{v[0-9]+}}.b, 
{{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0] -entry: - %src.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %src.coerce, 0 - %src.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %src.coerce, 1 - %src.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %src.coerce, 2 - %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, <16 x i8> %src.coerce.fca.2.extract, i32 15, i32 1) - %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int8x16x3_t %.fca.0.2.insert -} - -define %struct.uint8x16x3_t @test_vld3q_lane_u8(i8* readonly %ptr, [3 x <16 x i8>] %src.coerce) { -; CHECK-LABEL: test_vld3q_lane_u8 -; CHECK: ld3 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0] -entry: - %src.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %src.coerce, 0 - %src.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %src.coerce, 1 - %src.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %src.coerce, 2 - %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, <16 x i8> %src.coerce.fca.2.extract, i32 15, i32 1) - %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.uint8x16x3_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.uint8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.uint8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.uint8x16x3_t %.fca.0.2.insert -} - diff --git a/test/CodeGen/AArch64/neon-simd-ldst.ll b/test/CodeGen/AArch64/neon-simd-ldst.ll deleted file mode 100644 index 7c78b6933426..000000000000 --- a/test/CodeGen/AArch64/neon-simd-ldst.ll +++ /dev/null @@ -1,165 +0,0 @@ -; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; Just intrinsic mashing. Duplicates existing arm64 tests. 
- -define void @test_ldstq_4v(i8* noalias %io, i32 %count) { -; CHECK-LABEL: test_ldstq_4v -; CHECK: ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0] -; CHECK: st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0] -entry: - %tobool62 = icmp eq i32 %count, 0 - br i1 %tobool62, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %count.addr.063 = phi i32 [ %dec, %while.body ], [ %count, %entry ] - %dec = add i32 %count.addr.063, -1 - %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %io, i32 1) - %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3 - tail call void @llvm.arm.neon.vst4.v16i8(i8* %io, <16 x i8> %vld4.fca.0.extract, <16 x i8> %vld4.fca.1.extract, <16 x i8> %vld4.fca.2.extract, <16 x i8> %vld4.fca.3.extract, i32 1) - %tobool = icmp eq i32 %dec, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body, %entry - ret void -} - -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32) - -declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) - -define void @test_ldstq_3v(i8* noalias %io, i32 %count) { -; CHECK-LABEL: test_ldstq_3v -; CHECK: ld3 { v0.16b, v1.16b, v2.16b }, [x0] -; CHECK: st3 { v0.16b, v1.16b, v2.16b }, [x0] -entry: - %tobool47 = icmp eq i32 %count, 0 - br i1 %tobool47, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %count.addr.048 = phi i32 [ %dec, %while.body ], [ %count, %entry ] - %dec = add i32 %count.addr.048, -1 - %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %io, i32 1) - %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2 - tail call void @llvm.arm.neon.vst3.v16i8(i8* %io, <16 x i8> %vld3.fca.0.extract, <16 x i8> %vld3.fca.1.extract, <16 x i8> %vld3.fca.2.extract, i32 1) - %tobool = icmp eq i32 %dec, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body, %entry - ret void -} - -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) - -declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) - -define void @test_ldstq_2v(i8* noalias %io, i32 %count) { -; CHECK-LABEL: test_ldstq_2v -; CHECK: ld2 { v0.16b, v1.16b }, [x0] -; CHECK: st2 { v0.16b, v1.16b }, [x0] -entry: - %tobool22 = icmp eq i32 %count, 0 - br i1 %tobool22, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ] - %dec = add i32 %count.addr.023, -1 - %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %io, i32 1) - %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1 - tail call void @llvm.arm.neon.vst2.v16i8(i8* %io, <16 x i8> %vld2.fca.0.extract, <16 x i8> %vld2.fca.1.extract, i32 1) - %tobool = icmp eq i32 %dec, 0 - br i1 %tobool, label %while.end, label %while.body - 
-while.end: ; preds = %while.body, %entry - ret void -} - -declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32) - -declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) - -define void @test_ldst_4v(i8* noalias %io, i32 %count) { -; CHECK-LABEL: test_ldst_4v -; CHECK: ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] -; CHECK: st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] -entry: - %tobool42 = icmp eq i32 %count, 0 - br i1 %tobool42, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %count.addr.043 = phi i32 [ %dec, %while.body ], [ %count, %entry ] - %dec = add i32 %count.addr.043, -1 - %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %io, i32 1) - %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3 - tail call void @llvm.arm.neon.vst4.v8i8(i8* %io, <8 x i8> %vld4.fca.0.extract, <8 x i8> %vld4.fca.1.extract, <8 x i8> %vld4.fca.2.extract, <8 x i8> %vld4.fca.3.extract, i32 1) - %tobool = icmp eq i32 %dec, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body, %entry - ret void -} - -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32) - -declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) - -define void @test_ldst_3v(i8* noalias %io, i32 %count) { -; CHECK-LABEL: test_ldst_3v -; CHECK: ld3 { v0.8b, v1.8b, v2.8b }, [x0] -; CHECK: st3 { v0.8b, v1.8b, v2.8b }, [x0] -entry: - %tobool32 = icmp eq i32 %count, 0 - br i1 %tobool32, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %count.addr.033 = phi i32 [ %dec, %while.body ], [ %count, %entry ] - %dec = add i32 %count.addr.033, -1 - %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %io, i32 1) - %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2 - tail call void @llvm.arm.neon.vst3.v8i8(i8* %io, <8 x i8> %vld3.fca.0.extract, <8 x i8> %vld3.fca.1.extract, <8 x i8> %vld3.fca.2.extract, i32 1) - %tobool = icmp eq i32 %dec, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body, %entry - ret void -} - -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32) - -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) - -define void @test_ldst_2v(i8* noalias %io, i32 %count) { -; CHECK-LABEL: test_ldst_2v -; CHECK: ld2 { v0.8b, v1.8b }, [x0] -; CHECK: st2 { v0.8b, v1.8b }, [x0] -entry: - %tobool22 = icmp eq i32 %count, 0 - br i1 %tobool22, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ] - %dec = add i32 %count.addr.023, -1 - %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %io, i32 1) - %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1 - tail call void @llvm.arm.neon.vst2.v8i8(i8* %io, <8 x i8> %vld2.fca.0.extract, <8 x i8> 
%vld2.fca.1.extract, i32 1) - %tobool = icmp eq i32 %dec, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body, %entry - ret void -} - -declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32) - -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) - diff --git a/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll b/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll deleted file mode 100644 index 181c69c89b26..000000000000 --- a/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll +++ /dev/null @@ -1,355 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has equivalent tests to these in various files. - -;Check for a post-increment updating load. -define <4 x i16> @test_vld1_fx_update(i16** %ptr) nounwind { -; CHECK: test_vld1_fx_update -; CHECK: ld1 { v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}], #8 - %A = load i16** %ptr - %tmp0 = bitcast i16* %A to i8* - %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 2) - %tmp2 = getelementptr i16* %A, i32 4 - store i16* %tmp2, i16** %ptr - ret <4 x i16> %tmp1 -} - -;Check for a post-increment updating load with register increment. -define <2 x i32> @test_vld1_reg_update(i32** %ptr, i32 %inc) nounwind { -; CHECK: test_vld1_reg_update -; CHECK: ld1 { v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %A = load i32** %ptr - %tmp0 = bitcast i32* %A to i8* - %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 4) - %tmp2 = getelementptr i32* %A, i32 %inc - store i32* %tmp2, i32** %ptr - ret <2 x i32> %tmp1 -} - -define <2 x float> @test_vld2_fx_update(float** %ptr) nounwind { -; CHECK: test_vld2_fx_update -; CHECK: ld2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}], #16 - %A = load float** %ptr - %tmp0 = bitcast float* %A to i8* - %tmp1 = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 4) - %tmp2 = extractvalue { <2 x float>, <2 x float> } %tmp1, 0 - %tmp3 = getelementptr float* %A, i32 4 - store float* %tmp3, float** %ptr - ret <2 x float> %tmp2 -} - -define <16 x i8> @test_vld2_reg_update(i8** %ptr, i32 %inc) nounwind { -; CHECK: test_vld2_reg_update -; CHECK: ld2 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %A = load i8** %ptr - %tmp0 = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %A, i32 1) - %tmp1 = extractvalue { <16 x i8>, <16 x i8> } %tmp0, 0 - %tmp2 = getelementptr i8* %A, i32 %inc - store i8* %tmp2, i8** %ptr - ret <16 x i8> %tmp1 -} - -define <4 x i32> @test_vld3_fx_update(i32** %ptr) nounwind { -; CHECK: test_vld3_fx_update -; CHECK: ld3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}], #48 - %A = load i32** %ptr - %tmp0 = bitcast i32* %A to i8* - %tmp1 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 4) - %tmp2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %tmp1, 0 - %tmp3 = getelementptr i32* %A, i32 12 - store i32* %tmp3, i32** %ptr - ret <4 x i32> %tmp2 -} - -define <4 x i16> @test_vld3_reg_update(i16** %ptr, i32 %inc) nounwind { -; CHECK: test_vld3_reg_update -; CHECK: ld3 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %A = load i16** %ptr - %tmp0 = bitcast i16* %A to i8* - %tmp1 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 2) - %tmp2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %tmp1, 0 - %tmp3 = getelementptr i16* %A, i32 %inc - store i16* %tmp3, i16** %ptr - ret <4 x i16> 
%tmp2 -} - -define <8 x i16> @test_vld4_fx_update(i16** %ptr) nounwind { -; CHECK: test_vld4_fx_update -; CHECK: ld4 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}], #64 - %A = load i16** %ptr - %tmp0 = bitcast i16* %A to i8* - %tmp1 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8) - %tmp2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %tmp1, 0 - %tmp3 = getelementptr i16* %A, i32 32 - store i16* %tmp3, i16** %ptr - ret <8 x i16> %tmp2 -} - -define <8 x i8> @test_vld4_reg_update(i8** %ptr, i32 %inc) nounwind { -; CHECK: test_vld4_reg_update -; CHECK: ld4 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %A = load i8** %ptr - %tmp0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %A, i32 1) - %tmp1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %tmp0, 0 - %tmp2 = getelementptr i8* %A, i32 %inc - store i8* %tmp2, i8** %ptr - ret <8 x i8> %tmp1 -} - -define void @test_vst1_fx_update(float** %ptr, <2 x float> %B) nounwind { -; CHECK: test_vst1_fx_update -; CHECK: st1 { v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}], #8 - %A = load float** %ptr - %tmp0 = bitcast float* %A to i8* - call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %B, i32 4) - %tmp2 = getelementptr float* %A, i32 2 - store float* %tmp2, float** %ptr - ret void -} - -define void @test_vst1_reg_update(i16** %ptr, <8 x i16> %B, i32 %inc) nounwind { -; CHECK: test_vst1_reg_update -; CHECK: st1 { v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}], x{{[0-9]+}} - %A = load i16** %ptr - %tmp0 = bitcast i16* %A to i8* - call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %B, i32 2) - %tmp1 = getelementptr i16* %A, i32 %inc - store i16* %tmp1, i16** %ptr - ret void -} - -define void @test_vst2_fx_update(i64** %ptr, <1 x i64> %B) nounwind { -; CHECK: test_vst2_fx_update -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}], #16 - %A = load i64** %ptr - %tmp0 = bitcast i64* %A to i8* - call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %B, <1 x i64> %B, i32 8) - %tmp1 = getelementptr i64* %A, i32 2 - store i64* %tmp1, i64** %ptr - ret void -} - -define void @test_vst2_reg_update(i8** %ptr, <8 x i8> %B, i32 %inc) nounwind { -; CHECK: test_vst2_reg_update -; CHECK: st2 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}], x{{[0-9]+}} - %A = load i8** %ptr - call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %B, <8 x i8> %B, i32 4) - %tmp0 = getelementptr i8* %A, i32 %inc - store i8* %tmp0, i8** %ptr - ret void -} - -define void @test_vst3_fx_update(i32** %ptr, <2 x i32> %B) nounwind { -; CHECK: test_vst3_fx_update -; CHECK: st3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}], #24 - %A = load i32** %ptr - %tmp0 = bitcast i32* %A to i8* - call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %B, <2 x i32> %B, <2 x i32> %B, i32 4) - %tmp1 = getelementptr i32* %A, i32 6 - store i32* %tmp1, i32** %ptr - ret void -} - -define void @test_vst3_reg_update(i16** %ptr, <8 x i16> %B, i32 %inc) nounwind { -; CHECK: test_vst3_reg_update -; CHECK: st3 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}], x{{[0-9]+}} - %A = load i16** %ptr - %tmp0 = bitcast i16* %A to i8* - call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %B, <8 x i16> %B, <8 x i16> %B, i32 2) - %tmp1 = getelementptr i16* %A, i32 %inc - store i16* %tmp1, i16** %ptr - ret void -} - -define void @test_vst4_fx_update(float** %ptr, <4 x float> %B) 
nounwind { -; CHECK: test_vst4_fx_update -; CHECK: st4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}], #64 - %A = load float** %ptr - %tmp0 = bitcast float* %A to i8* - call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %B, <4 x float> %B, <4 x float> %B, <4 x float> %B, i32 4) - %tmp1 = getelementptr float* %A, i32 16 - store float* %tmp1, float** %ptr - ret void -} - -define void @test_vst4_reg_update(i8** %ptr, <8 x i8> %B, i32 %inc) nounwind { -; CHECK: test_vst4_reg_update -; CHECK: st4 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}], x{{[0-9]+}} - %A = load i8** %ptr - call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %B, <8 x i8> %B, <8 x i8> %B, <8 x i8> %B, i32 1) - %tmp0 = getelementptr i8* %A, i32 %inc - store i8* %tmp0, i8** %ptr - ret void -} - - -declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) -declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) -declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32) -declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32) - -declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) -declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) -declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) -declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) -declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) -declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) -declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) - -define <16 x i8> @test_vld1x2_fx_update(i8* %a, i8** %ptr) { -; CHECK: test_vld1x2_fx_update -; CHECK: ld1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}], #32 - %1 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8* %a, i32 1) - %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0 - %tmp1 = getelementptr i8* %a, i32 32 - store i8* %tmp1, i8** %ptr - ret <16 x i8> %2 -} - -define <8 x i16> @test_vld1x2_reg_update(i16* %a, i16** %ptr, i32 %inc) { -; CHECK: test_vld1x2_reg_update -; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = bitcast i16* %a to i8* - %2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8* %1, i32 2) - %3 = extractvalue { <8 x i16>, <8 x i16> } %2, 0 - %tmp1 = getelementptr i16* %a, i32 %inc - store i16* %tmp1, i16** %ptr - ret <8 x i16> %3 -} - -define <2 x i64> @test_vld1x3_fx_update(i64* %a, i64** %ptr) { -; CHECK: test_vld1x3_fx_update -; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}], #48 - %1 = bitcast i64* %a to i8* - %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8* %1, i32 8) - %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 0 - %tmp1 = getelementptr i64* %a, i32 6 - store i64* %tmp1, i64** %ptr - ret <2 x i64> %3 -} - -define <8 x i16> @test_vld1x3_reg_update(i16* %a, i16** %ptr, i32 %inc) { -; CHECK: test_vld1x3_reg_update -; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, 
v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = bitcast i16* %a to i8* - %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8* %1, i32 2) - %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 0 - %tmp1 = getelementptr i16* %a, i32 %inc - store i16* %tmp1, i16** %ptr - ret <8 x i16> %3 -} - -define <4 x float> @test_vld1x4_fx_update(float* %a, float** %ptr) { -; CHECK: test_vld1x4_fx_update -; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}], #64 - %1 = bitcast float* %a to i8* - %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8* %1, i32 4) - %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0 - %tmp1 = getelementptr float* %a, i32 16 - store float* %tmp1, float** %ptr - ret <4 x float> %3 -} - -define <8 x i8> @test_vld1x4_reg_update(i8* readonly %a, i8** %ptr, i32 %inc) #0 { -; CHECK: test_vld1x4_reg_update -; CHECK: ld1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8* %a, i32 1) - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 - %tmp1 = getelementptr i8* %a, i32 %inc - store i8* %tmp1, i8** %ptr - ret <8 x i8> %2 -} - -define void @test_vst1x2_fx_update(i8* %a, [2 x <16 x i8>] %b.coerce, i8** %ptr) #2 { -; CHECK: test_vst1x2_fx_update -; CHECK: st1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}], #32 - %1 = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %2 = extractvalue [2 x <16 x i8>] %b.coerce, 1 - tail call void @llvm.aarch64.neon.vst1x2.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, i32 1) - %tmp1 = getelementptr i8* %a, i32 32 - store i8* %tmp1, i8** %ptr - ret void -} - -define void @test_vst1x2_reg_update(i16* %a, [2 x <8 x i16>] %b.coerce, i16** %ptr, i32 %inc) #2 { -; CHECK: test_vst1x2_reg_update -; CHECK: st1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [2 x <8 x i16>] %b.coerce, 0 - %2 = extractvalue [2 x <8 x i16>] %b.coerce, 1 - %3 = bitcast i16* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v8i16(i8* %3, <8 x i16> %1, <8 x i16> %2, i32 2) - %tmp1 = getelementptr i16* %a, i32 %inc - store i16* %tmp1, i16** %ptr - ret void -} - -define void @test_vst1x3_fx_update(i32* %a, [3 x <2 x i32>] %b.coerce, i32** %ptr) #2 { -; CHECK: test_vst1x3_fx_update -; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}], #24 - %1 = extractvalue [3 x <2 x i32>] %b.coerce, 0 - %2 = extractvalue [3 x <2 x i32>] %b.coerce, 1 - %3 = extractvalue [3 x <2 x i32>] %b.coerce, 2 - %4 = bitcast i32* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v2i32(i8* %4, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4) - %tmp1 = getelementptr i32* %a, i32 6 - store i32* %tmp1, i32** %ptr - ret void -} - -define void @test_vst1x3_reg_update(i64* %a, [3 x <1 x i64>] %b.coerce, i64** %ptr, i32 %inc) #2 { -; CHECK: test_vst1x3_reg_update -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [3 x <1 x i64>] %b.coerce, 0 - %2 = extractvalue [3 x <1 x i64>] %b.coerce, 1 - %3 = extractvalue [3 x <1 x i64>] %b.coerce, 2 - %4 = bitcast i64* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v1i64(i8* %4, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8) - %tmp1 = getelementptr i64* %a, i32 %inc - store i64* %tmp1, i64** %ptr - ret void -} - -define 
void @test_vst1x4_fx_update(float* %a, [4 x <4 x float>] %b.coerce, float** %ptr) #2 { -; CHECK: test_vst1x4_fx_update -; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}], #64 - %1 = extractvalue [4 x <4 x float>] %b.coerce, 0 - %2 = extractvalue [4 x <4 x float>] %b.coerce, 1 - %3 = extractvalue [4 x <4 x float>] %b.coerce, 2 - %4 = extractvalue [4 x <4 x float>] %b.coerce, 3 - %5 = bitcast float* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v4f32(i8* %5, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, i32 4) - %tmp1 = getelementptr float* %a, i32 16 - store float* %tmp1, float** %ptr - ret void -} - -define void @test_vst1x4_reg_update(double* %a, [4 x <2 x double>] %b.coerce, double** %ptr, i32 %inc) #2 { -; CHECK: test_vst1x4_reg_update -; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [4 x <2 x double>] %b.coerce, 0 - %2 = extractvalue [4 x <2 x double>] %b.coerce, 1 - %3 = extractvalue [4 x <2 x double>] %b.coerce, 2 - %4 = extractvalue [4 x <2 x double>] %b.coerce, 3 - %5 = bitcast double* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 8) - %tmp1 = getelementptr double* %a, i32 %inc - store double* %tmp1, double** %ptr - ret void -} - -declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8*, i32) -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8*, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8*, i32) -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8*, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8*, i32) -declare void @llvm.aarch64.neon.vst1x2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) -declare void @llvm.aarch64.neon.vst1x2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) -declare void @llvm.aarch64.neon.vst1x3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) -declare void @llvm.aarch64.neon.vst1x3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) -declare void @llvm.aarch64.neon.vst1x4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) #3 -declare void @llvm.aarch64.neon.vst1x4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32) #3 diff --git a/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll deleted file mode 100644 index 75f57c5d2ea9..000000000000 --- a/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll +++ /dev/null @@ -1,320 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has equivalents of these tests separately. 
- -define { [2 x <16 x i8>] } @test_vld2q_dup_fx_update(i8* %a, i8** %ptr) { -; CHECK-LABEL: test_vld2q_dup_fx_update -; CHECK: ld2r { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}], #2 - %1 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1) - %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0 - %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer - %4 = extractvalue { <16 x i8>, <16 x i8> } %1, 1 - %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> zeroinitializer - %6 = insertvalue { [2 x <16 x i8>] } undef, <16 x i8> %3, 0, 0 - %7 = insertvalue { [2 x <16 x i8>] } %6, <16 x i8> %5, 0, 1 - %tmp1 = getelementptr i8* %a, i32 2 - store i8* %tmp1, i8** %ptr - ret { [2 x <16 x i8>] } %7 -} - -define { [2 x <4 x i32>] } @test_vld2q_dup_reg_update(i32* %a, i32** %ptr, i32 %inc) { -; CHECK-LABEL: test_vld2q_dup_reg_update -; CHECK: ld2r { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = bitcast i32* %a to i8* - %2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %1, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4) - %3 = extractvalue { <4 x i32>, <4 x i32> } %2, 0 - %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer - %5 = extractvalue { <4 x i32>, <4 x i32> } %2, 1 - %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> zeroinitializer - %7 = insertvalue { [2 x <4 x i32>] } undef, <4 x i32> %4, 0, 0 - %8 = insertvalue { [2 x <4 x i32>] } %7, <4 x i32> %6, 0, 1 - %tmp1 = getelementptr i32* %a, i32 %inc - store i32* %tmp1, i32** %ptr - ret { [2 x <4 x i32>] } %8 -} - -define { [3 x <4 x i16>] } @test_vld3_dup_fx_update(i16* %a, i16** %ptr) { -; CHECK-LABEL: test_vld3_dup_fx_update -; CHECK: ld3r { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}], #6 - %1 = bitcast i16* %a to i8* - %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %1, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) - %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 0 - %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer - %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1 - %6 = shufflevector <4 x i16> %5, <4 x i16> undef, <4 x i32> zeroinitializer - %7 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2 - %8 = shufflevector <4 x i16> %7, <4 x i16> undef, <4 x i32> zeroinitializer - %9 = insertvalue { [3 x <4 x i16>] } undef, <4 x i16> %4, 0, 0 - %10 = insertvalue { [3 x <4 x i16>] } %9, <4 x i16> %6, 0, 1 - %11 = insertvalue { [3 x <4 x i16>] } %10, <4 x i16> %8, 0, 2 - %tmp1 = getelementptr i16* %a, i32 3 - store i16* %tmp1, i16** %ptr - ret { [3 x <4 x i16>] } %11 -} - -define { [3 x <8 x i8>] } @test_vld3_dup_reg_update(i8* %a, i8** %ptr, i32 %inc) { -; CHECK-LABEL: test_vld3_dup_reg_update -; CHECK: ld3r { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 - %3 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer - %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 - %5 = shufflevector <8 x i8> %4, <8 x i8> undef, <8 x i32> zeroinitializer - %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 - %7 = shufflevector <8 x i8> %6, <8 x i8> undef, <8 x i32> zeroinitializer - 
%8 = insertvalue { [3 x <8 x i8>] } undef, <8 x i8> %3, 0, 0 - %9 = insertvalue { [3 x <8 x i8>] } %8, <8 x i8> %5, 0, 1 - %10 = insertvalue { [3 x <8 x i8>] } %9, <8 x i8> %7, 0, 2 - %tmp1 = getelementptr i8* %a, i32 %inc - store i8* %tmp1, i8** %ptr - ret { [3 x <8 x i8>] }%10 -} - -define { [4 x <2 x i32>] } @test_vld4_dup_fx_update(i32* %a, i32** %ptr) #0 { -; CHECK-LABEL: test_vld4_dup_fx_update -; CHECK: ld4r { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}], #16 - %1 = bitcast i32* %a to i8* - %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %1, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) - %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0 - %4 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer - %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1 - %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <2 x i32> zeroinitializer - %7 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2 - %8 = shufflevector <2 x i32> %7, <2 x i32> undef, <2 x i32> zeroinitializer - %9 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3 - %10 = shufflevector <2 x i32> %9, <2 x i32> undef, <2 x i32> zeroinitializer - %11 = insertvalue { [4 x <2 x i32>] } undef, <2 x i32> %4, 0, 0 - %12 = insertvalue { [4 x <2 x i32>] } %11, <2 x i32> %6, 0, 1 - %13 = insertvalue { [4 x <2 x i32>] } %12, <2 x i32> %8, 0, 2 - %14 = insertvalue { [4 x <2 x i32>] } %13, <2 x i32> %10, 0, 3 - %tmp1 = getelementptr i32* %a, i32 4 - store i32* %tmp1, i32** %ptr - ret { [4 x <2 x i32>] } %14 -} - -define { [4 x <2 x double>] } @test_vld4_dup_reg_update(double* %a, double** %ptr, i32 %inc) { -; CHECK-LABEL: test_vld4_dup_reg_update -; CHECK: ld4r { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = bitcast double* %a to i8* - %2 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %1, <2 x double> undef, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8) - %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0 - %4 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer - %5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1 - %6 = shufflevector <2 x double> %5, <2 x double> undef, <2 x i32> zeroinitializer - %7 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2 - %8 = shufflevector <2 x double> %7, <2 x double> undef, <2 x i32> zeroinitializer - %9 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3 - %10 = shufflevector <2 x double> %9, <2 x double> undef, <2 x i32> zeroinitializer - %11 = insertvalue { [4 x <2 x double>] } undef, <2 x double> %4, 0, 0 - %12 = insertvalue { [4 x <2 x double>] } %11, <2 x double> %6, 0, 1 - %13 = insertvalue { [4 x <2 x double>] } %12, <2 x double> %8, 0, 2 - %14 = insertvalue { [4 x <2 x double>] } %13, <2 x double> %10, 0, 3 - %tmp1 = getelementptr double* %a, i32 %inc - store double* %tmp1, double** %ptr - ret { [4 x <2 x double>] } %14 -} - -define { [2 x <8 x i8>] } @test_vld2_lane_fx_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr) { -; CHECK-LABEL: test_vld2_lane_fx_update -; CHECK: ld2 { v{{[0-9]+}}.b, v{{[0-9]+}}.b }[7], [x{{[0-9]+|sp}}], #2 - %1 = extractvalue [2 x <8 x i8>] %b, 0 - %2 = extractvalue [2 x <8 x i8>] %b, 1 - 
%3 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1) - %4 = extractvalue { <8 x i8>, <8 x i8> } %3, 0 - %5 = extractvalue { <8 x i8>, <8 x i8> } %3, 1 - %6 = insertvalue { [2 x <8 x i8>] } undef, <8 x i8> %4, 0, 0 - %7 = insertvalue { [2 x <8 x i8>] } %6, <8 x i8> %5, 0, 1 - %tmp1 = getelementptr i8* %a, i32 2 - store i8* %tmp1, i8** %ptr - ret { [2 x <8 x i8>] } %7 -} - -define { [2 x <8 x i8>] } @test_vld2_lane_reg_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr, i32 %inc) { -; CHECK-LABEL: test_vld2_lane_reg_update -; CHECK: ld2 { v{{[0-9]+}}.b, v{{[0-9]+}}.b }[6], [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [2 x <8 x i8>] %b, 0 - %2 = extractvalue [2 x <8 x i8>] %b, 1 - %3 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 6, i32 1) - %4 = extractvalue { <8 x i8>, <8 x i8> } %3, 0 - %5 = extractvalue { <8 x i8>, <8 x i8> } %3, 1 - %6 = insertvalue { [2 x <8 x i8>] } undef, <8 x i8> %4, 0, 0 - %7 = insertvalue { [2 x <8 x i8>] } %6, <8 x i8> %5, 0, 1 - %tmp1 = getelementptr i8* %a, i32 %inc - store i8* %tmp1, i8** %ptr - ret { [2 x <8 x i8>] } %7 -} - -define { [3 x <2 x float>] } @test_vld3_lane_fx_update(float* %a, [3 x <2 x float>] %b, float** %ptr) { -; CHECK-LABEL: test_vld3_lane_fx_update -; CHECK: ld3 { v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s }[1], [x{{[0-9]+|sp}}], #12 - %1 = extractvalue [3 x <2 x float>] %b, 0 - %2 = extractvalue [3 x <2 x float>] %b, 1 - %3 = extractvalue [3 x <2 x float>] %b, 2 - %4 = bitcast float* %a to i8* - %5 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 1, i32 4) - %6 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 0 - %7 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 1 - %8 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 2 - %9 = insertvalue { [3 x <2 x float>] } undef, <2 x float> %6, 0, 0 - %10 = insertvalue { [3 x <2 x float>] } %9, <2 x float> %7, 0, 1 - %11 = insertvalue { [3 x <2 x float>] } %10, <2 x float> %8, 0, 2 - %tmp1 = getelementptr float* %a, i32 3 - store float* %tmp1, float** %ptr - ret { [3 x <2 x float>] } %11 -} - -define { [3 x <4 x i16>] } @test_vld3_lane_reg_update(i16* %a, [3 x <4 x i16>] %b, i16** %ptr, i32 %inc) { -; CHECK-LABEL: test_vld3_lane_reg_update -; CHECK: ld3 { v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h }[3], [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [3 x <4 x i16>] %b, 0 - %2 = extractvalue [3 x <4 x i16>] %b, 1 - %3 = extractvalue [3 x <4 x i16>] %b, 2 - %4 = bitcast i16* %a to i8* - %5 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2) - %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 0 - %7 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 1 - %8 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 2 - %9 = insertvalue { [3 x <4 x i16>] } undef, <4 x i16> %6, 0, 0 - %10 = insertvalue { [3 x <4 x i16>] } %9, <4 x i16> %7, 0, 1 - %11 = insertvalue { [3 x <4 x i16>] } %10, <4 x i16> %8, 0, 2 - %tmp1 = getelementptr i16* %a, i32 %inc - store i16* %tmp1, i16** %ptr - ret { [3 x <4 x i16>] } %11 -} - -define { [4 x <2 x i32>] } @test_vld4_lane_fx_update(i32* readonly %a, [4 x <2 x i32>] %b, i32** %ptr) { -; CHECK-LABEL: test_vld4_lane_fx_update -; CHECK: ld4 { v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s }[1], [x{{[0-9]+|sp}}], #16 - %1 
= extractvalue [4 x <2 x i32>] %b, 0 - %2 = extractvalue [4 x <2 x i32>] %b, 1 - %3 = extractvalue [4 x <2 x i32>] %b, 2 - %4 = extractvalue [4 x <2 x i32>] %b, 3 - %5 = bitcast i32* %a to i8* - %6 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 1, i32 4) - %7 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 0 - %8 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 1 - %9 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 2 - %10 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 3 - %11 = insertvalue { [4 x <2 x i32>] } undef, <2 x i32> %7, 0, 0 - %12 = insertvalue { [4 x <2 x i32>] } %11, <2 x i32> %8, 0, 1 - %13 = insertvalue { [4 x <2 x i32>] } %12, <2 x i32> %9, 0, 2 - %14 = insertvalue { [4 x <2 x i32>] } %13, <2 x i32> %10, 0, 3 - %tmp1 = getelementptr i32* %a, i32 4 - store i32* %tmp1, i32** %ptr - ret { [4 x <2 x i32>] } %14 -} - -define { [4 x <2 x double>] } @test_vld4_lane_reg_update(double* readonly %a, [4 x <2 x double>] %b, double** %ptr, i32 %inc) { -; CHECK-LABEL: test_vld4_lane_reg_update -; CHECK: ld4 { v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d }[1], [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [4 x <2 x double>] %b, 0 - %2 = extractvalue [4 x <2 x double>] %b, 1 - %3 = extractvalue [4 x <2 x double>] %b, 2 - %4 = extractvalue [4 x <2 x double>] %b, 3 - %5 = bitcast double* %a to i8* - %6 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 1, i32 8) - %7 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 0 - %8 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 1 - %9 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 2 - %10 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 3 - %11 = insertvalue { [4 x <2 x double>] } undef, <2 x double> %7, 0, 0 - %12 = insertvalue { [4 x <2 x double>] } %11, <2 x double> %8, 0, 1 - %13 = insertvalue { [4 x <2 x double>] } %12, <2 x double> %9, 0, 2 - %14 = insertvalue { [4 x <2 x double>] } %13, <2 x double> %10, 0, 3 - %tmp1 = getelementptr double* %a, i32 %inc - store double* %tmp1, double** %ptr - ret { [4 x <2 x double>] } %14 -} - -define void @test_vst2_lane_fx_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr) { -; CHECK-LABEL: test_vst2_lane_fx_update -; CHECK: st2 { v{{[0-9]+}}.b, v{{[0-9]+}}.b }[7], [x{{[0-9]+|sp}}], #2 - %1 = extractvalue [2 x <8 x i8>] %b, 0 - %2 = extractvalue [2 x <8 x i8>] %b, 1 - call void @llvm.arm.neon.vst2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1) - %tmp1 = getelementptr i8* %a, i32 2 - store i8* %tmp1, i8** %ptr - ret void -} - -define void @test_vst2_lane_reg_update(i32* %a, [2 x <2 x i32>] %b.coerce, i32** %ptr, i32 %inc) { -; CHECK-LABEL: test_vst2_lane_reg_update -; CHECK: st2 { v{{[0-9]+}}.s, v{{[0-9]+}}.s }[1], [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [2 x <2 x i32>] %b.coerce, 0 - %2 = extractvalue [2 x <2 x i32>] %b.coerce, 1 - %3 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v2i32(i8* %3, <2 x i32> %1, <2 x i32> %2, i32 1, i32 4) - %tmp1 = getelementptr i32* %a, i32 %inc - store i32* %tmp1, i32** %ptr - ret void -} - -define void @test_vst3_lane_fx_update(float* %a, [3 x <4 x float>] %b, float** %ptr) { -; CHECK-LABEL: 
test_vst3_lane_fx_update -; CHECK: st3 { v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s }[3], [x{{[0-9]+|sp}}], #12 - %1 = extractvalue [3 x <4 x float>] %b, 0 - %2 = extractvalue [3 x <4 x float>] %b, 1 - %3 = extractvalue [3 x <4 x float>] %b, 2 - %4 = bitcast float* %a to i8* - call void @llvm.arm.neon.vst3lane.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 3, i32 4) - %tmp1 = getelementptr float* %a, i32 3 - store float* %tmp1, float** %ptr - ret void -} - -; Function Attrs: nounwind -define void @test_vst3_lane_reg_update(i16* %a, [3 x <4 x i16>] %b, i16** %ptr, i32 %inc) { -; CHECK-LABEL: test_vst3_lane_reg_update -; CHECK: st3 { v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h }[3], [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [3 x <4 x i16>] %b, 0 - %2 = extractvalue [3 x <4 x i16>] %b, 1 - %3 = extractvalue [3 x <4 x i16>] %b, 2 - %4 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2) - %tmp1 = getelementptr i16* %a, i32 %inc - store i16* %tmp1, i16** %ptr - ret void -} - -define void @test_vst4_lane_fx_update(double* %a, [4 x <2 x double>] %b.coerce, double** %ptr) { -; CHECK-LABEL: test_vst4_lane_fx_update -; CHECK: st4 { v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d }[1], [x{{[0-9]+|sp}}], #32 - %1 = extractvalue [4 x <2 x double>] %b.coerce, 0 - %2 = extractvalue [4 x <2 x double>] %b.coerce, 1 - %3 = extractvalue [4 x <2 x double>] %b.coerce, 2 - %4 = extractvalue [4 x <2 x double>] %b.coerce, 3 - %5 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 1, i32 8) - %tmp1 = getelementptr double* %a, i32 4 - store double* %tmp1, double** %ptr - ret void -} - - -define void @test_vst4_lane_reg_update(float* %a, [4 x <2 x float>] %b.coerce, float** %ptr, i32 %inc) { -; CHECK-LABEL: test_vst4_lane_reg_update -; CHECK: st4 { v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s }[1], [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [4 x <2 x float>] %b.coerce, 0 - %2 = extractvalue [4 x <2 x float>] %b.coerce, 1 - %3 = extractvalue [4 x <2 x float>] %b.coerce, 2 - %4 = extractvalue [4 x <2 x float>] %b.coerce, 3 - %5 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 1, i32 4) - %tmp1 = getelementptr float* %a, i32 %inc - store float* %tmp1, float** %ptr - ret void -} - -declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) -declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32) -declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32) -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) -declare void 
@llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32) diff --git a/test/CodeGen/AArch64/neon-simd-shift.ll b/test/CodeGen/AArch64/neon-simd-shift.ll deleted file mode 100644 index 5615e3c8361b..000000000000 --- a/test/CodeGen/AArch64/neon-simd-shift.ll +++ /dev/null @@ -1,1557 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has separate copy of parts that aren't pure intrinsic wrangling. - -define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) { -; CHECK: test_vshr_n_s8 -; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vshr_n = ashr <8 x i8> %a, - ret <8 x i8> %vshr_n -} - -define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) { -; CHECK: test_vshr_n_s16 -; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vshr_n = ashr <4 x i16> %a, - ret <4 x i16> %vshr_n -} - -define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) { -; CHECK: test_vshr_n_s32 -; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vshr_n = ashr <2 x i32> %a, - ret <2 x i32> %vshr_n -} - -define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) { -; CHECK: test_vshrq_n_s8 -; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vshr_n = ashr <16 x i8> %a, - ret <16 x i8> %vshr_n -} - -define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) { -; CHECK: test_vshrq_n_s16 -; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vshr_n = ashr <8 x i16> %a, - ret <8 x i16> %vshr_n -} - -define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) { -; CHECK: test_vshrq_n_s32 -; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vshr_n = ashr <4 x i32> %a, - ret <4 x i32> %vshr_n -} - -define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) { -; CHECK: test_vshrq_n_s64 -; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vshr_n = ashr <2 x i64> %a, - ret <2 x i64> %vshr_n -} - -define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) { -; CHECK: test_vshr_n_u8 -; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vshr_n = lshr <8 x i8> %a, - ret <8 x i8> %vshr_n -} - -define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) { -; CHECK: test_vshr_n_u16 -; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vshr_n = lshr <4 x i16> %a, - ret <4 x i16> %vshr_n -} - -define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) { -; CHECK: test_vshr_n_u32 -; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vshr_n = lshr <2 x i32> %a, - ret <2 x i32> %vshr_n -} - -define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) { -; CHECK: test_vshrq_n_u8 -; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vshr_n = lshr <16 x i8> %a, - ret <16 x i8> %vshr_n -} - -define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) { -; CHECK: test_vshrq_n_u16 -; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vshr_n = lshr <8 x i16> %a, - ret <8 x i16> %vshr_n -} - -define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) { -; CHECK: test_vshrq_n_u32 -; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vshr_n = lshr <4 x i32> %a, - ret <4 x i32> %vshr_n -} - -define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) { -; CHECK: test_vshrq_n_u64 -; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vshr_n = lshr <2 x 
i64> %a, - ret <2 x i64> %vshr_n -} - -define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsra_n_s8 -; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vsra_n = ashr <8 x i8> %b, - %1 = add <8 x i8> %vsra_n, %a - ret <8 x i8> %1 -} - -define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsra_n_s16 -; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vsra_n = ashr <4 x i16> %b, - %1 = add <4 x i16> %vsra_n, %a - ret <4 x i16> %1 -} - -define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vsra_n_s32 -; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vsra_n = ashr <2 x i32> %b, - %1 = add <2 x i32> %vsra_n, %a - ret <2 x i32> %1 -} - -define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsraq_n_s8 -; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vsra_n = ashr <16 x i8> %b, - %1 = add <16 x i8> %vsra_n, %a - ret <16 x i8> %1 -} - -define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsraq_n_s16 -; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vsra_n = ashr <8 x i16> %b, - %1 = add <8 x i16> %vsra_n, %a - ret <8 x i16> %1 -} - -define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsraq_n_s32 -; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vsra_n = ashr <4 x i32> %b, - %1 = add <4 x i32> %vsra_n, %a - ret <4 x i32> %1 -} - -define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsraq_n_s64 -; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vsra_n = ashr <2 x i64> %b, - %1 = add <2 x i64> %vsra_n, %a - ret <2 x i64> %1 -} - -define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsra_n_u8 -; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vsra_n = lshr <8 x i8> %b, - %1 = add <8 x i8> %vsra_n, %a - ret <8 x i8> %1 -} - -define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsra_n_u16 -; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vsra_n = lshr <4 x i16> %b, - %1 = add <4 x i16> %vsra_n, %a - ret <4 x i16> %1 -} - -define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vsra_n_u32 -; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vsra_n = lshr <2 x i32> %b, - %1 = add <2 x i32> %vsra_n, %a - ret <2 x i32> %1 -} - -define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsraq_n_u8 -; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vsra_n = lshr <16 x i8> %b, - %1 = add <16 x i8> %vsra_n, %a - ret <16 x i8> %1 -} - -define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsraq_n_u16 -; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vsra_n = lshr <8 x i16> %b, - %1 = add <8 x i16> %vsra_n, %a - ret <8 x i16> %1 -} - -define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsraq_n_u32 -; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vsra_n = lshr <4 x i32> %b, - %1 = add <4 x i32> %vsra_n, %a - ret <4 x i32> %1 -} - -define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsraq_n_u64 -; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vsra_n = lshr <2 x i64> %b, - %1 = add <2 x i64> %vsra_n, %a - ret <2 x i64> %1 -} - -define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) { -; CHECK: test_vrshr_n_s8 -; CHECK: srshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %a, i32 3) - ret <8 x i8> %vrshr_n -} - - -define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) { -; CHECK: 
test_vrshr_n_s16 -; CHECK: srshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %a, i32 3) - ret <4 x i16> %vrshr_n -} - - -define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) { -; CHECK: test_vrshr_n_s32 -; CHECK: srshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %a, i32 3) - ret <2 x i32> %vrshr_n -} - - -define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) { -; CHECK: test_vrshrq_n_s8 -; CHECK: srshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %a, i32 3) - ret <16 x i8> %vrshr_n -} - - -define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) { -; CHECK: test_vrshrq_n_s16 -; CHECK: srshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %a, i32 3) - ret <8 x i16> %vrshr_n -} - - -define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) { -; CHECK: test_vrshrq_n_s32 -; CHECK: srshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %a, i32 3) - ret <4 x i32> %vrshr_n -} - - -define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) { -; CHECK: test_vrshrq_n_s64 -; CHECK: srshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %a, i32 3) - ret <2 x i64> %vrshr_n -} - - -define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) { -; CHECK: test_vrshr_n_u8 -; CHECK: urshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %a, i32 3) - ret <8 x i8> %vrshr_n -} - - -define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) { -; CHECK: test_vrshr_n_u16 -; CHECK: urshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %a, i32 3) - ret <4 x i16> %vrshr_n -} - - -define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) { -; CHECK: test_vrshr_n_u32 -; CHECK: urshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %a, i32 3) - ret <2 x i32> %vrshr_n -} - - -define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) { -; CHECK: test_vrshrq_n_u8 -; CHECK: urshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %a, i32 3) - ret <16 x i8> %vrshr_n -} - - -define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) { -; CHECK: test_vrshrq_n_u16 -; CHECK: urshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %a, i32 3) - ret <8 x i16> %vrshr_n -} - - -define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) { -; CHECK: test_vrshrq_n_u32 -; CHECK: urshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %a, i32 3) - ret <4 x i32> %vrshr_n -} - - -define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) { -; CHECK: test_vrshrq_n_u64 -; CHECK: urshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %a, i32 3) - ret <2 x i64> %vrshr_n -} - - -define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vrsra_n_s8 -; CHECK: srsra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %1 = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %b, i32 3) - %vrsra_n = add <8 x i8> %1, %a - ret <8 x i8> %vrsra_n -} - -define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vrsra_n_s16 -; CHECK: 
srsra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %1 = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %b, i32 3) - %vrsra_n = add <4 x i16> %1, %a - ret <4 x i16> %vrsra_n -} - -define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vrsra_n_s32 -; CHECK: srsra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %1 = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %b, i32 3) - %vrsra_n = add <2 x i32> %1, %a - ret <2 x i32> %vrsra_n -} - -define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vrsraq_n_s8 -; CHECK: srsra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %1 = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %b, i32 3) - %vrsra_n = add <16 x i8> %1, %a - ret <16 x i8> %vrsra_n -} - -define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vrsraq_n_s16 -; CHECK: srsra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %1 = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %b, i32 3) - %vrsra_n = add <8 x i16> %1, %a - ret <8 x i16> %vrsra_n -} - -define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vrsraq_n_s32 -; CHECK: srsra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %1 = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %b, i32 3) - %vrsra_n = add <4 x i32> %1, %a - ret <4 x i32> %vrsra_n -} - -define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vrsraq_n_s64 -; CHECK: srsra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %1 = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %b, i32 3) - %vrsra_n = add <2 x i64> %1, %a - ret <2 x i64> %vrsra_n -} - -define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vrsra_n_u8 -; CHECK: ursra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %1 = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %b, i32 3) - %vrsra_n = add <8 x i8> %1, %a - ret <8 x i8> %vrsra_n -} - -define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vrsra_n_u16 -; CHECK: ursra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %1 = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %b, i32 3) - %vrsra_n = add <4 x i16> %1, %a - ret <4 x i16> %vrsra_n -} - -define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vrsra_n_u32 -; CHECK: ursra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %1 = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %b, i32 3) - %vrsra_n = add <2 x i32> %1, %a - ret <2 x i32> %vrsra_n -} - -define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vrsraq_n_u8 -; CHECK: ursra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %1 = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %b, i32 3) - %vrsra_n = add <16 x i8> %1, %a - ret <16 x i8> %vrsra_n -} - -define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vrsraq_n_u16 -; CHECK: ursra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %1 = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %b, i32 3) - %vrsra_n = add <8 x i16> %1, %a - ret <8 x i16> %vrsra_n -} - -define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vrsraq_n_u32 -; CHECK: ursra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %1 = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %b, i32 3) - %vrsra_n = add <4 x i32> %1, %a - ret <4 x i32> %vrsra_n -} - -define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vrsraq_n_u64 -; CHECK: ursra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %1 = tail call <2 x i64> 
@llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %b, i32 3) - %vrsra_n = add <2 x i64> %1, %a - ret <2 x i64> %vrsra_n -} - -define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsri_n_s8 -; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) - ret <8 x i8> %vsri_n -} - - -define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsri_n_s16 -; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3) - ret <4 x i16> %vsri -} - - -define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vsri_n_s32 -; CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vsri = tail call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3) - ret <2 x i32> %vsri -} - - -define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsriq_n_s8 -; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) - ret <16 x i8> %vsri_n -} - - -define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsriq_n_s16 -; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3) - ret <8 x i16> %vsri -} - - -define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsriq_n_s32 -; CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vsri = tail call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3) - ret <4 x i32> %vsri -} - - -define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsriq_n_s64 -; CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vsri = tail call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3) - ret <2 x i64> %vsri -} - -define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsri_n_p8 -; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) - ret <8 x i8> %vsri_n -} - -define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsri_n_p16 -; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 - %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15) - ret <4 x i16> %vsri -} - -define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsriq_n_p8 -; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) - ret <16 x i8> %vsri_n -} - -define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsriq_n_p16 -; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 - %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15) - ret <8 x i16> %vsri -} - -define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsli_n_s8 -; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) - ret <8 x i8> %vsli_n -} - -define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsli_n_s16 -; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3) - ret <4 x i16> %vsli -} - -define <2 x 
i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vsli_n_s32 -; CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vsli = tail call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3) - ret <2 x i32> %vsli -} - -define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsliq_n_s8 -; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) - ret <16 x i8> %vsli_n -} - -define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsliq_n_s16 -; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3) - ret <8 x i16> %vsli -} - -define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsliq_n_s32 -; CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vsli = tail call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3) - ret <4 x i32> %vsli -} - -define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsliq_n_s64 -; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vsli = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3) - ret <2 x i64> %vsli -} - -define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsli_n_p8 -; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) - ret <8 x i8> %vsli_n -} - -define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsli_n_p16 -; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 - %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15) - ret <4 x i16> %vsli -} - -define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsliq_n_p8 -; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) - ret <16 x i8> %vsli_n -} - -define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsliq_n_p16 -; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 - %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15) - ret <8 x i16> %vsli -} - -define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) { -; CHECK: test_vqshl_n_s8 -; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vqshl = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> ) - ret <8 x i8> %vqshl -} - - -define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) { -; CHECK: test_vqshl_n_s16 -; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> ) - ret <4 x i16> %vqshl -} - - -define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) { -; CHECK: test_vqshl_n_s32 -; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> ) - ret <2 x i32> %vqshl -} - - -define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) { -; CHECK: test_vqshlq_n_s8 -; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> ) - ret <16 x i8> %vqshl_n -} - - -define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) { -; CHECK: test_vqshlq_n_s16 -; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, 
<8 x i16> ) - ret <8 x i16> %vqshl -} - - -define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) { -; CHECK: test_vqshlq_n_s32 -; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> ) - ret <4 x i32> %vqshl -} - - -define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) { -; CHECK: test_vqshlq_n_s64 -; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> ) - ret <2 x i64> %vqshl -} - - -define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) { -; CHECK: test_vqshl_n_u8 -; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> ) - ret <8 x i8> %vqshl_n -} - - -define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) { -; CHECK: test_vqshl_n_u16 -; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> ) - ret <4 x i16> %vqshl -} - - -define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) { -; CHECK: test_vqshl_n_u32 -; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> ) - ret <2 x i32> %vqshl -} - - -define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) { -; CHECK: test_vqshlq_n_u8 -; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> ) - ret <16 x i8> %vqshl_n -} - - -define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) { -; CHECK: test_vqshlq_n_u16 -; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> ) - ret <8 x i16> %vqshl -} - - -define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) { -; CHECK: test_vqshlq_n_u32 -; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> ) - ret <4 x i32> %vqshl -} - - -define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) { -; CHECK: test_vqshlq_n_u64 -; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> ) - ret <2 x i64> %vqshl -} - -define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) { -; CHECK: test_vqshlu_n_s8 -; CHECK: sqshlu {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vqshlu = tail call <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8> %a, i32 3) - ret <8 x i8> %vqshlu -} - - -define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) { -; CHECK: test_vqshlu_n_s16 -; CHECK: sqshlu {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vqshlu = tail call <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16> %a, i32 3) - ret <4 x i16> %vqshlu -} - - -define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) { -; CHECK: test_vqshlu_n_s32 -; CHECK: sqshlu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vqshlu = tail call <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32> %a, i32 3) - ret <2 x i32> %vqshlu -} - - -define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) { -; CHECK: test_vqshluq_n_s8 -; CHECK: sqshlu {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vqshlu = tail call <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8> %a, i32 3) - ret <16 x i8> %vqshlu -} - - -define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) { -; CHECK: test_vqshluq_n_s16 -; CHECK: sqshlu {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vqshlu = tail call <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16> %a, i32 3) - ret <8 x i16> %vqshlu -} - - -define <4 x i32> 
@test_vqshluq_n_s32(<4 x i32> %a) { -; CHECK: test_vqshluq_n_s32 -; CHECK: sqshlu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vqshlu = tail call <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32> %a, i32 3) - ret <4 x i32> %vqshlu -} - - -define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) { -; CHECK: test_vqshluq_n_s64 -; CHECK: sqshlu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vqshlu = tail call <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64> %a, i32 3) - ret <2 x i64> %vqshlu -} - - -define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) { -; CHECK: test_vshrn_n_s16 -; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %1 = ashr <8 x i16> %a, - %vshrn_n = trunc <8 x i16> %1 to <8 x i8> - ret <8 x i8> %vshrn_n -} - -define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) { -; CHECK: test_vshrn_n_s32 -; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %1 = ashr <4 x i32> %a, - %vshrn_n = trunc <4 x i32> %1 to <4 x i16> - ret <4 x i16> %vshrn_n -} - -define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) { -; CHECK: test_vshrn_n_s64 -; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %1 = ashr <2 x i64> %a, - %vshrn_n = trunc <2 x i64> %1 to <2 x i32> - ret <2 x i32> %vshrn_n -} - -define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) { -; CHECK: test_vshrn_n_u16 -; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %1 = lshr <8 x i16> %a, - %vshrn_n = trunc <8 x i16> %1 to <8 x i8> - ret <8 x i8> %vshrn_n -} - -define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) { -; CHECK: test_vshrn_n_u32 -; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %1 = lshr <4 x i32> %a, - %vshrn_n = trunc <4 x i32> %1 to <4 x i16> - ret <4 x i16> %vshrn_n -} - -define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) { -; CHECK: test_vshrn_n_u64 -; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %1 = lshr <2 x i64> %a, - %vshrn_n = trunc <2 x i64> %1 to <2 x i32> - ret <2 x i32> %vshrn_n -} - -define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vshrn_high_n_s16 -; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %1 = ashr <8 x i16> %b, - %vshrn_n = trunc <8 x i16> %1 to <8 x i8> - %2 = bitcast <8 x i8> %a to <1 x i64> - %3 = bitcast <8 x i8> %vshrn_n to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> - %4 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %4 -} - -define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vshrn_high_n_s32 -; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %1 = ashr <4 x i32> %b, - %vshrn_n = trunc <4 x i32> %1 to <4 x i16> - %2 = bitcast <4 x i16> %a to <1 x i64> - %3 = bitcast <4 x i16> %vshrn_n to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> - %4 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %4 -} - -define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vshrn_high_n_s64 -; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %2 = ashr <2 x i64> %b, - %vshrn_n = trunc <2 x i64> %2 to <2 x i32> - %3 = bitcast <2 x i32> %vshrn_n to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> - %4 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %4 -} - -define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vshrn_high_n_u16 -; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %1 = lshr <8 x i16> %b, - %vshrn_n = trunc <8 x i16> %1 to <8 x i8> - %2 = bitcast <8 x i8> %a to <1 x i64> - %3 = bitcast <8 x i8> %vshrn_n to <1 x i64> - %shuffle.i = 
shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> - %4 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %4 -} - -define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vshrn_high_n_u32 -; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %1 = lshr <4 x i32> %b, - %vshrn_n = trunc <4 x i32> %1 to <4 x i16> - %2 = bitcast <4 x i16> %a to <1 x i64> - %3 = bitcast <4 x i16> %vshrn_n to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> - %4 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %4 -} - -define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vshrn_high_n_u64 -; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %2 = lshr <2 x i64> %b, - %vshrn_n = trunc <2 x i64> %2 to <2 x i32> - %3 = bitcast <2 x i32> %vshrn_n to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> - %4 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %4 -} - -define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) { -; CHECK: test_vqshrun_n_s16 -; CHECK: sqshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %a, i32 3) - ret <8 x i8> %vqshrun -} - - -define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) { -; CHECK: test_vqshrun_n_s32 -; CHECK: sqshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %a, i32 9) - ret <4 x i16> %vqshrun -} - -define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) { -; CHECK: test_vqshrun_n_s64 -; CHECK: sqshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %a, i32 19) - ret <2 x i32> %vqshrun -} - -define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vqshrun_high_n_s16 -; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %b, i32 3) - %1 = bitcast <8 x i8> %a to <1 x i64> - %2 = bitcast <8 x i8> %vqshrun to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %3 -} - -define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vqshrun_high_n_s32 -; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %b, i32 9) - %1 = bitcast <4 x i16> %a to <1 x i64> - %2 = bitcast <4 x i16> %vqshrun to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %3 -} - -define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vqshrun_high_n_s64 -; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %b, i32 19) - %2 = bitcast <2 x i32> %vqshrun to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %3 -} - -define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) { -; CHECK: test_vrshrn_n_s16 -; CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %a, i32 3) - ret <8 x i8> %vrshrn -} - - -define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) { -; CHECK: test_vrshrn_n_s32 -; 
CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %a, i32 9) - ret <4 x i16> %vrshrn -} - - -define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) { -; CHECK: test_vrshrn_n_s64 -; CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %a, i32 19) - ret <2 x i32> %vrshrn -} - -define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vrshrn_high_n_s16 -; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %b, i32 3) - %1 = bitcast <8 x i8> %a to <1 x i64> - %2 = bitcast <8 x i8> %vrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %3 -} - -define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vrshrn_high_n_s32 -; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %b, i32 9) - %1 = bitcast <4 x i16> %a to <1 x i64> - %2 = bitcast <4 x i16> %vrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %3 -} - -define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vrshrn_high_n_s64 -; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %b, i32 19) - %2 = bitcast <2 x i32> %vrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %3 -} - -define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) { -; CHECK: test_vqrshrun_n_s16 -; CHECK: sqrshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %a, i32 3) - ret <8 x i8> %vqrshrun -} - -define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) { -; CHECK: test_vqrshrun_n_s32 -; CHECK: sqrshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %a, i32 9) - ret <4 x i16> %vqrshrun -} - -define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) { -; CHECK: test_vqrshrun_n_s64 -; CHECK: sqrshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %a, i32 19) - ret <2 x i32> %vqrshrun -} - -define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vqrshrun_high_n_s16 -; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %b, i32 3) - %1 = bitcast <8 x i8> %a to <1 x i64> - %2 = bitcast <8 x i8> %vqrshrun to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %3 -} - -define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vqrshrun_high_n_s32 -; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %b, i32 9) - %1 = bitcast <4 x i16> %a to <1 x i64> - %2 = bitcast <4 x i16> %vqrshrun to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %3 -} 
- -define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vqrshrun_high_n_s64 -; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %b, i32 19) - %2 = bitcast <2 x i32> %vqrshrun to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %3 -} - -define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) { -; CHECK: test_vqshrn_n_s16 -; CHECK: sqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %a, i32 3) - ret <8 x i8> %vqshrn -} - - -define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) { -; CHECK: test_vqshrn_n_s32 -; CHECK: sqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %a, i32 9) - ret <4 x i16> %vqshrn -} - - -define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) { -; CHECK: test_vqshrn_n_s64 -; CHECK: sqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %a, i32 19) - ret <2 x i32> %vqshrn -} - - -define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) { -; CHECK: test_vqshrn_n_u16 -; CHECK: uqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %a, i32 3) - ret <8 x i8> %vqshrn -} - - -define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) { -; CHECK: test_vqshrn_n_u32 -; CHECK: uqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %a, i32 9) - ret <4 x i16> %vqshrn -} - - -define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) { -; CHECK: test_vqshrn_n_u64 -; CHECK: uqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %a, i32 19) - ret <2 x i32> %vqshrn -} - - -define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vqshrn_high_n_s16 -; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %b, i32 3) - %1 = bitcast <8 x i8> %a to <1 x i64> - %2 = bitcast <8 x i8> %vqshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %3 -} - -define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vqshrn_high_n_s32 -; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %b, i32 9) - %1 = bitcast <4 x i16> %a to <1 x i64> - %2 = bitcast <4 x i16> %vqshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %3 -} - -define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vqshrn_high_n_s64 -; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %b, i32 19) - %2 = bitcast <2 x i32> %vqshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %3 -} - -define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vqshrn_high_n_u16 -; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - 
%vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %b, i32 3) - %1 = bitcast <8 x i8> %a to <1 x i64> - %2 = bitcast <8 x i8> %vqshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %3 -} - -define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vqshrn_high_n_u32 -; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %b, i32 9) - %1 = bitcast <4 x i16> %a to <1 x i64> - %2 = bitcast <4 x i16> %vqshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %3 -} - -define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vqshrn_high_n_u64 -; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %b, i32 19) - %2 = bitcast <2 x i32> %vqshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %3 -} - -define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) { -; CHECK: test_vqrshrn_n_s16 -; CHECK: sqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %a, i32 3) - ret <8 x i8> %vqrshrn -} - - -define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) { -; CHECK: test_vqrshrn_n_s32 -; CHECK: sqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %a, i32 9) - ret <4 x i16> %vqrshrn -} - - -define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) { -; CHECK: test_vqrshrn_n_s64 -; CHECK: sqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %a, i32 19) - ret <2 x i32> %vqrshrn -} - - -define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) { -; CHECK: test_vqrshrn_n_u16 -; CHECK: uqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %a, i32 3) - ret <8 x i8> %vqrshrn -} - - -define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) { -; CHECK: test_vqrshrn_n_u32 -; CHECK: uqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %a, i32 9) - ret <4 x i16> %vqrshrn -} - - -define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) { -; CHECK: test_vqrshrn_n_u64 -; CHECK: uqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %a, i32 19) - ret <2 x i32> %vqrshrn -} - - -define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vqrshrn_high_n_s16 -; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %b, i32 3) - %1 = bitcast <8 x i8> %a to <1 x i64> - %2 = bitcast <8 x i8> %vqrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %3 -} - -define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vqrshrn_high_n_s32 -; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %b, i32 9) - %1 = bitcast <4 x i16> %a to <1 x i64> - %2 = 
bitcast <4 x i16> %vqrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %3 -} - -define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vqrshrn_high_n_s64 -; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %b, i32 19) - %2 = bitcast <2 x i32> %vqrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %3 -} - -define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vqrshrn_high_n_u16 -; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %b, i32 3) - %1 = bitcast <8 x i8> %a to <1 x i64> - %2 = bitcast <8 x i8> %vqrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %3 -} - -define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vqrshrn_high_n_u32 -; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %b, i32 9) - %1 = bitcast <4 x i16> %a to <1 x i64> - %2 = bitcast <4 x i16> %vqrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %3 -} - -define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vqrshrn_high_n_u64 -; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %b, i32 19) - %2 = bitcast <2 x i32> %vqrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> - %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %3 -} - -define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) { -; CHECK: test_vcvt_n_f32_s32 -; CHECK: scvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 - %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 31) - ret <2 x float> %vcvt -} - -define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) { -; CHECK: test_vcvtq_n_f32_s32 -; CHECK: scvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 - %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 31) - ret <4 x float> %vcvt -} - -define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) { -; CHECK: test_vcvtq_n_f64_s64 -; CHECK: scvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 - %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 50) - ret <2 x double> %vcvt -} - -define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) { -; CHECK: test_vcvt_n_f32_u32 -; CHECK: ucvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 - %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 31) - ret <2 x float> %vcvt -} - -define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) { -; CHECK: test_vcvtq_n_f32_u32 -; CHECK: ucvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 - %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 31) - ret <4 x float> %vcvt -} - -define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) { -; CHECK: test_vcvtq_n_f64_u64 -; CHECK: ucvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 - %vcvt = tail 
call <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 50) - ret <2 x double> %vcvt -} - -define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) { -; CHECK: test_vcvt_n_s32_f32 -; CHECK: fcvtzs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 - %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %a, i32 31) - ret <2 x i32> %vcvt -} - -define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) { -; CHECK: test_vcvtq_n_s32_f32 -; CHECK: fcvtzs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 - %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %a, i32 31) - ret <4 x i32> %vcvt -} - -define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) { -; CHECK: test_vcvtq_n_s64_f64 -; CHECK: fcvtzs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 - %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %a, i32 50) - ret <2 x i64> %vcvt -} - -define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) { -; CHECK: test_vcvt_n_u32_f32 -; CHECK: fcvtzu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 - %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %a, i32 31) - ret <2 x i32> %vcvt -} - -define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) { -; CHECK: test_vcvt_n_u32_f32 -; CHECK: fcvtzu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 - %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %a, i32 31) - ret <4 x i32> %vcvt -} - -define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) { -; CHECK: test_vcvtq_n_u64_f64 -; CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 - %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %a, i32 50) - ret <2 x i64> %vcvt -} - -declare <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32>, i32) - -declare <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8>, i32) - -declare <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16>, i32) - -declare <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32>, i32) - -declare <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32>, i32) - -declare <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8>, i32) - -declare <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16>, i32) - -declare <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32>, i32) - -declare <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8>, <8 x i8>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16>, <4 x i16>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32>, <2 x i32>, i32) - -declare <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8>, <16 x i8>, i32) - -declare <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16>, <8 x i16>, i32) - -declare <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32>, <4 x i32>, i32) - -declare <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64>, <2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) - -declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) - -declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, 
i32) - -declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) - -declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32>, i32) - -declare <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8>, i32) - -declare <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16>, i32) - -declare <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32>, i32) - -declare <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64>, i32) - -declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) - -declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) - -declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) - -declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) - -declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) - -declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) - -declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) - -declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) - -declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) - -declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) - -declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) - -declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) - -declare <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64>, i32) - -declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) - -declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) - -declare <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) - -declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) - -declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) - -declare <2 x double> 
@llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)
-
-declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)
-
-declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)
-
-declare <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)
-
-declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)
-
-declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
-
-declare <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)
-
-define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvt_n_s64_f64
-; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
- %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
- ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvt_n_u64_f64
-; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
- %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
- ret <1 x i64> %1
-}
-
-define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
-; CHECK-LABEL: test_vcvt_n_f64_s64
-; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
- %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
-; CHECK-LABEL: test_vcvt_n_f64_u64
-; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
- %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
- ret <1 x double> %1
-}
-
-declare <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
-declare <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
-declare <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
-declare <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)
diff --git a/test/CodeGen/AArch64/neon-simd-tbl.ll b/test/CodeGen/AArch64/neon-simd-tbl.ll
deleted file mode 100644
index 53924923f795..000000000000
--- a/test/CodeGen/AArch64/neon-simd-tbl.ll
+++ /dev/null
@@ -1,829 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-; This test is just intrinsic pumping. arm64 has its own tbl/tbx tests.
- -declare <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) - -declare <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) - -declare <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) - -declare <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) - -declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8>, <8 x i8>) - -declare <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) - -declare <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) - -declare <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) - -declare <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8>, <16 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) - -define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtbl1_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) - ret <8 x i8> %vtbl11.i -} - -define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) { -; CHECK: test_vqtbl1_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %a, <8 x i8> %b) - ret <8 x i8> %vtbl1.i -} - -define <8 x i8> @test_vtbl2_s8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl2_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1 - %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> - %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) - ret <8 x i8> %vtbl17.i -} - -define <8 x i8> @test_vqtbl2_s8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl2_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl2.i -} - -define <8 x i8> @test_vtbl3_s8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl3_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x 
i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 - %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> - %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) - ret <8 x i8> %vtbl212.i -} - -define <8 x i8> @test_vqtbl3_s8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl3_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl3.i -} - -define <8 x i8> @test_vtbl4_s8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl4_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 - %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> - %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> - %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) - ret <8 x i8> %vtbl216.i -} - -define <8 x i8> @test_vqtbl4_s8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl4_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl4.i -} - -define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vqtbl1q_s8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8> %a, <16 x i8> %b) - ret <16 x i8> %vtbl1.i -} - -define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl2q_s8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8> 
%__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl2.i -} - -define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl3q_s8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl3.i -} - -define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl4q_s8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl4.i -} - -define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vtbx1_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %c) - %0 = icmp uge <8 x i8> %c, - %1 = sext <8 x i1> %0 to <8 x i8> - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) - ret <8 x i8> %vbsl.i -} - -define <8 x i8> @test_vtbx2_s8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx2_s8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1 - %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> - %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) - ret <8 x i8> %vtbx17.i -} - -define <8 x i8> @test_vtbx3_s8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx3_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 - %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> - %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) - %0 = icmp uge <8 x i8> %c, - %1 = sext <8 x i1> %0 to <8 x i8> - %vbsl.i = tail call <8 x i8> 
@llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) - ret <8 x i8> %vbsl.i -} - -define <8 x i8> @test_vtbx4_s8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx4_s8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 - %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> - %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> - %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) - ret <8 x i8> %vtbx216.i -} - -define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { -; CHECK: test_vqtbx1_s8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) - ret <8 x i8> %vtbx1.i -} - -define <8 x i8> @test_vqtbx2_s8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx2_s8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx2.i -} - -define <8 x i8> @test_vqtbx3_s8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx3_s8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx3.i -} - -define <8 x i8> @test_vqtbx4_s8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx4_s8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx4.i -} - -define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vqtbx1q_s8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %vtbx1.i = tail call <16 x i8> 
@llvm.aarch64.neon.vtbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) - ret <16 x i8> %vtbx1.i -} - -define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx2q_s8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx2.i -} - -define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx3q_s8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx3.i -} - -define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx4q_s8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx4.i -} - -define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtbl1_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) - ret <8 x i8> %vtbl11.i -} - -define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) { -; CHECK: test_vqtbl1_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %a, <8 x i8> %b) - ret <8 x i8> %vtbl1.i -} - -define <8 x i8> @test_vtbl2_u8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl2_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1 - %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> - %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) - ret <8 x i8> %vtbl17.i -} - -define <8 x i8> @test_vqtbl2_u8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl2_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, 
{{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl2.i -} - -define <8 x i8> @test_vtbl3_u8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl3_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 - %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> - %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) - ret <8 x i8> %vtbl212.i -} - -define <8 x i8> @test_vqtbl3_u8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl3_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl3.i -} - -define <8 x i8> @test_vtbl4_u8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl4_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 - %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> - %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> - %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) - ret <8 x i8> %vtbl216.i -} - -define <8 x i8> @test_vqtbl4_u8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl4_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl4.i -} - -define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vqtbl1q_u8: -; CHECK: tbl {{v[0-9]+}}.16b, { 
{{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8> %a, <16 x i8> %b) - ret <16 x i8> %vtbl1.i -} - -define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl2q_u8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl2.i -} - -define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl3q_u8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl3.i -} - -define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl4q_u8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl4.i -} - -define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vtbx1_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %c) - %0 = icmp uge <8 x i8> %c, - %1 = sext <8 x i1> %0 to <8 x i8> - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) - ret <8 x i8> %vbsl.i -} - -define <8 x i8> @test_vtbx2_u8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx2_u8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1 - %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> - %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) - ret <8 x i8> %vtbx17.i -} - -define <8 x i8> @test_vtbx3_u8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx3_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0 - 
%__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 - %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> - %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) - %0 = icmp uge <8 x i8> %c, - %1 = sext <8 x i1> %0 to <8 x i8> - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) - ret <8 x i8> %vbsl.i -} - -define <8 x i8> @test_vtbx4_u8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx4_u8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 - %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> - %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> - %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) - ret <8 x i8> %vtbx216.i -} - -define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { -; CHECK: test_vqtbx1_u8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) - ret <8 x i8> %vtbx1.i -} - -define <8 x i8> @test_vqtbx2_u8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx2_u8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx2.i -} - -define <8 x i8> @test_vqtbx3_u8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx3_u8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx3.i -} - -define <8 x i8> @test_vqtbx4_u8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx4_u8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 
2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx4.i -} - -define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vqtbx1q_u8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) - ret <16 x i8> %vtbx1.i -} - -define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx2q_u8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx2.i -} - -define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx3q_u8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx3.i -} - -define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx4q_u8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx4.i -} - -define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtbl1_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) - ret <8 x i8> %vtbl11.i -} - -define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) { -; CHECK: test_vqtbl1_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %a, <8 x i8> %b) - ret <8 x i8> %vtbl1.i -} - -define <8 x i8> @test_vtbl2_p8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl2_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - 
%__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1 - %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> - %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) - ret <8 x i8> %vtbl17.i -} - -define <8 x i8> @test_vqtbl2_p8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl2_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl2.i -} - -define <8 x i8> @test_vtbl3_p8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl3_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 - %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> - %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) - ret <8 x i8> %vtbl212.i -} - -define <8 x i8> @test_vqtbl3_p8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl3_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl3.i -} - -define <8 x i8> @test_vtbl4_p8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl4_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 - %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> - %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> - %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) - ret <8 x i8> %vtbl216.i -} - -define <8 x i8> @test_vqtbl4_p8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl4_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x 
i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl4.i -} - -define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vqtbl1q_p8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8> %a, <16 x i8> %b) - ret <16 x i8> %vtbl1.i -} - -define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl2q_p8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl2.i -} - -define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl3q_p8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl3.i -} - -define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl4q_p8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl4.i -} - -define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vtbx1_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %c) - %0 = icmp uge <8 x i8> %c, - %1 = sext <8 x i1> %0 to <8 x i8> - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) - ret <8 x i8> %vbsl.i -} - -define <8 x i8> @test_vtbx2_p8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx2_p8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x 
i8>] %b.coerce, 1 - %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> - %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) - ret <8 x i8> %vtbx17.i -} - -define <8 x i8> @test_vtbx3_p8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx3_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 - %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> - %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) - %0 = icmp uge <8 x i8> %c, - %1 = sext <8 x i1> %0 to <8 x i8> - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) - ret <8 x i8> %vbsl.i -} - -define <8 x i8> @test_vtbx4_p8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx4_p8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 - %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> - %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> - %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) - ret <8 x i8> %vtbx216.i -} - -define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { -; CHECK: test_vqtbx1_p8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) - ret <8 x i8> %vtbx1.i -} - -define <8 x i8> @test_vqtbx2_p8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx2_p8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx2.i -} - -define <8 x i8> @test_vqtbx3_p8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx3_p8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> 
%__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx3.i -} - -define <8 x i8> @test_vqtbx4_p8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx4_p8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx4.i -} - -define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vqtbx1q_p8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) - ret <16 x i8> %vtbx1.i -} - -define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx2q_p8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx2.i -} - -define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx3q_p8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx3.i -} - -define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx4q_p8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx4.i -} - diff --git a/test/CodeGen/AArch64/neon-simd-vget.ll b/test/CodeGen/AArch64/neon-simd-vget.ll deleted file mode 100644 index 93d5e2ad3455..000000000000 --- a/test/CodeGen/AArch64/neon-simd-vget.ll +++ /dev/null @@ -1,226 +0,0 @@ -; RUN: llc < %s 
-verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has its own copy: aarch64-neon-simd-vget.ll - -define <8 x i8> @test_vget_high_s8(<16 x i8> %a) { -; CHECK-LABEL: test_vget_high_s8: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vget_high_s16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_high_s16: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - ret <4 x i16> %shuffle.i -} - -define <2 x i32> @test_vget_high_s32(<4 x i32> %a) { -; CHECK-LABEL: test_vget_high_s32: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - ret <2 x i32> %shuffle.i -} - -define <1 x i64> @test_vget_high_s64(<2 x i64> %a) { -; CHECK-LABEL: test_vget_high_s64: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> - ret <1 x i64> %shuffle.i -} - -define <8 x i8> @test_vget_high_u8(<16 x i8> %a) { -; CHECK-LABEL: test_vget_high_u8: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vget_high_u16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_high_u16: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - ret <4 x i16> %shuffle.i -} - -define <2 x i32> @test_vget_high_u32(<4 x i32> %a) { -; CHECK-LABEL: test_vget_high_u32: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - ret <2 x i32> %shuffle.i -} - -define <1 x i64> @test_vget_high_u64(<2 x i64> %a) { -; CHECK-LABEL: test_vget_high_u64: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> - ret <1 x i64> %shuffle.i -} - -define <1 x i64> @test_vget_high_p64(<2 x i64> %a) { -; CHECK-LABEL: test_vget_high_p64: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> - ret <1 x i64> %shuffle.i -} - -define <4 x i16> @test_vget_high_f16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_high_f16: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - ret <4 x i16> %shuffle.i -} - -define <2 x float> @test_vget_high_f32(<4 x float> %a) { -; CHECK-LABEL: test_vget_high_f32: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> - ret <2 x float> %shuffle.i -} - -define <8 x i8> @test_vget_high_p8(<16 x i8> %a) { -; CHECK-LABEL: test_vget_high_p8: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vget_high_p16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_high_p16: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - ret <4 x i16> %shuffle.i -} - -define <1 x double> @test_vget_high_f64(<2 x double> %a) { -; CHECK-LABEL: test_vget_high_f64: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> - ret <1 x double> %shuffle.i -} - -define <8 x i8> @test_vget_low_s8(<16 x i8> %a) { -; CHECK-LABEL: test_vget_low_s8: -; CHECK: ret -entry: - 
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vget_low_s16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_low_s16: -; CHECK: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - ret <4 x i16> %shuffle.i -} - -define <2 x i32> @test_vget_low_s32(<4 x i32> %a) { -; CHECK-LABEL: test_vget_low_s32: -; CHECK: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - ret <2 x i32> %shuffle.i -} - -define <1 x i64> @test_vget_low_s64(<2 x i64> %a) { -; CHECK-LABEL: test_vget_low_s64: -; CHECK: ret -entry: - %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer - ret <1 x i64> %shuffle.i -} - -define <8 x i8> @test_vget_low_u8(<16 x i8> %a) { -; CHECK-LABEL: test_vget_low_u8: -; CHECK: ret -entry: - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vget_low_u16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_low_u16: -; CHECK: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - ret <4 x i16> %shuffle.i -} - -define <2 x i32> @test_vget_low_u32(<4 x i32> %a) { -; CHECK-LABEL: test_vget_low_u32: -; CHECK: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - ret <2 x i32> %shuffle.i -} - -define <1 x i64> @test_vget_low_u64(<2 x i64> %a) { -; CHECK-LABEL: test_vget_low_u64: -; CHECK: ret -entry: - %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer - ret <1 x i64> %shuffle.i -} - -define <1 x i64> @test_vget_low_p64(<2 x i64> %a) { -; CHECK-LABEL: test_vget_low_p64: -; CHECK: ret -entry: - %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer - ret <1 x i64> %shuffle.i -} - -define <4 x i16> @test_vget_low_f16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_low_f16: -; CHECK: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - ret <4 x i16> %shuffle.i -} - -define <2 x float> @test_vget_low_f32(<4 x float> %a) { -; CHECK-LABEL: test_vget_low_f32: -; CHECK: ret -entry: - %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> - ret <2 x float> %shuffle.i -} - -define <8 x i8> @test_vget_low_p8(<16 x i8> %a) { -; CHECK-LABEL: test_vget_low_p8: -; CHECK: ret -entry: - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vget_low_p16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_low_p16: -; CHECK: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - ret <4 x i16> %shuffle.i -} - -define <1 x double> @test_vget_low_f64(<2 x double> %a) { -; CHECK-LABEL: test_vget_low_f64: -; CHECK: ret -entry: - %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> zeroinitializer - ret <1 x double> %shuffle.i -} diff --git a/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll b/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll deleted file mode 100644 index 142b0a8bd537..000000000000 --- a/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; not relevant for arm64: <1 x iN> isn't legal - -; This file tests the spill of FPR8/FPR16. The volatile loads/stores force the -; allocator to keep the value live until it's needed. 
- -%bigtype_v1i8 = type [20 x <1 x i8>] - -define void @spill_fpr8(%bigtype_v1i8* %addr) { -; CHECK-LABEL: spill_fpr8: -; CHECK: 1-byte Folded Spill -; CHECK: 1-byte Folded Reload - %val1 = load volatile %bigtype_v1i8* %addr - %val2 = load volatile %bigtype_v1i8* %addr - store volatile %bigtype_v1i8 %val1, %bigtype_v1i8* %addr - store volatile %bigtype_v1i8 %val2, %bigtype_v1i8* %addr - ret void -} - -%bigtype_v1i16 = type [20 x <1 x i16>] - -define void @spill_fpr16(%bigtype_v1i16* %addr) { -; CHECK-LABEL: spill_fpr16: -; CHECK: 2-byte Folded Spill -; CHECK: 2-byte Folded Reload - %val1 = load volatile %bigtype_v1i16* %addr - %val2 = load volatile %bigtype_v1i16* %addr - store volatile %bigtype_v1i16 %val1, %bigtype_v1i16* %addr - store volatile %bigtype_v1i16 %val2, %bigtype_v1i16* %addr - ret void -} diff --git a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll index dbaccacdf554..f15cd24e5d42 100644 --- a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll +++ b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s ; A vector TruncStore can not be selected. diff --git a/test/CodeGen/AArch64/neon-v1i1-setcc.ll b/test/CodeGen/AArch64/neon-v1i1-setcc.ll deleted file mode 100644 index 114e44ac8bf0..000000000000 --- a/test/CodeGen/AArch64/neon-v1i1-setcc.ll +++ /dev/null @@ -1,69 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; arm64 has a separate copy as aarch64-neon-v1i1-setcc.ll - -; This file test the DAG node like "v1i1 SETCC v1i64, v1i64". As the v1i1 type -; is illegal in AArch64 backend, the legalizer tries to scalarize this node. -; As the v1i64 operands of SETCC are legal types, they will not be scalarized. -; Currently the type legalizer will have an assertion failure as it assumes all -; operands of SETCC have been legalized. -; FIXME: If the algorithm of type scalarization is improved and can legaize -; "v1i1 SETCC" correctly, these test cases are not needed. 
- -define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) { -; CHECK-LABEL: test_sext_extr_cmp_0: -; CHECK: cmge d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = icmp sge <1 x i64> %v1, %v2 - %2 = extractelement <1 x i1> %1, i32 0 - %vget_lane = sext i1 %2 to i64 - ret i64 %vget_lane -} - -define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) { -; CHECK-LABEL: test_sext_extr_cmp_1: -; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fcmp oeq <1 x double> %v1, %v2 - %2 = extractelement <1 x i1> %1, i32 0 - %vget_lane = sext i1 %2 to i64 - ret i64 %vget_lane -} - -define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { -; CHECK-LABEL: test_select_v1i1_0: -; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} -; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %1 = icmp eq <1 x i64> %v1, %v2 - %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3 - ret <1 x i64> %res -} - -define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) { -; CHECK-LABEL: test_select_v1i1_1: -; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} -; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %1 = fcmp oeq <1 x double> %v1, %v2 - %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3 - ret <1 x i64> %res -} - -define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) { -; CHECK-LABEL: test_select_v1i1_2: -; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} -; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %1 = icmp eq <1 x i64> %v1, %v2 - %res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3 - ret <1 x double> %res -} - -define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) { -; CHECK-LABEL: test_br_extr_cmp: -; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}} - %1 = icmp eq <1 x i64> %v1, %v2 - %2 = extractelement <1 x i1> %1, i32 0 - br i1 %2, label %if.end, label %if.then - -if.then: - ret i32 0; - -if.end: - ret i32 1; -} diff --git a/test/CodeGen/AArch64/neon-vector-list-spill.ll b/test/CodeGen/AArch64/neon-vector-list-spill.ll deleted file mode 100644 index 5df0aacb38af..000000000000 --- a/test/CodeGen/AArch64/neon-vector-list-spill.ll +++ /dev/null @@ -1,176 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast -; arm64 has separate copy as aarch64-neon-vector-list-spill.ll - -; FIXME: We should not generate ld/st for such register spill/fill, because the -; test case seems very simple and the register pressure is not high. If the -; spill/fill algorithm is optimized, this test case may not be triggered. And -; then we can delete it. 
-define i32 @spill.DPairReg(i8* %arg1, i32 %arg2) { -; CHECK-LABEL: spill.DPairReg: -; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] -; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -entry: - %vld = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %arg1, i32 4) - %cmp = icmp eq i32 %arg2, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: - tail call void @foo() - br label %if.end - -if.end: - %vld.extract = extractvalue { <2 x i32>, <2 x i32> } %vld, 0 - %res = extractelement <2 x i32> %vld.extract, i32 1 - ret i32 %res -} - -define i16 @spill.DTripleReg(i8* %arg1, i32 %arg2) { -; CHECK-LABEL: spill.DTripleReg: -; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] -; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -entry: - %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %arg1, i32 4) - %cmp = icmp eq i32 %arg2, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: - tail call void @foo() - br label %if.end - -if.end: - %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0 - %res = extractelement <4 x i16> %vld.extract, i32 1 - ret i16 %res -} - -define i16 @spill.DQuadReg(i8* %arg1, i32 %arg2) { -; CHECK-LABEL: spill.DQuadReg: -; CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] -; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -entry: - %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %arg1, i32 4) - %cmp = icmp eq i32 %arg2, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: - tail call void @foo() - br label %if.end - -if.end: - %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0 - %res = extractelement <4 x i16> %vld.extract, i32 0 - ret i16 %res -} - -define i32 @spill.QPairReg(i8* %arg1, i32 %arg2) { -; CHECK-LABEL: spill.QPairReg: -; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] -; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -entry: - %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %arg1, i32 4) - %cmp = icmp eq i32 %arg2, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: - tail call void @foo() - br label %if.end - -if.end: - %vld.extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0 - %res = extractelement <4 x i32> %vld.extract, i32 1 - ret i32 %res -} - -define float @spill.QTripleReg(i8* %arg1, i32 %arg2) { -; CHECK-LABEL: spill.QTripleReg: -; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] -; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -entry: - %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %arg1, i32 4) - %cmp = icmp eq i32 %arg2, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: - tail call void @foo() - br label %if.end - -if.end: - %vld3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0 - %res = extractelement <4 x float> 
%vld3.extract, i32 1 - ret float %res -} - -define i8 @spill.QQuadReg(i8* %arg1, i32 %arg2) { -; CHECK-LABEL: spill.QQuadReg: -; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] -; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -entry: - %vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %arg1, i32 4) - %cmp = icmp eq i32 %arg2, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: - tail call void @foo() - br label %if.end - -if.end: - %vld.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld, 0 - %res = extractelement <16 x i8> %vld.extract, i32 1 - ret i8 %res -} - -declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32) -declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32) -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32) -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32) - -declare void @foo() - -; FIXME: We should not generate ld/st for such register spill/fill, because the -; test case seems very simple and the register pressure is not high. If the -; spill/fill algorithm is optimized, this test case may not be triggered. And -; then we can delete it. -; check the spill for Register Class QPair_with_qsub_0_in_FPR128Lo -define <8 x i16> @test_2xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) { - tail call void @llvm.arm.neon.vst2lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8) - tail call void @foo() - %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> - %1 = bitcast <2 x i64> %sv to <8 x i16> - %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> - %3 = mul <8 x i16> %2, %2 - ret <8 x i16> %3 -} - -; check the spill for Register Class QTriple_with_qsub_0_in_FPR128Lo -define <8 x i16> @test_3xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) { - tail call void @llvm.arm.neon.vst3lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8) - tail call void @foo() - %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> - %1 = bitcast <2 x i64> %sv to <8 x i16> - %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> - %3 = mul <8 x i16> %2, %2 - ret <8 x i16> %3 -} - -; check the spill for Register Class QQuad_with_qsub_0_in_FPR128Lo -define <8 x i16> @test_4xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) { - tail call void @llvm.arm.neon.vst4lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8) - tail call void @foo() - %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> - %1 = bitcast <2 x i64> %sv to <8 x i16> - %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> - %3 = mul <8 x i16> %2, %2 - ret <8 x i16> %3 -} - -declare void @llvm.arm.neon.vst2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) diff --git 
a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll index 399d1c1123fb..d2697910e6f7 100644 --- a/test/CodeGen/AArch64/pic-eh-stubs.ll +++ b/test/CodeGen/AArch64/pic-eh-stubs.ll @@ -1,5 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s -; RUN: llc -mtriple=aarch64_be-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s ; RUN: llc -mtriple=arm64_be-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s diff --git a/test/CodeGen/AArch64/ragreedy-csr.ll b/test/CodeGen/AArch64/ragreedy-csr.ll index 20e1b30d74d8..de29b1baa8d5 100644 --- a/test/CodeGen/AArch64/ragreedy-csr.ll +++ b/test/CodeGen/AArch64/ragreedy-csr.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -regalloc=greedy -regalloc-csr-first-time-cost=15 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -regalloc=greedy -regalloc-csr-first-time-cost=15 | FileCheck %s ; This testing case is reduced from 197.parser prune_match function. diff --git a/test/CodeGen/AArch64/regress-bitcast-formals.ll b/test/CodeGen/AArch64/regress-bitcast-formals.ll index 7f3ba7276b50..58e0542d84f5 100644 --- a/test/CodeGen/AArch64/regress-bitcast-formals.ll +++ b/test/CodeGen/AArch64/regress-bitcast-formals.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-apple-ios7.0 -verify-machineinstrs < %s | FileCheck %s ; CallingConv.td requires a bitcast for vector arguments. Make sure we're diff --git a/test/CodeGen/AArch64/regress-f128csel-flags.ll b/test/CodeGen/AArch64/regress-f128csel-flags.ll index a7352d6815ee..313cdb1bf0c4 100644 --- a/test/CodeGen/AArch64/regress-f128csel-flags.ll +++ b/test/CodeGen/AArch64/regress-f128csel-flags.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s ; We used to not mark NZCV as being used in the continuation basic-block diff --git a/test/CodeGen/AArch64/regress-fp128-livein.ll b/test/CodeGen/AArch64/regress-fp128-livein.ll index 5c2142aeeeb4..141c0d862f6a 100644 --- a/test/CodeGen/AArch64/regress-fp128-livein.ll +++ b/test/CodeGen/AArch64/regress-fp128-livein.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s ; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s ; Regression test for NZCV reg live-in not being added to fp128csel IfTrue BB, diff --git a/test/CodeGen/AArch64/regress-tail-livereg.ll b/test/CodeGen/AArch64/regress-tail-livereg.ll index 4a6ad55b6796..e32ac8458f93 100644 --- a/test/CodeGen/AArch64/regress-tail-livereg.ll +++ b/test/CodeGen/AArch64/regress-tail-livereg.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s @var = global void()* zeroinitializer diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll index 1f8ad4503c41..55c3bcdcdd46 100644 --- a/test/CodeGen/AArch64/regress-tblgen-chains.ll +++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefix CHECK-AARCH64 ; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s 
--check-prefix CHECK-ARM64 ; When generating DAG selection tables, TableGen used to only flag an @@ -13,7 +12,6 @@ declare void @bar(i8*) define i64 @test_chains() { -; CHECK-AARCH64-LABEL: test_chains: ; CHECK-ARM64-LABEL: test_chains: %locvar = alloca i8 @@ -26,10 +24,6 @@ define i64 @test_chains() { %inc.3 = add i64 %inc.2, 1 %inc.4 = trunc i64 %inc.3 to i8 store i8 %inc.4, i8* %locvar -; CHECK-AARCH64: ldrb {{w[0-9]+}}, [sp, [[LOCADDR:#[0-9]+]]] -; CHECK-AARCH64: add {{w[0-9]+}}, {{w[0-9]+}}, #1 -; CHECK-AARCH64: strb {{w[0-9]+}}, [sp, [[LOCADDR]]] -; CHECK-AARCH64: ldrb {{w[0-9]+}}, [sp, [[LOCADDR]]] ; CHECK-ARM64: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]] ; CHECK-ARM64: add {{w[0-9]+}}, {{w[0-9]+}}, #1 @@ -39,6 +33,5 @@ define i64 @test_chains() { %ret.1 = load i8* %locvar %ret.2 = zext i8 %ret.1 to i64 ret i64 %ret.2 -; CHECK-AARCH64: ret ; CHECK-ARM64: ret } diff --git a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll index cfd94e1503b1..cc42b0c9df41 100644 --- a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll +++ b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s @var = global i32 0 diff --git a/test/CodeGen/AArch64/regress-wzr-allocatable.ll b/test/CodeGen/AArch64/regress-wzr-allocatable.ll deleted file mode 100644 index 8620ce14e9b4..000000000000 --- a/test/CodeGen/AArch64/regress-wzr-allocatable.ll +++ /dev/null @@ -1,44 +0,0 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 - -; Skipping for arm64, there's no evidence it would ever have hit the same -; problem. - -; When WZR wasn't marked as reserved, this function tried to allocate -; it at O0 and then generated an internal fault (mostly incidentally) -; when it discovered that it was already in use for a multiplication. - -; I'm not really convinced this is a good test since it could easily -; stop testing what it does now with no-one any the wiser. However, I -; can't think of a better way to force the allocator to use WZR -; specifically. 
- -define void @test() nounwind { -entry: - br label %for.cond - -for.cond: ; preds = %for.body, %entry - br i1 undef, label %for.body, label %for.end - -for.body: ; preds = %for.cond - br label %for.cond - -for.end: ; preds = %for.cond - br label %for.cond6 - -for.cond6: ; preds = %for.body9, %for.end - br i1 undef, label %for.body9, label %while.cond30 - -for.body9: ; preds = %for.cond6 - store i16 0, i16* undef, align 2 - %0 = load i32* undef, align 4 - %1 = load i32* undef, align 4 - %mul15 = mul i32 %0, %1 - %add16 = add i32 %mul15, 32768 - %div = udiv i32 %add16, 65535 - %add17 = add i32 %div, 1 - store i32 %add17, i32* undef, align 4 - br label %for.cond6 - -while.cond30: ; preds = %for.cond6 - ret void -} diff --git a/test/CodeGen/AArch64/returnaddr.ll b/test/CodeGen/AArch64/returnaddr.ll index 3f7edcbaa89c..b136f044cad8 100644 --- a/test/CodeGen/AArch64/returnaddr.ll +++ b/test/CodeGen/AArch64/returnaddr.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i8* @rt0(i32 %x) nounwind readnone { diff --git a/test/CodeGen/AArch64/setcc-takes-i32.ll b/test/CodeGen/AArch64/setcc-takes-i32.ll index 21c2688ca70f..f06c8ecd28da 100644 --- a/test/CodeGen/AArch64/setcc-takes-i32.ll +++ b/test/CodeGen/AArch64/setcc-takes-i32.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=arm64-none-linux-gnu -o - %s | FileCheck %s ; Most important point here is that the promotion of the i1 works diff --git a/test/CodeGen/AArch64/sext_inreg.ll b/test/CodeGen/AArch64/sext_inreg.ll deleted file mode 100644 index 7873c6462d78..000000000000 --- a/test/CodeGen/AArch64/sext_inreg.ll +++ /dev/null @@ -1,202 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s - -; arm64: This test contains much that is unique and valuable. Unfortunately the -; bits that are unique aren't valuable and the bits that are valuable aren't -; unique. (weird ABI types vs bog-standard shifting & extensions). 
- -; For formal arguments, we have the following vector type promotion, -; v2i8 is promoted to v2i32(f64) -; v2i16 is promoted to v2i32(f64) -; v4i8 is promoted to v4i16(f64) -; v8i1 is promoted to v8i16(f128) - -define <2 x i8> @test_sext_inreg_v2i8i16(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i8i16 -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h - %1 = sext <2 x i8> %v1 to <2 x i16> - %2 = sext <2 x i8> %v2 to <2 x i16> - %3 = shufflevector <2 x i16> %1, <2 x i16> %2, <2 x i32> - %4 = trunc <2 x i16> %3 to <2 x i8> - ret <2 x i8> %4 -} - -define <2 x i8> @test_sext_inreg_v2i8i16_2(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i8i16_2 -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h - %a1 = shl <2 x i32> %v1, - %a2 = ashr <2 x i32> %a1, - %b1 = shl <2 x i32> %v2, - %b2 = ashr <2 x i32> %b1, - %c = shufflevector <2 x i32> %a2, <2 x i32> %b2, <2 x i32> - %d = trunc <2 x i32> %c to <2 x i8> - ret <2 x i8> %d -} - -define <2 x i8> @test_sext_inreg_v2i8i32(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i8i32 -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h - %1 = sext <2 x i8> %v1 to <2 x i32> - %2 = sext <2 x i8> %v2 to <2 x i32> - %3 = shufflevector <2 x i32> %1, <2 x i32> %2, <2 x i32> - %4 = trunc <2 x i32> %3 to <2 x i8> - ret <2 x i8> %4 -} - -define <2 x i8> @test_sext_inreg_v2i8i64(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i8i64 -; CHECK: ushll v1.2d, v1.2s, #0 -; CHECK: ushll v0.2d, v0.2s, #0 -; CHECK: shl v0.2d, v0.2d, #56 -; CHECK: sshr v0.2d, v0.2d, #56 -; CHECK: shl v1.2d, v1.2d, #56 -; CHECK: sshr v1.2d, v1.2d, #56 - %1 = sext <2 x i8> %v1 to <2 x i64> - %2 = sext <2 x i8> %v2 to <2 x i64> - %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> - %4 = trunc <2 x i64> %3 to <2 x i8> - ret <2 x i8> %4 -} - -define <4 x i8> @test_sext_inreg_v4i8i16(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v4i8i16 -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h - %1 = sext <4 x i8> %v1 to <4 x i16> - %2 = sext <4 x i8> %v2 to <4 x i16> - %3 = shufflevector <4 x i16> %1, <4 x i16> %2, <4 x i32> - %4 = trunc <4 x i16> %3 to <4 x i8> - ret <4 x i8> %4 -} - -define <4 x i8> @test_sext_inreg_v4i8i16_2(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v4i8i16_2 -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h - %a1 = shl <4 x i16> %v1, - %a2 = ashr <4 x i16> %a1, - %b1 = shl <4 x i16> %v2, - %b2 = ashr <4 x i16> %b1, - %c = shufflevector <4 x i16> %a2, <4 x i16> %b2, <4 x i32> - %d = trunc <4 x i16> %c to <4 x i8> - ret <4 x i8> %d -} - -define <4 x i8> @test_sext_inreg_v4i8i32(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v4i8i32 -; CHECK: ushll v1.4s, v1.4h, #0 -; CHECK: ushll v0.4s, v0.4h, #0 -; CHECK: shl v0.4s, v0.4s, #24 -; CHECK: sshr v0.4s, v0.4s, #24 -; CHECK: shl v1.4s, v1.4s, #24 -; CHECK: sshr v1.4s, v1.4s, #24 - %1 = sext <4 x i8> %v1 
to <4 x i32> - %2 = sext <4 x i8> %v2 to <4 x i32> - %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> - %4 = trunc <4 x i32> %3 to <4 x i8> - ret <4 x i8> %4 -} - -define <8 x i8> @test_sext_inreg_v8i8i16(<8 x i8> %v1, <8 x i8> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v8i8i16 -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK: sshll v1.8h, v1.8b, #0 - %1 = sext <8 x i8> %v1 to <8 x i16> - %2 = sext <8 x i8> %v2 to <8 x i16> - %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> - %4 = trunc <8 x i16> %3 to <8 x i8> - ret <8 x i8> %4 -} - -define <8 x i1> @test_sext_inreg_v8i1i16(<8 x i1> %v1, <8 x i1> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v8i1i16 -; CHECK: ushll v1.8h, v1.8b, #0 -; CHECK: ushll v0.8h, v0.8b, #0 -; CHECK: shl v0.8h, v0.8h, #15 -; CHECK: sshr v0.8h, v0.8h, #15 -; CHECK: shl v1.8h, v1.8h, #15 -; CHECK: sshr v1.8h, v1.8h, #15 - %1 = sext <8 x i1> %v1 to <8 x i16> - %2 = sext <8 x i1> %v2 to <8 x i16> - %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> - %4 = trunc <8 x i16> %3 to <8 x i1> - ret <8 x i1> %4 -} - -define <2 x i16> @test_sext_inreg_v2i16i32(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i16i32 -; CHECK: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s -; CHECK-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-NEXT: uzp1 v1.4s, v1.4s, v1.4s - %1 = sext <2 x i16> %v1 to <2 x i32> - %2 = sext <2 x i16> %v2 to <2 x i32> - %3 = shufflevector <2 x i32> %1, <2 x i32> %2, <2 x i32> - %4 = trunc <2 x i32> %3 to <2 x i16> - ret <2 x i16> %4 -} - -define <2 x i16> @test_sext_inreg_v2i16i32_2(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i16i32_2 -; CHECK: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s -; CHECK-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-NEXT: uzp1 v1.4s, v1.4s, v1.4s - %a1 = shl <2 x i32> %v1, - %a2 = ashr <2 x i32> %a1, - %b1 = shl <2 x i32> %v2, - %b2 = ashr <2 x i32> %b1, - %c = shufflevector <2 x i32> %a2, <2 x i32> %b2, <2 x i32> - %d = trunc <2 x i32> %c to <2 x i16> - ret <2 x i16> %d -} - -define <2 x i16> @test_sext_inreg_v2i16i64(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i16i64 -; CHECK: ushll v1.2d, v1.2s, #0 -; CHECK: ushll v0.2d, v0.2s, #0 -; CHECK: shl v0.2d, v0.2d, #48 -; CHECK: sshr v0.2d, v0.2d, #48 -; CHECK: shl v1.2d, v1.2d, #48 -; CHECK: sshr v1.2d, v1.2d, #48 - %1 = sext <2 x i16> %v1 to <2 x i64> - %2 = sext <2 x i16> %v2 to <2 x i64> - %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> - %4 = trunc <2 x i64> %3 to <2 x i16> - ret <2 x i16> %4 -} - -define <4 x i16> @test_sext_inreg_v4i16i32(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v4i16i32 -; CHECK: sshll v0.4s, v0.4h, #0 -; CHECK: sshll v1.4s, v1.4h, #0 - %1 = sext <4 x i16> %v1 to <4 x i32> - %2 = sext <4 x i16> %v2 to <4 x i32> - %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> - %4 = trunc <4 x i32> %3 to <4 x i16> - ret <4 x i16> %4 -} - -define <2 x i32> @test_sext_inreg_v2i32i64(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i32i64 -; CHECK: sshll v0.2d, v0.2s, #0 -; CHECK: sshll v1.2d, v1.2s, #0 - %1 = sext <2 x i32> %v1 to <2 x i64> - %2 = sext <2 x i32> %v2 to <2 x i64> - %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> - %4 = trunc <2 x i64> %3 to <2 x i32> - ret <2 x i32> %4 -} - diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll index a08f8cbd702e..85245718afc0 
100644 --- a/test/CodeGen/AArch64/sibling-call.ll +++ b/test/CodeGen/AArch64/sibling-call.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -arm64-load-store-opt=0 | FileCheck %s declare void @callee_stack0() diff --git a/test/CodeGen/AArch64/sincos-expansion.ll b/test/CodeGen/AArch64/sincos-expansion.ll index 1498eb53625a..5ba1d8d0a834 100644 --- a/test/CodeGen/AArch64/sincos-expansion.ll +++ b/test/CodeGen/AArch64/sincos-expansion.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s define float @test_sincos_f32(float %f) { diff --git a/test/CodeGen/AArch64/sincospow-vector-expansion.ll b/test/CodeGen/AArch64/sincospow-vector-expansion.ll index baa73a3c7163..38c8bb2d5e35 100644 --- a/test/CodeGen/AArch64/sincospow-vector-expansion.ll +++ b/test/CodeGen/AArch64/sincospow-vector-expansion.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc -o - %s -verify-machineinstrs -mtriple=arm64-linux-gnu -mattr=+neon | FileCheck %s diff --git a/test/CodeGen/AArch64/stackpointer.ll b/test/CodeGen/AArch64/stackpointer.ll deleted file mode 100644 index 1f20692c8c95..000000000000 --- a/test/CodeGen/AArch64/stackpointer.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -mtriple=aarch64-linux-gnueabi | FileCheck %s -; arm64 has a separate copy of this test - -define i64 @get_stack() nounwind { -entry: -; CHECK-LABEL: get_stack: -; CHECK: mov x0, sp - %sp = call i64 @llvm.read_register.i64(metadata !0) - ret i64 %sp -} - -define void @set_stack(i64 %val) nounwind { -entry: -; CHECK-LABEL: set_stack: -; CHECK: mov sp, x0 - call void @llvm.write_register.i64(metadata !0, i64 %val) - ret void -} - -declare i64 @llvm.read_register.i64(metadata) nounwind -declare void @llvm.write_register.i64(metadata, i64) nounwind - -; register unsigned long current_stack_pointer asm("sp"); -; CHECK-NOT: .asciz "sp" -!0 = metadata !{metadata !"sp\00"} diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll index da05848dcc59..b3841fac68ab 100644 --- a/test/CodeGen/AArch64/tail-call.ll +++ b/test/CodeGen/AArch64/tail-call.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck --check-prefix=CHECK-ARM64 %s declare fastcc void @callee_stack0() diff --git a/test/CodeGen/AArch64/tls-dynamic-together.ll b/test/CodeGen/AArch64/tls-dynamic-together.ll deleted file mode 100644 index 80ed2181c4c1..000000000000 --- a/test/CodeGen/AArch64/tls-dynamic-together.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: llc -O0 -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -; arm64 has its own copy of this file, copied during implementation. - -; If the .tlsdesccall and blr parts are emitted completely separately (even with -; glue) then LLVM will separate them quite happily (with a spill at O0, hence -; the option). This is definitely wrong, so we make sure they are emitted -; together. 
- -@general_dynamic_var = external thread_local global i32 - -define i32 @test_generaldynamic() { -; CHECK-LABEL: test_generaldynamic: - - %val = load i32* @general_dynamic_var - ret i32 %val - -; CHECK: .tlsdesccall general_dynamic_var -; CHECK-NEXT: blr {{x[0-9]+}} -} diff --git a/test/CodeGen/AArch64/tls-dynamics.ll b/test/CodeGen/AArch64/tls-dynamics.ll deleted file mode 100644 index 0fb84c823bc9..000000000000 --- a/test/CodeGen/AArch64/tls-dynamics.ll +++ /dev/null @@ -1,121 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s -; arm64 has its own tls-dynamics.ll, copied from this one during implementation. -@general_dynamic_var = external thread_local global i32 - -define i32 @test_generaldynamic() { -; CHECK-LABEL: test_generaldynamic: - - %val = load i32* @general_dynamic_var - ret i32 %val - -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK-DAG: add x0, x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:general_dynamic_var -; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:general_dynamic_var] -; CHECK: .tlsdesccall general_dynamic_var -; CHECK-NEXT: blr [[CALLEE]] - -; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0 -; CHECK: ldr w0, [x[[TP]], x0] - -; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_CALL - -} - -define i32* @test_generaldynamic_addr() { -; CHECK-LABEL: test_generaldynamic_addr: - - ret i32* @general_dynamic_var - -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK-DAG: add x0, x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:general_dynamic_var -; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:general_dynamic_var] -; CHECK: .tlsdesccall general_dynamic_var -; CHECK-NEXT: blr [[CALLEE]] - -; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0 -; CHECK: add x0, [[TP]], x0 - -; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_CALL - -} - -@local_dynamic_var = external thread_local(localdynamic) global i32 - -define i32 @test_localdynamic() { -; CHECK-LABEL: test_localdynamic: - - %val = load i32* @local_dynamic_var - ret i32 %val - -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK-DAG: add x0, x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: .tlsdesccall _TLS_MODULE_BASE_ -; CHECK-NEXT: blr [[CALLEE]] - -; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var -; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var - -; CHECK: ldr w0, [x0, [[DTP_OFFSET]]] - -; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_CALL - -} - -define i32* @test_localdynamic_addr() { -; CHECK-LABEL: test_localdynamic_addr: - - ret i32* @local_dynamic_var - -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK-DAG: add x0, x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: 
.tlsdesccall _TLS_MODULE_BASE_ -; CHECK-NEXT: blr [[CALLEE]] - -; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var -; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var - -; CHECK: add x0, x0, [[DTP_OFFSET]] - -; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_CALL - -} - -; The entire point of the local-dynamic access model is to have a single call to -; the expensive resolver. Make sure we achieve that goal. - -@local_dynamic_var2 = external thread_local(localdynamic) global i32 - -define i32 @test_localdynamic_deduplicate() { -; CHECK-LABEL: test_localdynamic_deduplicate: - - %val = load i32* @local_dynamic_var - %val2 = load i32* @local_dynamic_var2 - - %sum = add i32 %val, %val2 - ret i32 %sum - -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK-DAG: add x0, x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: .tlsdesccall _TLS_MODULE_BASE_ -; CHECK-NEXT: blr [[CALLEE]] - -; CHECK-NOT: _TLS_MODULE_BASE_ - -; CHECK: ret -} diff --git a/test/CodeGen/AArch64/tls-execs.ll b/test/CodeGen/AArch64/tls-execs.ll deleted file mode 100644 index 61600380c244..000000000000 --- a/test/CodeGen/AArch64/tls-execs.ll +++ /dev/null @@ -1,64 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s -; arm64 has its own copy of tls-execs.ll, copied from this one during implementation. - -@initial_exec_var = external thread_local(initialexec) global i32 - -define i32 @test_initial_exec() { -; CHECK-LABEL: test_initial_exec: - %val = load i32* @initial_exec_var - -; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var -; CHECK: ldr x[[TP_OFFSET:[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var] -; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0 -; CHECK: ldr w0, [x[[TP]], x[[TP_OFFSET]]] - -; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC - - ret i32 %val -} - -define i32* @test_initial_exec_addr() { -; CHECK-LABEL: test_initial_exec_addr: - ret i32* @initial_exec_var - -; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var -; CHECK: ldr [[TP_OFFSET:x[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var] -; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0 -; CHECK: add x0, [[TP]], [[TP_OFFSET]] - -; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC - -} - -@local_exec_var = thread_local(initialexec) global i32 0 - -define i32 @test_local_exec() { -; CHECK-LABEL: test_local_exec: - %val = load i32* @local_exec_var - -; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var // encoding: [A,A,0xa0'A',0x92'A'] -; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var -; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0 -; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]] - -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1 -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC - - ret i32 %val -} - -define i32* @test_local_exec_addr() { -; CHECK-LABEL: test_local_exec_addr: - ret i32* @local_exec_var - -; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var -; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var -; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0 -; CHECK: 
add x0, [[TP]], [[TP_OFFSET]] - -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1 -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC -} diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll index b6e2b19fb845..8a2fe26803ea 100644 --- a/test/CodeGen/AArch64/tst-br.ll +++ b/test/CodeGen/AArch64/tst-br.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s ; We've got the usual issues with LLVM reordering blocks here. The diff --git a/test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll b/test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll deleted file mode 100644 index 60cc6e40b359..000000000000 --- a/test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll +++ /dev/null @@ -1,172 +0,0 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -o - | FileCheck %s -; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -o - | FileCheck %s -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -aarch64-no-strict-align -mattr=+neon -o - | FileCheck %s -; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu -aarch64-no-strict-align -mattr=+neon -o - | FileCheck %s -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -aarch64-strict-align -mattr=+neon -o - | FileCheck %s -; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu -aarch64-strict-align -mattr=+neon -o - | FileCheck %s --check-prefix=BE-STRICT-ALIGN - -;; Check element-aligned 128-bit vector load/store - integer -define <16 x i8> @qwordint (<16 x i8>* %head.v16i8, <8 x i16>* %head.v8i16, <4 x i32>* %head.v4i32, <2 x i64>* %head.v2i64, - <16 x i8>* %tail.v16i8, <8 x i16>* %tail.v8i16, <4 x i32>* %tail.v4i32, <2 x i64>* %tail.v2i64) { -; CHECK-LABEL: qwordint -; CHECK: ld1 { v0.16b }, [x0] -; CHECK: ld1 { v1.8h }, [x1] -; CHECK: ld1 { v2.4s }, [x2] -; CHECK: ld1 { v3.2d }, [x3] -; CHECK: st1 { v0.16b }, [x4] -; CHECK: st1 { v1.8h }, [x5] -; CHECK: st1 { v2.4s }, [x6] -; CHECK: st1 { v3.2d }, [x7] -; BE-STRICT-ALIGN-LABEL: qwordint -; BE-STRICT-ALIGN: ldrb -; BE-STRICT-ALIGN: ldrh -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: strb -; BE-STRICT-ALIGN: strh -; BE-STRICT-ALIGN: str -; BE-STRICT-ALIGN: str -entry: - %val.v16i8 = load <16 x i8>* %head.v16i8, align 1 - %val.v8i16 = load <8 x i16>* %head.v8i16, align 2 - %val.v4i32 = load <4 x i32>* %head.v4i32, align 4 - %val.v2i64 = load <2 x i64>* %head.v2i64, align 8 - store <16 x i8> %val.v16i8, <16 x i8>* %tail.v16i8, align 1 - store <8 x i16> %val.v8i16, <8 x i16>* %tail.v8i16, align 2 - store <4 x i32> %val.v4i32, <4 x i32>* %tail.v4i32, align 4 - store <2 x i64> %val.v2i64, <2 x i64>* %tail.v2i64, align 8 - ret <16 x i8> %val.v16i8 -} - -;; Check element-aligned 128-bit vector load/store - floating point -define <4 x float> @qwordfloat (<4 x float>* %head.v4f32, <2 x double>* %head.v2f64, - <4 x float>* %tail.v4f32, <2 x double>* %tail.v2f64) { -; CHECK-LABEL: qwordfloat -; CHECK: ld1 { v0.4s }, [x0] -; CHECK: ld1 { v1.2d }, [x1] -; CHECK: st1 { v0.4s }, [x2] -; CHECK: st1 { v1.2d }, [x3] -; BE-STRICT-ALIGN-LABEL: qwordfloat -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: str -; BE-STRICT-ALIGN: str -entry: - %val.v4f32 = load <4 x float>* %head.v4f32, align 4 - %val.v2f64 = load <2 x double>* %head.v2f64, align 8 - store <4 x float> %val.v4f32, <4 x float>* %tail.v4f32, align 4 - store <2 x double> %val.v2f64, <2 x double>* %tail.v2f64, align 8 - ret <4 x float> %val.v4f32 -} - -;; Check element-aligned 64-bit 
vector load/store - integer -define <8 x i8> @dwordint (<8 x i8>* %head.v8i8, <4 x i16>* %head.v4i16, <2 x i32>* %head.v2i32, <1 x i64>* %head.v1i64, - <8 x i8>* %tail.v8i8, <4 x i16>* %tail.v4i16, <2 x i32>* %tail.v2i32, <1 x i64>* %tail.v1i64) { -; CHECK-LABEL: dwordint -; CHECK: ld1 { v0.8b }, [x0] -; CHECK: ld1 { v1.4h }, [x1] -; CHECK: ld1 { v2.2s }, [x2] -; CHECK: ld1 { v3.1d }, [x3] -; CHECK: st1 { v0.8b }, [x4] -; CHECK: st1 { v1.4h }, [x5] -; CHECK: st1 { v2.2s }, [x6] -; CHECK: st1 { v3.1d }, [x7] -; BE-STRICT-ALIGN-LABEL: dwordint -; BE-STRICT-ALIGN: ldrb -; BE-STRICT-ALIGN: ldrh -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: ld1 { v1.1d }, [x3] -; BE-STRICT-ALIGN: strb -; BE-STRICT-ALIGN: strh -; BE-STRICT-ALIGN: str -; BE-STRICT-ALIGN: st1 { v1.1d }, [x7] -entry: - %val.v8i8 = load <8 x i8>* %head.v8i8, align 1 - %val.v4i16 = load <4 x i16>* %head.v4i16, align 2 - %val.v2i32 = load <2 x i32>* %head.v2i32, align 4 - %val.v1i64 = load <1 x i64>* %head.v1i64, align 8 - store <8 x i8> %val.v8i8, <8 x i8>* %tail.v8i8 , align 1 - store <4 x i16> %val.v4i16, <4 x i16>* %tail.v4i16, align 2 - store <2 x i32> %val.v2i32, <2 x i32>* %tail.v2i32, align 4 - store <1 x i64> %val.v1i64, <1 x i64>* %tail.v1i64, align 8 - ret <8 x i8> %val.v8i8 -} - -;; Check element-aligned 64-bit vector load/store - floating point -define <2 x float> @dwordfloat (<2 x float>* %head.v2f32, <1 x double>* %head.v1f64, - <2 x float>* %tail.v2f32, <1 x double>* %tail.v1f64) { -; CHECK-LABEL: dwordfloat -; CHECK: ld1 { v0.2s }, [x0] -; CHECK: ld1 { v1.1d }, [x1] -; CHECK: st1 { v0.2s }, [x2] -; CHECK: st1 { v1.1d }, [x3] -; BE-STRICT-ALIGN-LABEL: dwordfloat -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: ld1 { v1.1d }, [x1] -; BE-STRICT-ALIGN: str -; BE-STRICT-ALIGN: st1 { v1.1d }, [x3] -entry: - %val.v2f32 = load <2 x float>* %head.v2f32, align 4 - %val.v1f64 = load <1 x double>* %head.v1f64, align 8 - store <2 x float> %val.v2f32, <2 x float>* %tail.v2f32, align 4 - store <1 x double> %val.v1f64, <1 x double>* %tail.v1f64, align 8 - ret <2 x float> %val.v2f32 -} - -;; Check load/store of 128-bit vectors with less-than 16-byte alignment -define <2 x i64> @align2vi64 (<2 x i64>* %head.byte, <2 x i64>* %head.half, <2 x i64>* %head.word, <2 x i64>* %head.dword, - <2 x i64>* %tail.byte, <2 x i64>* %tail.half, <2 x i64>* %tail.word, <2 x i64>* %tail.dword) { -; CHECK-LABEL: align2vi64 -; CHECK: ld1 { v0.2d }, [x0] -; CHECK: ld1 { v1.2d }, [x1] -; CHECK: ld1 { v2.2d }, [x2] -; CHECK: ld1 { v3.2d }, [x3] -; CHECK: st1 { v0.2d }, [x4] -; CHECK: st1 { v1.2d }, [x5] -; CHECK: st1 { v2.2d }, [x6] -; CHECK: st1 { v3.2d }, [x7] -; BE-STRICT-ALIGN-LABEL: align2vi64 -; BE-STRICT-ALIGN: ldrb -; BE-STRICT-ALIGN: ldrh -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: strb -; BE-STRICT-ALIGN: strh -; BE-STRICT-ALIGN: str -entry: - %val.byte = load <2 x i64>* %head.byte, align 1 - %val.half = load <2 x i64>* %head.half, align 2 - %val.word = load <2 x i64>* %head.word, align 4 - %val.dword = load <2 x i64>* %head.dword, align 8 - store <2 x i64> %val.byte, <2 x i64>* %tail.byte, align 1 - store <2 x i64> %val.half, <2 x i64>* %tail.half, align 2 - store <2 x i64> %val.word, <2 x i64>* %tail.word, align 4 - store <2 x i64> %val.dword, <2 x i64>* %tail.dword, align 8 - ret <2 x i64> %val.byte - } - -;; Check load/store of 64-bit vectors with less-than 8-byte alignment -define <2 x float> @align2vf32 (<2 x float>* %head.byte, <2 x float>* %head.half, <2 x float>* %head.word, <2 x float>* %head.dword, - <2 x float>* %tail.byte, <2 x float>* 
%tail.half, <2 x float>* %tail.word, <2 x float>* %tail.dword) { -; CHECK-LABEL: align2vf32 -; CHECK: ld1 { v0.2s }, [x0] -; CHECK: ld1 { v1.2s }, [x1] -; CHECK: ld1 { v2.2s }, [x2] -; CHECK: st1 { v0.2s }, [x4] -; CHECK: st1 { v1.2s }, [x5] -; CHECK: st1 { v2.2s }, [x6] -; BE-STRICT-ALIGN-LABEL: align2vf32 -; BE-STRICT-ALIGN: ldrb -; BE-STRICT-ALIGN: ldrh -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: strb -; BE-STRICT-ALIGN: strh -; BE-STRICT-ALIGN: str -entry: - %val.byte = load <2 x float>* %head.byte, align 1 - %val.half = load <2 x float>* %head.half, align 2 - %val.word = load <2 x float>* %head.word, align 4 - store <2 x float> %val.byte, <2 x float>* %tail.byte, align 1 - store <2 x float> %val.half, <2 x float>* %tail.half, align 2 - store <2 x float> %val.word, <2 x float>* %tail.word, align 4 - ret <2 x float> %val.byte -} diff --git a/test/CodeGen/AArch64/variadic.ll b/test/CodeGen/AArch64/variadic.ll deleted file mode 100644 index 7b85227cbd32..000000000000 --- a/test/CodeGen/AArch64/variadic.ll +++ /dev/null @@ -1,241 +0,0 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s -; arm64 has its own copy of this file, ported during implementation (variadic-aapcs.ll) - -%va_list = type {i8*, i8*, i8*, i32, i32} - -@var = global %va_list zeroinitializer - -declare void @llvm.va_start(i8*) - -define void @test_simple(i32 %n, ...) { -; CHECK-LABEL: test_simple: -; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]] -; CHECK: mov x[[FPRBASE:[0-9]+]], sp -; CHECK: str q7, [x[[FPRBASE]], #112] -; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] -; CHECK: str x7, [x[[GPRBASE]], #48] - -; CHECK-NOFP: sub sp, sp, #[[STACKSIZE:[0-9]+]] -; CHECK-NOFP: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] -; CHECK-NOFP: str x7, [x[[GPRBASE]], #48] -; CHECK-NOFP-NOT: str q7, -; CHECK-NOFP: str x1, [sp, #[[GPRFROMSP]]] - -; Omit the middle ones - -; CHECK: str q0, [sp] -; CHECK: str x1, [sp, #[[GPRFROMSP]]] -; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var - -; CHECK-NOFP-NOT: str q0, [sp] -; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var - - %addr = bitcast %va_list* @var to i8* - call void @llvm.va_start(i8* %addr) -; CHECK: movn [[VR_OFFS:w[0-9]+]], #127 -; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] -; CHECK: movn [[GR_OFFS:w[0-9]+]], #55 -; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24] -; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #128 -; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] -; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #56 -; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8] -; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] -; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - -; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28] -; CHECK-NOFP: movn [[GR_OFFS:w[0-9]+]], #55 -; CHECK-NOFP: str [[GR_OFFS]], [x[[VA_LIST]], #24] -; CHECK-NOFP: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #56 -; CHECK-NOFP: str [[GR_TOP]], [x[[VA_LIST]], #8] -; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] -; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - - ret void -} - -define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) 
{ -; CHECK-LABEL: test_fewargs: -; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]] -; CHECK: mov x[[FPRBASE:[0-9]+]], sp -; CHECK: str q7, [x[[FPRBASE]], #96] -; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] -; CHECK: str x7, [x[[GPRBASE]], #32] - -; CHECK-NOFP: sub sp, sp, #[[STACKSIZE:[0-9]+]] -; CHECK-NOFP-NOT: str q7, -; CHECK-NOFP: mov x[[GPRBASE:[0-9]+]], sp -; CHECK-NOFP: str x7, [x[[GPRBASE]], #24] - -; Omit the middle ones - -; CHECK: str q1, [sp] -; CHECK: str x3, [sp, #[[GPRFROMSP]]] - -; CHECK-NOFP-NOT: str q1, [sp] -; CHECK-NOFP: str x4, [sp] - - %addr = bitcast %va_list* @var to i8* - call void @llvm.va_start(i8* %addr) -; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK: movn [[VR_OFFS:w[0-9]+]], #111 -; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] -; CHECK: movn [[GR_OFFS:w[0-9]+]], #39 -; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24] -; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #112 -; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] -; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #40 -; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8] -; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] -; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - -; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28] -; CHECK-NOFP: movn [[GR_OFFS:w[0-9]+]], #31 -; CHECK-NOFP: str [[GR_OFFS]], [x[[VA_LIST]], #24] -; CHECK-NOFP: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #32 -; CHECK-NOFP: str [[GR_TOP]], [x[[VA_LIST]], #8] -; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] -; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - - ret void -} - -define void @test_nospare([8 x i64], [8 x float], ...) { -; CHECK-LABEL: test_nospare: - - %addr = bitcast %va_list* @var to i8* - call void @llvm.va_start(i8* %addr) -; CHECK-NOT: sub sp, sp -; CHECK: mov [[STACK:x[0-9]+]], sp -; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - -; CHECK-NOFP-NOT: sub sp, sp -; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #64 -; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - ret void -} - -; If there are non-variadic arguments on the stack (here two i64s) then the -; __stack field should point just past them. -define void @test_offsetstack([10 x i64], [3 x float], ...) 
{ -; CHECK-LABEL: test_offsetstack: -; CHECK: sub sp, sp, #80 -; CHECK: mov x[[FPRBASE:[0-9]+]], sp -; CHECK: str q7, [x[[FPRBASE]], #64] - -; CHECK-NOT: str x{{[0-9]+}}, - -; CHECK-NOFP-NOT: str q7, -; CHECK-NOT: str x7, - -; Omit the middle ones - -; CHECK: str q3, [sp] - - %addr = bitcast %va_list* @var to i8* - call void @llvm.va_start(i8* %addr) -; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK: movn [[VR_OFFS:w[0-9]+]], #79 -; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] -; CHECK: str wzr, [x[[VA_LIST]], #24] -; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #80 -; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] -; CHECK: add [[STACK:x[0-9]+]], sp, #96 -; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - -; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #40 -; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] -; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28] -; CHECK-NOFP: str wzr, [x[[VA_LIST]], #24] - ret void -} - -declare void @llvm.va_end(i8*) - -define void @test_va_end() nounwind { -; CHECK-LABEL: test_va_end: -; CHECK-NEXT: BB#0 -; CHECK-NOFP: BB#0 - - %addr = bitcast %va_list* @var to i8* - call void @llvm.va_end(i8* %addr) - - ret void -; CHECK-NEXT: ret -; CHECK-NOFP-NEXT: ret -} - -declare void @llvm.va_copy(i8* %dest, i8* %src) - -@second_list = global %va_list zeroinitializer - -define void @test_va_copy() { -; CHECK-LABEL: test_va_copy: - %srcaddr = bitcast %va_list* @var to i8* - %dstaddr = bitcast %va_list* @second_list to i8* - call void @llvm.va_copy(i8* %dstaddr, i8* %srcaddr) - -; Check beginning and end again: - -; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list -; CHECK: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] -; CHECK: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24] -; CHECK: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list] -; CHECK: str [[BLOCK2]], [x[[DEST_LIST]], #24] - -; CHECK-NOFP: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK-NOFP: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list -; CHECK-NOFP: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] -; CHECK-NOFP: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24] -; CHECK-NOFP: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list] -; CHECK-NOFP: str [[BLOCK2]], [x[[DEST_LIST]], #24] - - ret void -; CHECK: ret -; CHECK-NOFP: ret -} - -%struct.s_3i = type { i32, i32, i32 } - -; This checks that, if the last named argument is not a multiple of 8 bytes, -; and is allocated on the stack, that __va_list.__stack is initialised to the -; first 8-byte aligned location above it. -define void @test_va_odd_struct_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, [1 x i64], %struct.s_3i* byval nocapture readnone align 4 %h, ...) 
{ -; CHECK-LABEL: test_va_odd_struct_on_stack: - -; CHECK: sub sp, sp, #128 -; CHECK: mov x[[FPRBASE:[0-9]+]], sp -; CHECK: str q7, [x[[FPRBASE]], #112] - -; CHECK-NOT: str x{{[0-9]+}}, - -; CHECK-NOFP-NOT: str q7, -; CHECK-NOT: str x7, - -; Omit the middle ones - -; CHECK: str q0, [sp] - - %addr = bitcast %va_list* @var to i8* - call void @llvm.va_start(i8* %addr) -; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK: movn [[VR_OFFS:w[0-9]+]], #127 -; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] -; CHECK: str wzr, [x[[VA_LIST]], #24] -; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #128 -; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] -; This constant would be #140 if it was not 8-byte aligned -; CHECK: add [[STACK:x[0-9]+]], sp, #144 -; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - -; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; This constant would be #12 if it was not 8-byte aligned -; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #16 -; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] -; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28] -; CHECK-NOFP: str wzr, [x[[VA_LIST]], #24] - ret void -} diff --git a/test/CodeGen/AArch64/zero-reg.ll b/test/CodeGen/AArch64/zero-reg.ll index c4073cba08db..44072c67d904 100644 --- a/test/CodeGen/AArch64/zero-reg.ll +++ b/test/CodeGen/AArch64/zero-reg.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s @var32 = global i32 0 diff --git a/test/DebugInfo/AArch64/cfi-frame.ll b/test/DebugInfo/AArch64/cfi-frame.ll deleted file mode 100644 index 7290ddf357c1..000000000000 --- a/test/DebugInfo/AArch64/cfi-frame.ll +++ /dev/null @@ -1,58 +0,0 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-WITH-FP - -@bigspace = global [8 x i64] zeroinitializer - -declare void @use_addr(i8*) - -define void @test_frame([8 x i64] %val) { -; CHECK: test_frame: -; CHECK: .cfi_startproc - - %var = alloca i8, i32 1000000 -; CHECK: sub sp, sp, #[[SP_INIT_ADJ:[0-9]+]] -; CHECK-NEXT: .Ltmp -; CHECK-NEXT: .cfi_def_cfa sp, [[SP_INIT_ADJ]] - -; Make sure the prologue is reasonably efficient -; CHECK-NEXT: stp x29, x30, [sp, -; CHECK-NEXT: stp x25, x26, [sp, -; CHECK-NEXT: stp x23, x24, [sp, -; CHECK-NEXT: stp x21, x22, [sp, -; CHECK-NEXT: stp x19, x20, [sp, -; CHECK-NEXT: sub sp, sp, #160 -; CHECK-NEXT: sub sp, sp, #244, lsl #12 -; CHECK-NEXT: .Ltmp -; CHECK-NEXT: .cfi_def_cfa sp, 1000080 -; CHECK-NEXT: .Ltmp -; CHECK-NEXT: .cfi_offset x30, -8 -; CHECK-NEXT: .Ltmp -; CHECK-NEXT: .cfi_offset x29, -16 -; [...] -; CHECK: .cfi_offset x19, -80 - -; CHECK: bl use_addr - call void @use_addr(i8* %var) - - store [8 x i64] %val, [8 x i64]* @bigspace - ret void -; CHECK: ret -; CHECK: .cfi_endproc -} - -; CHECK-WITH-FP: test_frame: - -; CHECK-WITH-FP: sub sp, sp, #[[SP_INIT_ADJ:[0-9]+]] -; CHECK-WITH-FP-NEXT: .Ltmp -; CHECK-WITH-FP-NEXT: .cfi_def_cfa sp, [[SP_INIT_ADJ]] - -; CHECK-WITH-FP: stp x29, x30, [sp, [[OFFSET:#[0-9]+]]] -; CHECK-WITH-FP-NEXT: add x29, sp, [[OFFSET]] -; CHECK-WITH-FP-NEXT: .Ltmp -; CHECK-WITH-FP-NEXT: .cfi_def_cfa x29, 16 - - ; We shouldn't emit any kind of update for the second stack adjustment if the - ; FP is in use. 
-; CHECK-WITH-FP-NOT: .cfi_def_cfa_offset - -; CHECK-WITH-FP: bl use_addr diff --git a/test/DebugInfo/AArch64/lit.local.cfg b/test/DebugInfo/AArch64/lit.local.cfg index 9a66a00189ea..a75a42b6f74c 100644 --- a/test/DebugInfo/AArch64/lit.local.cfg +++ b/test/DebugInfo/AArch64/lit.local.cfg @@ -1,4 +1,4 @@ targets = set(config.root.targets_to_build.split()) -if not 'AArch64' in targets: +if not 'ARM64' in targets: config.unsupported = True diff --git a/test/DebugInfo/AArch64/variable-loc.ll b/test/DebugInfo/AArch64/variable-loc.ll deleted file mode 100644 index 9f432d9f2c02..000000000000 --- a/test/DebugInfo/AArch64/variable-loc.ll +++ /dev/null @@ -1,94 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim -filetype=obj < %s \ -; RUN: | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=DEBUG %s - -; This is a regression test making sure the location of variables is correct in -; debugging information, even if they're addressed via the frame pointer. - -; In case it needs, regenerating, the following suffices: -; int printf(const char *, ...); -; void populate_array(int *, int); -; int sum_array(int *, int); - -; int main() { -; int main_arr[100], val; -; populate_array(main_arr, 100); -; val = sum_array(main_arr, 100); -; printf("Total is %d\n", val); -; return 0; -; } - - ; First make sure main_arr is where we expect it: sp + 4 == x29 - 412: -; CHECK: main: -; CHECK: sub sp, sp, #432 -; CHECK: stp x29, x30, [sp, #416] -; CHECK: add x29, sp, #416 -; CHECK: add {{x[0-9]+}}, sp, #4 - -; DEBUG: DW_TAG_variable -; DEBUG-NEXT: DW_AT_name {{.*}} "main_arr" -; Rather hard-coded, but 0x91 => DW_OP_fbreg and 0xe47c is LEB128 encoded -412. -; DEBUG: DW_AT_location {{.*}}(<0x3> 91 e4 7c ) - -target datalayout = "e-p:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-f128:128:128-n32:64-S128" -target triple = "aarch64-none-linux-gnu" - -@.str = private unnamed_addr constant [13 x i8] c"Total is %d\0A\00", align 1 - -declare void @populate_array(i32*, i32) nounwind - -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone - -declare i32 @sum_array(i32*, i32) nounwind - -define i32 @main() nounwind { -entry: - %retval = alloca i32, align 4 - %main_arr = alloca [100 x i32], align 4 - %val = alloca i32, align 4 - store i32 0, i32* %retval - call void @llvm.dbg.declare(metadata !{[100 x i32]* %main_arr}, metadata !17), !dbg !22 - call void @llvm.dbg.declare(metadata !{i32* %val}, metadata !23), !dbg !24 - %arraydecay = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !25 - call void @populate_array(i32* %arraydecay, i32 100), !dbg !25 - %arraydecay1 = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !26 - %call = call i32 @sum_array(i32* %arraydecay1, i32 100), !dbg !26 - store i32 %call, i32* %val, align 4, !dbg !26 - %0 = load i32* %val, align 4, !dbg !27 - %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), i32 %0), !dbg !27 - ret i32 0, !dbg !28 -} - -declare i32 @printf(i8*, ...) 
- -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!30} - -!0 = metadata !{i32 786449, metadata !29, i32 12, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99] -!1 = metadata !{} -!3 = metadata !{metadata !5, metadata !11, metadata !14} -!5 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array] -!6 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!8 = metadata !{null, metadata !9, metadata !10} -!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] -!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!11 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array] -!12 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!13 = metadata !{metadata !10, metadata !9, metadata !10} -!14 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main] -!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!16 = metadata !{metadata !10} -!17 = metadata !{i32 786688, metadata !18, metadata !"main_arr", metadata !6, i32 19, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [main_arr] [line 19] -!18 = metadata !{i32 786443, metadata !29, metadata !14, i32 18, i32 16, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c] -!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int] -!20 = metadata !{i32 786465, i64 0, i64 99} ; [ DW_TAG_subrange_type ] [0, 99] -!22 = metadata !{i32 19, i32 7, metadata !18, null} -!23 = metadata !{i32 786688, metadata !18, metadata !"val", metadata !6, i32 20, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [val] [line 20] -!24 = metadata !{i32 20, i32 7, metadata !18, null} -!25 = metadata !{i32 22, i32 3, metadata !18, null} -!26 = metadata !{i32 23, i32 9, metadata !18, null} -!27 = metadata !{i32 24, i32 3, metadata !18, null} -!28 = 
metadata !{i32 26, i32 3, metadata !18, null} -!29 = metadata !{metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build"} -!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/MC/AArch64/adrp-relocation.s b/test/MC/AArch64/adrp-relocation.s index 6c7fbf5b872f..03b930d53970 100644 --- a/test/MC/AArch64/adrp-relocation.s +++ b/test/MC/AArch64/adrp-relocation.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-linux-gnu -filetype=obj -o - %s| llvm-readobj -r - | FileCheck %s // RUN: llvm-mc -triple=arm64-linux-gnu -filetype=obj -o - %s| llvm-readobj -r - | FileCheck %s .text // These should produce an ADRP/ADD pair to calculate the address of diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s index 213dc00f0a60..c6cb6b01f8a6 100644 --- a/test/MC/AArch64/basic-a64-diagnostics.s +++ b/test/MC/AArch64/basic-a64-diagnostics.s @@ -1,5 +1,3 @@ -// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t -// RUN: FileCheck --check-prefix=CHECK-ERROR --check-prefix=CHECK-ERROR-AARCH64 < %t %s // RUN: not llvm-mc -triple arm64-none-linux-gnu < %s 2> %t // RUN: FileCheck --check-prefix=CHECK-ERROR --check-prefix=CHECK-ERROR-ARM64 < %t %s diff --git a/test/MC/AArch64/basic-a64-instructions.s b/test/MC/AArch64/basic-a64-instructions.s index 9a4ec81aae9a..72156bc9c51c 100644 --- a/test/MC/AArch64/basic-a64-instructions.s +++ b/test/MC/AArch64/basic-a64-instructions.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 // RUN: llvm-mc -triple arm64-none-linux-gnu -show-encoding -mattr=+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 .globl _func @@ -128,7 +127,6 @@ _func: // CHECK: adds w19, w17, w1, uxtx // encoding: [0x33,0x62,0x21,0x2b] // CHECK: adds w2, w5, w1, sxtb #1 // encoding: [0xa2,0x84,0x21,0x2b] // CHECK: adds w26, wsp, w19, sxth // encoding: [0xfa,0xa3,0x33,0x2b] -// CHECK-AARCH64: adds wzr, w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b] // CHECK-ARM64: cmn w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b] // CHECK: adds w2, w3, w5, sxtx // encoding: [0x62,0xe0,0x25,0x2b] @@ -257,7 +255,6 @@ _func: // CHECK: sub sp, x3, x7, lsl #4 // encoding: [0x7f,0x70,0x27,0xcb] // CHECK: add w2, wsp, w3, lsl #1 // encoding: [0xe2,0x47,0x23,0x0b] // CHECK: cmp wsp, w9 // encoding: [0xff,0x43,0x29,0x6b] -// CHECK-AARCH64: adds wzr, wsp, w3, lsl #4 // encoding: [0xff,0x53,0x23,0x2b] // CHECK-ARM64: cmn wsp, w3, lsl #4 // encoding: [0xff,0x53,0x23,0x2b] // CHECK: subs x3, sp, x9, lsl #2 // encoding: [0xe3,0x6b,0x29,0xeb] @@ -352,8 +349,6 @@ _func: // A relocation check (default to lo12, which is the only sane relocation anyway really) add x0, x4, #:lo12:var -// CHECK-AARCH64: add x0, x4, #:lo12:var // encoding: [0x80'A',A,A,0x91'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: :lo12:var, kind: fixup_a64_add_lo12 // CHECK-ARM64: add x0, x4, :lo12:var // encoding: [0x80,0bAAAAAA00,0b00AAAAAA,0x91] // CHECK-ARM64: // fixup A - offset: 0, value: :lo12:var, kind: fixup_arm64_add_imm12 @@ -489,7 +484,6 @@ _func: sub w4, w6, wzr // CHECK: sub w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x4b] // CHECK: sub wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x4b] -// CHECK-AARCH64: sub w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x4b] // CHECK-ARM64: neg w20, w4 // encoding: [0xf4,0x03,0x04,0x4b] // CHECK: sub w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x4b] @@ -520,7 +514,6 @@ _func: sub x4, x6, xzr // CHECK: 
sub x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xcb] // CHECK: sub xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0xcb] -// CHECK-AARCH64: sub x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0xcb] // CHECK-ARM64: neg x20, x4 // encoding: [0xf4,0x03,0x04,0xcb] // CHECK: sub x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xcb] @@ -551,7 +544,6 @@ _func: subs w4, w6, wzr // CHECK: subs w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x6b] // CHECK: {{subs wzr,|cmp}} w3, w5 // encoding: [0x7f,0x00,0x05,0x6b] -// CHECK-AARCH64: subs w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x6b] // CHECK-ARM64: negs w20, w4 // encoding: [0xf4,0x03,0x04,0x6b] // CHECK: subs w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x6b] @@ -582,7 +574,6 @@ _func: subs x4, x6, xzr // CHECK: subs x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xeb] // CHECK: {{subs xzr,|cmp}} x3, x5 // encoding: [0x7f,0x00,0x05,0xeb] -// CHECK-AARCH64: subs x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0xeb] // CHECK-ARM64: negs x20, x4 // encoding: [0xf4,0x03,0x04,0xeb] // CHECK: subs x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xeb] @@ -722,9 +713,6 @@ _func: neg w29, w30 neg w30, wzr neg wzr, w0 -// CHECK-AARCH64: sub w29, wzr, w30 // encoding: [0xfd,0x03,0x1e,0x4b] -// CHECK-AARCH64: sub w30, wzr, wzr // encoding: [0xfe,0x03,0x1f,0x4b] -// CHECK-AARCH64: sub wzr, wzr, w0 // encoding: [0xff,0x03,0x00,0x4b] // CHECK-ARM64: neg w29, w30 // encoding: [0xfd,0x03,0x1e,0x4b] // CHECK-ARM64: neg w30, wzr // encoding: [0xfe,0x03,0x1f,0x4b] // CHECK-ARM64: neg wzr, w0 // encoding: [0xff,0x03,0x00,0x4b] @@ -732,9 +720,6 @@ _func: neg w28, w27, lsl #0 neg w26, w25, lsl #29 neg w24, w23, lsl #31 -// CHECK-AARCH64: sub w28, wzr, w27 // encoding: [0xfc,0x03,0x1b,0x4b] -// CHECK-AARCH64: neg w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x4b] -// CHECK-AARCH64: neg w24, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x4b] // CHECK-ARM64: neg w28, w27 // encoding: [0xfc,0x03,0x1b,0x4b] // CHECK-ARM64: neg w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x4b] @@ -757,9 +742,6 @@ _func: neg x29, x30 neg x30, xzr neg xzr, x0 -// CHECK-AARCH64: sub x29, xzr, x30 // encoding: [0xfd,0x03,0x1e,0xcb] -// CHECK-AARCH64: sub x30, xzr, xzr // encoding: [0xfe,0x03,0x1f,0xcb] -// CHECK-AARCH64: sub xzr, xzr, x0 // encoding: [0xff,0x03,0x00,0xcb] // CHECK-ARM64: neg x29, x30 // encoding: [0xfd,0x03,0x1e,0xcb] // CHECK-ARM64: neg x30, xzr // encoding: [0xfe,0x03,0x1f,0xcb] // CHECK-ARM64: neg xzr, x0 // encoding: [0xff,0x03,0x00,0xcb] @@ -767,9 +749,6 @@ _func: neg x28, x27, lsl #0 neg x26, x25, lsl #29 neg x24, x23, lsl #31 -// CHECK-AARCH64: sub x28, xzr, x27 // encoding: [0xfc,0x03,0x1b,0xcb] -// CHECK-AARCH64: neg x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xcb] -// CHECK-AARCH64: neg x24, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xcb] // CHECK-ARM64: neg x28, x27 // encoding: [0xfc,0x03,0x1b,0xcb] // CHECK-ARM64: neg x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xcb] @@ -792,9 +771,6 @@ _func: negs w29, w30 negs w30, wzr negs wzr, w0 -// CHECK-AARCH64: subs w29, wzr, w30 // encoding: [0xfd,0x03,0x1e,0x6b] -// CHECK-AARCH64: subs w30, wzr, wzr // encoding: [0xfe,0x03,0x1f,0x6b] -// CHECK-AARCH64: subs wzr, wzr, w0 // encoding: [0xff,0x03,0x00,0x6b] // CHECK-ARM64: negs w29, w30 // encoding: [0xfd,0x03,0x1e,0x6b] // CHECK-ARM64: negs w30, wzr // encoding: [0xfe,0x03,0x1f,0x6b] // CHECK-ARM64: cmp wzr, w0 // encoding: [0xff,0x03,0x00,0x6b] @@ -802,9 +778,6 @@ _func: negs w28, w27, lsl #0 negs w26, w25, lsl #29 negs w24, w23, lsl #31 -// CHECK-AARCH64: subs w28, wzr, w27 // encoding: [0xfc,0x03,0x1b,0x6b] -// 
CHECK-AARCH64: negs w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x6b] -// CHECK-AARCH64: negs w24, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x6b] // CHECK-ARM64: negs w28, w27 // encoding: [0xfc,0x03,0x1b,0x6b] // CHECK-ARM64: negs w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x6b] @@ -827,9 +800,6 @@ _func: negs x29, x30 negs x30, xzr negs xzr, x0 -// CHECK-AARCH64: subs x29, xzr, x30 // encoding: [0xfd,0x03,0x1e,0xeb] -// CHECK-AARCH64: subs x30, xzr, xzr // encoding: [0xfe,0x03,0x1f,0xeb] -// CHECK-AARCH64: subs xzr, xzr, x0 // encoding: [0xff,0x03,0x00,0xeb] // CHECK-ARM64: negs x29, x30 // encoding: [0xfd,0x03,0x1e,0xeb] // CHECK-ARM64: negs x30, xzr // encoding: [0xfe,0x03,0x1f,0xeb] // CHECK-ARM64: cmp xzr, x0 // encoding: [0xff,0x03,0x00,0xeb] @@ -837,9 +807,6 @@ _func: negs x28, x27, lsl #0 negs x26, x25, lsl #29 negs x24, x23, lsl #31 -// CHECK-AARCH64: subs x28, xzr, x27 // encoding: [0xfc,0x03,0x1b,0xeb] -// CHECK-AARCH64: negs x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xeb] -// CHECK-AARCH64: negs x24, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xeb] // CHECK-ARM64: negs x28, x27 // encoding: [0xfc,0x03,0x1b,0xeb] // CHECK-ARM64: negs x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xeb] @@ -970,10 +937,6 @@ _func: sbfm x3, x4, #63, #63 sbfm wzr, wzr, #31, #31 sbfm w12, w9, #0, #0 -// CHECK-AARCH64: sbfm x1, x2, #3, #4 // encoding: [0x41,0x10,0x43,0x93] -// CHECK-AARCH64: sbfm x3, x4, #63, #63 // encoding: [0x83,0xfc,0x7f,0x93] -// CHECK-AARCH64: sbfm wzr, wzr, #31, #31 // encoding: [0xff,0x7f,0x1f,0x13] -// CHECK-AARCH64: sbfm w12, w9, #0, #0 // encoding: [0x2c,0x01,0x00,0x13] // CHECK-ARM64: sbfx x1, x2, #3, #2 // encoding: [0x41,0x10,0x43,0x93] // CHECK-ARM64: asr x3, x4, #63 // encoding: [0x83,0xfc,0x7f,0x93] @@ -984,10 +947,6 @@ _func: ubfm xzr, x4, #0, #0 ubfm x4, xzr, #63, #5 ubfm x5, x6, #12, #63 -// CHECK-AARCH64: ubfm x4, x5, #12, #10 // encoding: [0xa4,0x28,0x4c,0xd3] -// CHECK-AARCH64: ubfm xzr, x4, #0, #0 // encoding: [0x9f,0x00,0x40,0xd3] -// CHECK-AARCH64: ubfm x4, xzr, #63, #5 // encoding: [0xe4,0x17,0x7f,0xd3] -// CHECK-AARCH64: ubfm x5, x6, #12, #63 // encoding: [0xc5,0xfc,0x4c,0xd3] // CHECK-ARM64: ubfiz x4, x5, #52, #11 // encoding: [0xa4,0x28,0x4c,0xd3] // CHECK-ARM64: ubfx xzr, x4, #0, #1 // encoding: [0x9f,0x00,0x40,0xd3] // CHECK-ARM64: ubfiz x4, xzr, #1, #6 // encoding: [0xe4,0x17,0x7f,0xd3] @@ -997,10 +956,6 @@ _func: bfm xzr, x4, #0, #0 bfm x4, xzr, #63, #5 bfm x5, x6, #12, #63 -// CHECK-AARCH64: bfm x4, x5, #12, #10 // encoding: [0xa4,0x28,0x4c,0xb3] -// CHECK-AARCH64: bfm xzr, x4, #0, #0 // encoding: [0x9f,0x00,0x40,0xb3] -// CHECK-AARCH64: bfm x4, xzr, #63, #5 // encoding: [0xe4,0x17,0x7f,0xb3] -// CHECK-AARCH64: bfm x5, x6, #12, #63 // encoding: [0xc5,0xfc,0x4c,0xb3] // CHECK-ARM64: bfi x4, x5, #52, #11 // encoding: [0xa4,0x28,0x4c,0xb3] // CHECK-ARM64: bfxil xzr, x4, #0, #1 // encoding: [0x9f,0x00,0x40,0xb3] // CHECK-ARM64: bfi x4, xzr, #1, #6 // encoding: [0xe4,0x17,0x7f,0xb3] @@ -1063,10 +1018,8 @@ _func: sbfiz xzr, xzr, #10, #11 // CHECK: {{sbfiz|sbfx}} w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13] // CHECK: sbfiz x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0x93] -// CHECK-AARCH64: sbfiz x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0x93] // CHECK-ARM64: asr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0x93] // CHECK: sbfiz x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0x93] -// CHECK-AARCH64: sbfiz w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x13] // CHECK-ARM64: asr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x13] // CHECK: 
sbfiz w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x13] // CHECK: sbfiz w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x13] @@ -1081,17 +1034,11 @@ _func: sbfx w13, w14, #29, #3 sbfx xzr, xzr, #10, #11 // CHECK: sbfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13] -// CHECK-AARCH64: sbfx x2, x3, #63, #1 // encoding: [0x62,0xfc,0x7f,0x93] // CHECK-ARM64: asr x2, x3, #63 // encoding: [0x62,0xfc,0x7f,0x93] -// CHECK-AARCH64: sbfx x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0x93] // CHECK-ARM64: asr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0x93] -// CHECK-AARCH64: sbfx x9, x10, #5, #59 // encoding: [0x49,0xfd,0x45,0x93] // CHECK-ARM64: asr x9, x10, #5 // encoding: [0x49,0xfd,0x45,0x93] -// CHECK-AARCH64: sbfx w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x13] // CHECK-ARM64: asr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x13] -// CHECK-AARCH64: sbfx w11, w12, #31, #1 // encoding: [0x8b,0x7d,0x1f,0x13] // CHECK-ARM64: asr w11, w12, #31 // encoding: [0x8b,0x7d,0x1f,0x13] -// CHECK-AARCH64: sbfx w13, w14, #29, #3 // encoding: [0xcd,0x7d,0x1d,0x13] // CHECK-ARM64: asr w13, w14, #29 // encoding: [0xcd,0x7d,0x1d,0x13] // CHECK: sbfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0x93] @@ -1103,14 +1050,6 @@ _func: bfi w11, w12, #31, #1 bfi w13, w14, #29, #3 bfi xzr, xzr, #10, #11 -// CHECK-AARCH64: bfi w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x33] -// CHECK-AARCH64: bfi x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xb3] -// CHECK-AARCH64: bfi x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xb3] -// CHECK-AARCH64: bfi x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0xb3] -// CHECK-AARCH64: bfi w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x33] -// CHECK-AARCH64: bfi w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x33] -// CHECK-AARCH64: bfi w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x33] -// CHECK-AARCH64: bfi xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xb3] // CHECK-ARM64: bfxil w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x33] // CHECK-ARM64: bfi x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xb3] @@ -1146,14 +1085,6 @@ _func: ubfiz w11, w12, #31, #1 ubfiz w13, w14, #29, #3 ubfiz xzr, xzr, #10, #11 -// CHECK-AARCH64: ubfiz w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] -// CHECK-AARCH64: ubfiz x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xd3] -// CHECK-AARCH64: ubfiz x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xd3] -// CHECK-AARCH64: ubfiz x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0xd3] -// CHECK-AARCH64: ubfiz w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x53] -// CHECK-AARCH64: ubfiz w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x53] -// CHECK-AARCH64: ubfiz w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x53] -// CHECK-AARCH64: ubfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xd3] // CHECK-ARM64: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] // CHECK-ARM64: lsl x2, x3, #63 // encoding: [0x62,0x00,0x41,0xd3] @@ -1172,14 +1103,6 @@ _func: ubfx w11, w12, #31, #1 ubfx w13, w14, #29, #3 ubfx xzr, xzr, #10, #11 -// CHECK-AARCH64: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] -// CHECK-AARCH64: ubfx x2, x3, #63, #1 // encoding: [0x62,0xfc,0x7f,0xd3] -// CHECK-AARCH64: ubfx x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xd3] -// CHECK-AARCH64: ubfx x9, x10, #5, #59 // encoding: [0x49,0xfd,0x45,0xd3] -// CHECK-AARCH64: ubfx w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x53] -// CHECK-AARCH64: ubfx w11, w12, #31, #1 // encoding: [0x8b,0x7d,0x1f,0x53] -// CHECK-AARCH64: ubfx w13, w14, #29, #3 // encoding: [0xcd,0x7d,0x1d,0x53] -// 
CHECK-AARCH64: ubfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0xd3] // CHECK-ARM64: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] // CHECK-ARM64: lsr x2, x3, #63 // encoding: [0x62,0xfc,0x7f,0xd3] @@ -1197,14 +1120,6 @@ _func: cbz x5, lbl cbnz x2, lbl cbnz x26, lbl -// CHECK-AARCH64: cbz w5, lbl // encoding: [0x05'A',A,A,0x34'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: cbz x5, lbl // encoding: [0x05'A',A,A,0xb4'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: cbnz x2, lbl // encoding: [0x02'A',A,A,0xb5'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: cbnz x26, lbl // encoding: [0x1a'A',A,A,0xb5'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr // CHECK-ARM64: cbz w5, lbl // encoding: [0bAAA00101,A,A,0x34] // CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 // CHECK-ARM64: cbz x5, lbl // encoding: [0bAAA00101,A,A,0xb4] @@ -1216,10 +1131,6 @@ _func: cbz wzr, lbl cbnz xzr, lbl -// CHECK-AARCH64: cbz wzr, lbl // encoding: [0x1f'A',A,A,0x34'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: cbnz xzr, lbl // encoding: [0x1f'A',A,A,0xb5'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr // CHECK-ARM64: cbz wzr, lbl // encoding: [0bAAA11111,A,A,0x34] // CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 @@ -1256,40 +1167,6 @@ _func: b.gt lbl b.le lbl b.al lbl -// CHECK-AARCH64: b.eq lbl // encoding: [A,A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.ne lbl // encoding: [0x01'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.hs lbl // encoding: [0x02'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.hs lbl // encoding: [0x02'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.lo lbl // encoding: [0x03'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.lo lbl // encoding: [0x03'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.mi lbl // encoding: [0x04'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.pl lbl // encoding: [0x05'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.vs lbl // encoding: [0x06'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.vc lbl // encoding: [0x07'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.hi lbl // encoding: [0x08'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.ls lbl // encoding: [0x09'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.ge lbl // encoding: [0x0a'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.lt lbl // encoding: [0x0b'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - 
offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.gt lbl // encoding: [0x0c'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.le lbl // encoding: [0x0d'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.al lbl // encoding: [0x0e'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr // CHECK-ARM64: b.eq lbl // encoding: [0bAAA00000,A,A,0x54] // CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 @@ -1344,40 +1221,6 @@ _func: bgt lbl ble lbl bal lbl -// CHECK-AARCH64: b.eq lbl // encoding: [A,A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.ne lbl // encoding: [0x01'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.hs lbl // encoding: [0x02'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.hs lbl // encoding: [0x02'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.lo lbl // encoding: [0x03'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.lo lbl // encoding: [0x03'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.mi lbl // encoding: [0x04'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.pl lbl // encoding: [0x05'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.vs lbl // encoding: [0x06'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.vc lbl // encoding: [0x07'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.hi lbl // encoding: [0x08'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.ls lbl // encoding: [0x09'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.ge lbl // encoding: [0x0a'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.lt lbl // encoding: [0x0b'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.gt lbl // encoding: [0x0c'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.le lbl // encoding: [0x0d'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.al lbl // encoding: [0x0e'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr b.eq #0 b.lt #-4 @@ -2342,12 +2185,6 @@ _func: ldr w3, here ldr x29, there ldrsw xzr, everywhere -// CHECK-AARCH64: ldr w3, here // encoding: [0x03'A',A,A,0x18'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: here, kind: fixup_a64_ld_prel -// CHECK-AARCH64: ldr x29, there // encoding: [0x1d'A',A,A,0x58'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: there, kind: fixup_a64_ld_prel -// CHECK-AARCH64: ldrsw xzr, everywhere // encoding: [0x1f'A',A,A,0x98'A'] -// CHECK-AARCH64: // 
fixup A - offset: 0, value: everywhere, kind: fixup_a64_ld_prel // CHECK-ARM64: ldr w3, here // encoding: [0bAAA00011,A,A,0x18] // CHECK-ARM64: // fixup A - offset: 0, value: here, kind: fixup_arm64_ldr_pcrel_imm19 @@ -2359,12 +2196,6 @@ _func: ldr s0, who_knows ldr d0, i_dont ldr q0, there_must_be_a_better_way -// CHECK-AARCH64: ldr s0, who_knows // encoding: [A,A,A,0x1c'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: who_knows, kind: fixup_a64_ld_prel -// CHECK-AARCH64: ldr d0, i_dont // encoding: [A,A,A,0x5c'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: i_dont, kind: fixup_a64_ld_prel -// CHECK-AARCH64: ldr q0, there_must_be_a_better_way // encoding: [A,A,A,0x9c'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: there_must_be_a_better_way, kind: fixup_a64_ld_prel // CHECK-ARM64: ldr s0, who_knows // encoding: [0bAAA00000,A,A,0x1c] // CHECK-ARM64: // fixup A - offset: 0, value: who_knows, kind: fixup_arm64_ldr_pcrel_imm19 @@ -2380,10 +2211,6 @@ _func: prfm pldl1strm, nowhere prfm #22, somewhere -// CHECK-AARCH64: prfm pldl1strm, nowhere // encoding: [0x01'A',A,A,0xd8'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_ld_prel -// CHECK-AARCH64: prfm #22, somewhere // encoding: [0x16'A',A,A,0xd8'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_ld_prel // CHECK-ARM64: prfm pldl1strm, nowhere // encoding: [0bAAA00001,A,A,0xd8] // CHECK-ARM64: // fixup A - offset: 0, value: nowhere, kind: fixup_arm64_ldr_pcrel_imm19 @@ -2603,18 +2430,6 @@ _func: ldrsw x15, [x5, #:lo12:sym] ldr x15, [x5, #:lo12:sym] ldr q3, [x2, #:lo12:sym] -// CHECK-AARCH64: str x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,A,0xf9'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst64_lo12 -// CHECK-AARCH64: ldrb w15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x40'A',0x39'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst8_lo12 -// CHECK-AARCH64: ldrsh x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x80'A',0x79'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst16_lo12 -// CHECK-AARCH64: ldrsw x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x80'A',0xb9'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst32_lo12 -// CHECK-AARCH64: ldr x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x40'A',0xf9'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst64_lo12 -// CHECK-AARCH64: ldr q3, [x2, #:lo12:sym] // encoding: [0x43'A',A,0xc0'A',0x3d'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst128_lo12 // CHECK-ARM64: str x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b00AAAAAA,0xf9] // CHECK-ARM64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_arm64_ldst_imm12_scale8 @@ -3507,10 +3322,6 @@ _func: movz x2, #:abs_g0:sym movk w3, #:abs_g0_nc:sym -// CHECK-AARCH64: movz x2, #:abs_g0:sym // encoding: [0x02'A',A,0x80'A',0xd2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_a64_movw_uabs_g0 -// CHECK-AARCH64: movk w3, #:abs_g0_nc:sym // encoding: [0x03'A',A,0x80'A',0x72'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_a64_movw_uabs_g0_nc // CHECK-ARM64: movz x2, #:abs_g0:sym // encoding: [0bAAA00010,A,0b100AAAAA,0xd2] // CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_arm64_movw @@ -3519,10 +3330,6 @@ _func: movz x4, #:abs_g1:sym movk w5, #:abs_g1_nc:sym -// 
CHECK-AARCH64: movz x4, #:abs_g1:sym // encoding: [0x04'A',A,0xa0'A',0xd2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_a64_movw_uabs_g1 -// CHECK-AARCH64: movk w5, #:abs_g1_nc:sym // encoding: [0x05'A',A,0xa0'A',0x72'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_a64_movw_uabs_g1_nc // CHECK-ARM64: movz x4, #:abs_g1:sym // encoding: [0bAAA00100,A,0b101AAAAA,0xd2] // CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_arm64_movw @@ -3531,10 +3338,6 @@ _func: movz x6, #:abs_g2:sym movk x7, #:abs_g2_nc:sym -// CHECK-AARCH64: movz x6, #:abs_g2:sym // encoding: [0x06'A',A,0xc0'A',0xd2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_a64_movw_uabs_g2 -// CHECK-AARCH64: movk x7, #:abs_g2_nc:sym // encoding: [0x07'A',A,0xc0'A',0xf2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_a64_movw_uabs_g2_nc // CHECK-ARM64: movz x6, #:abs_g2:sym // encoding: [0bAAA00110,A,0b110AAAAA,0xd2] // CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_arm64_movw @@ -3543,10 +3346,6 @@ _func: movz x8, #:abs_g3:sym movk x9, #:abs_g3:sym -// CHECK-AARCH64: movz x8, #:abs_g3:sym // encoding: [0x08'A',A,0xe0'A',0xd2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_a64_movw_uabs_g3 -// CHECK-AARCH64: movk x9, #:abs_g3:sym // encoding: [0x09'A',A,0xe0'A',0xf2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_a64_movw_uabs_g3 // CHECK-ARM64: movz x8, #:abs_g3:sym // encoding: [0bAAA01000,A,0b111AAAAA,0xd2] // CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_arm64_movw @@ -3558,14 +3357,6 @@ _func: movz x19, #:abs_g0_s:sym movn w10, #:abs_g0_s:sym movz w25, #:abs_g0_s:sym -// CHECK-AARCH64: movn x30, #:abs_g0_s:sym // encoding: [0x1e'A',A,0x80'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0 -// CHECK-AARCH64: movz x19, #:abs_g0_s:sym // encoding: [0x13'A',A,0x80'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0 -// CHECK-AARCH64: movn w10, #:abs_g0_s:sym // encoding: [0x0a'A',A,0x80'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0 -// CHECK-AARCH64: movz w25, #:abs_g0_s:sym // encoding: [0x19'A',A,0x80'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0 // CHECK-ARM64: movn x30, #:abs_g0_s:sym // encoding: [0bAAA11110,A,0b100AAAAA,0x92] // CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_arm64_movw @@ -3580,14 +3371,6 @@ _func: movz x19, #:abs_g1_s:sym movn w10, #:abs_g1_s:sym movz w25, #:abs_g1_s:sym -// CHECK-AARCH64: movn x30, #:abs_g1_s:sym // encoding: [0x1e'A',A,0xa0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1 -// CHECK-AARCH64: movz x19, #:abs_g1_s:sym // encoding: [0x13'A',A,0xa0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1 -// CHECK-AARCH64: movn w10, #:abs_g1_s:sym // encoding: [0x0a'A',A,0xa0'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1 -// CHECK-AARCH64: movz w25, #:abs_g1_s:sym // encoding: [0x19'A',A,0xa0'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, 
value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1 // CHECK-ARM64: movn x30, #:abs_g1_s:sym // encoding: [0bAAA11110,A,0b101AAAAA,0x92] // CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_arm64_movw @@ -3600,10 +3383,6 @@ _func: movn x30, #:abs_g2_s:sym movz x19, #:abs_g2_s:sym -// CHECK-AARCH64: movn x30, #:abs_g2_s:sym // encoding: [0x1e'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_a64_movw_sabs_g2 -// CHECK-AARCH64: movz x19, #:abs_g2_s:sym // encoding: [0x13'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_a64_movw_sabs_g2 // CHECK-ARM64: movn x30, #:abs_g2_s:sym // encoding: [0bAAA11110,A,0b110AAAAA,0x92] // CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_arm64_movw @@ -3616,10 +3395,6 @@ _func: adr x2, loc adr xzr, loc - // CHECK-AARCH64: adr x2, loc // encoding: [0x02'A',A,A,0x10'A'] - // CHECK-AARCH64: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel - // CHECK-AARCH64: adr xzr, loc // encoding: [0x1f'A',A,A,0x10'A'] - // CHECK-AARCH64: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel // CHECK-ARM64: adr x2, loc // encoding: [0x02'A',A,A,0x10'A'] // CHECK-ARM64: // fixup A - offset: 0, value: loc, kind: fixup_arm64_pcrel_adr_imm21 @@ -3627,8 +3402,6 @@ _func: // CHECK-ARM64: // fixup A - offset: 0, value: loc, kind: fixup_arm64_pcrel_adr_imm21 adrp x29, loc - // CHECK-AARCH64: adrp x29, loc // encoding: [0x1d'A',A,A,0x90'A'] - // CHECK-AARCH64: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel_page // CHECK-ARM64: adrp x29, loc // encoding: [0x1d'A',A,A,0x90'A'] // CHECK-ARM64: // fixup A - offset: 0, value: loc, kind: fixup_arm64_pcrel_adrp_imm21 @@ -5008,12 +4781,6 @@ _func: tbz x5, #0, somewhere tbz xzr, #63, elsewhere tbnz x5, #45, nowhere -// CHECK-AARCH64: tbz x5, #0, somewhere // encoding: [0x05'A',A,A,0x36'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_tstbr -// CHECK-AARCH64: tbz xzr, #63, elsewhere // encoding: [0x1f'A',A,0xf8'A',0xb6'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: elsewhere, kind: fixup_a64_tstbr -// CHECK-AARCH64: tbnz x5, #45, nowhere // encoding: [0x05'A',A,0x68'A',0xb7'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_tstbr // CHECK-ARM64: tbz w5, #0, somewhere // encoding: [0bAAA00101,A,0b00000AAA,0x36] // CHECK-ARM64: // fixup A - offset: 0, value: somewhere, kind: fixup_arm64_pcrel_branch14 @@ -5026,12 +4793,6 @@ _func: tbnz w3, #2, there tbnz wzr, #31, nowhere tbz w5, #12, anywhere -// CHECK-AARCH64: tbnz w3, #2, there // encoding: [0x03'A',A,0x10'A',0x37'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: there, kind: fixup_a64_tstbr -// CHECK-AARCH64: tbnz wzr, #31, nowhere // encoding: [0x1f'A',A,0xf8'A',0x37'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_tstbr -// CHECK-AARCH64: tbz w5, #12, anywhere // encoding: [0x05'A',A,0x60'A',0x36'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: anywhere, kind: fixup_a64_tstbr // CHECK-ARM64: tbnz w3, #2, there // encoding: [0bAAA00011,A,0b00010AAA,0x37] // CHECK-ARM64: // fixup A - offset: 0, value: there, kind: fixup_arm64_pcrel_branch14 @@ -5046,10 +4807,6 @@ _func: b somewhere bl elsewhere -// CHECK-AARCH64: b somewhere // encoding: [A,A,A,0x14'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_uncondbr -// CHECK-AARCH64: bl elsewhere // encoding: [A,A,A,0x94'A'] 
-// CHECK-AARCH64: // fixup A - offset: 0, value: elsewhere, kind: fixup_a64_call // CHECK-ARM64: b somewhere // encoding: [A,A,A,0b000101AA] // CHECK-ARM64: // fixup A - offset: 0, value: somewhere, kind: fixup_arm64_pcrel_branch26 diff --git a/test/MC/AArch64/basic-pic.s b/test/MC/AArch64/basic-pic.s index c3317f35d3bc..6bb6aaa7de13 100644 --- a/test/MC/AArch64/basic-pic.s +++ b/test/MC/AArch64/basic-pic.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o -| llvm-objdump -r - | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o -| llvm-objdump -r - | FileCheck %s // CHECK: RELOCATION RECORDS FOR [.rela.text] diff --git a/test/MC/AArch64/elf-extern.s b/test/MC/AArch64/elf-extern.s index 23cb4bd46c79..3d84bde052ff 100644 --- a/test/MC/AArch64/elf-extern.s +++ b/test/MC/AArch64/elf-extern.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc < %s -triple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s // RUN: llvm-mc < %s -triple=arm64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s // External symbols are a different concept to global variables but should still diff --git a/test/MC/AArch64/elf-objdump.s b/test/MC/AArch64/elf-objdump.s index 2d134ff58620..b69926efbc2b 100644 --- a/test/MC/AArch64/elf-objdump.s +++ b/test/MC/AArch64/elf-objdump.s @@ -1,5 +1,4 @@ // 64 bit little endian -// RUN: llvm-mc -filetype=obj -triple aarch64-none-linux-gnu %s -o - | llvm-objdump -d - // RUN: llvm-mc -filetype=obj -triple arm64-none-linux-gnu %s -o - | llvm-objdump -d - // We just want to see if llvm-objdump works at all. diff --git a/test/MC/AArch64/elf-reloc-addend.s b/test/MC/AArch64/elf-reloc-addend.s deleted file mode 100644 index 8d575fb8b920..000000000000 --- a/test/MC/AArch64/elf-reloc-addend.s +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: llvm-mc -triple=aarch64-linux-gnu -filetype=obj -o - %s | llvm-objdump -triple=aarch64-linux-gnu -r - | FileCheck %s - -// RUN: llvm-mc -triple=arm64-linux-gnu -filetype=obj -o - %s | llvm-objdump -triple=aarch64-linux-gnu -r - | FileCheck %s - - add x0, x4, #:lo12:sym -// CHECK: 0 R_AARCH64_ADD_ABS_LO12_NC sym - add x3, x5, #:lo12:sym+1 -// CHECK: 4 R_AARCH64_ADD_ABS_LO12_NC sym+1 - add x3, x5, #:lo12:sym-1 -// CHECK: 8 R_AARCH64_ADD_ABS_LO12_NC sym-1 diff --git a/test/MC/AArch64/elf-reloc-addsubimm.s b/test/MC/AArch64/elf-reloc-addsubimm.s index a64249e8b8f8..cc5c3f7f25b0 100644 --- a/test/MC/AArch64/elf-reloc-addsubimm.s +++ b/test/MC/AArch64/elf-reloc-addsubimm.s @@ -1,6 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s - // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s diff --git a/test/MC/AArch64/elf-reloc-ldrlit.s b/test/MC/AArch64/elf-reloc-ldrlit.s index 55ba5f8b748a..3554ef3ae423 100644 --- a/test/MC/AArch64/elf-reloc-ldrlit.s +++ b/test/MC/AArch64/elf-reloc-ldrlit.s @@ -1,6 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s - // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s diff --git a/test/MC/AArch64/elf-reloc-ldstunsimm.s b/test/MC/AArch64/elf-reloc-ldstunsimm.s index faf2c459a65b..196f65fd2999 100644 --- a/test/MC/AArch64/elf-reloc-ldstunsimm.s +++ b/test/MC/AArch64/elf-reloc-ldstunsimm.s @@ -1,6 +1,3 @@ -// RUN: llvm-mc 
-triple=aarch64-none-linux-gnu -mattr=+fp-armv8 -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s - // RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+fp-armv8 -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s diff --git a/test/MC/AArch64/elf-reloc-movw.s b/test/MC/AArch64/elf-reloc-movw.s index 29f89443c336..dc7dbb0c156a 100644 --- a/test/MC/AArch64/elf-reloc-movw.s +++ b/test/MC/AArch64/elf-reloc-movw.s @@ -1,6 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s - // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s diff --git a/test/MC/AArch64/elf-reloc-pcreladdressing.s b/test/MC/AArch64/elf-reloc-pcreladdressing.s index ee9b2073694a..652011318c39 100644 --- a/test/MC/AArch64/elf-reloc-pcreladdressing.s +++ b/test/MC/AArch64/elf-reloc-pcreladdressing.s @@ -1,7 +1,4 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s - - // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s adr x2, some_label diff --git a/test/MC/AArch64/elf-reloc-tstb.s b/test/MC/AArch64/elf-reloc-tstb.s index 370b9ee126ac..9cbe3a53fb7f 100644 --- a/test/MC/AArch64/elf-reloc-tstb.s +++ b/test/MC/AArch64/elf-reloc-tstb.s @@ -1,6 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s - // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s diff --git a/test/MC/AArch64/elf-reloc-uncondbrimm.s b/test/MC/AArch64/elf-reloc-uncondbrimm.s index 69b0a2fcb619..8f3915afab79 100644 --- a/test/MC/AArch64/elf-reloc-uncondbrimm.s +++ b/test/MC/AArch64/elf-reloc-uncondbrimm.s @@ -1,6 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s - // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s diff --git a/test/MC/AArch64/gicv3-regs-diagnostics.s b/test/MC/AArch64/gicv3-regs-diagnostics.s index c88431235413..6f4f5ee66c65 100644 --- a/test/MC/AArch64/gicv3-regs-diagnostics.s +++ b/test/MC/AArch64/gicv3-regs-diagnostics.s @@ -1,4 +1,3 @@ -// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s // RUN: not llvm-mc -triple arm64-none-linux-gnu < %s 2>&1 | FileCheck %s // Write-only diff --git a/test/MC/AArch64/gicv3-regs.s b/test/MC/AArch64/gicv3-regs.s index 470fc4667f77..b9eac1a56951 100644 --- a/test/MC/AArch64/gicv3-regs.s +++ b/test/MC/AArch64/gicv3-regs.s @@ -1,4 +1,3 @@ - // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -show-encoding < %s | FileCheck %s mrs x8, icc_iar1_el1 diff --git a/test/MC/AArch64/inline-asm-modifiers.s b/test/MC/AArch64/inline-asm-modifiers.s index c12ebf4636ba..33d5bf519f92 100644 --- a/test/MC/AArch64/inline-asm-modifiers.s +++ b/test/MC/AArch64/inline-asm-modifiers.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj -mattr=+fp-armv8 < %s | llvm-objdump -r - | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu 
-filetype=obj -mattr=+fp-armv8 < %s | llvm-objdump -r - | FileCheck %s .file "" diff --git a/test/MC/AArch64/jump-table.s b/test/MC/AArch64/jump-table.s index 3fe9bc58cdd9..439ecd90de34 100644 --- a/test/MC/AArch64/jump-table.s +++ b/test/MC/AArch64/jump-table.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc < %s -triple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s // RUN: llvm-mc < %s -triple=arm64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s .file "" diff --git a/test/MC/AArch64/lit.local.cfg b/test/MC/AArch64/lit.local.cfg index 8378712e9cf5..17a6b7ab033d 100644 --- a/test/MC/AArch64/lit.local.cfg +++ b/test/MC/AArch64/lit.local.cfg @@ -1,3 +1,3 @@ targets = set(config.root.targets_to_build.split()) -if 'AArch64' not in targets or 'ARM64' not in targets: - config.unsupported = True \ No newline at end of file +if 'ARM64' not in targets: + config.unsupported = True diff --git a/test/MC/AArch64/mapping-across-sections.s b/test/MC/AArch64/mapping-across-sections.s index 14336382bedb..00b324cb8264 100644 --- a/test/MC/AArch64/mapping-across-sections.s +++ b/test/MC/AArch64/mapping-across-sections.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s .text diff --git a/test/MC/AArch64/mapping-within-section.s b/test/MC/AArch64/mapping-within-section.s index b80721ac652f..f515cb9a5c0b 100644 --- a/test/MC/AArch64/mapping-within-section.s +++ b/test/MC/AArch64/mapping-within-section.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s .text diff --git a/test/MC/AArch64/neon-2velem.s b/test/MC/AArch64/neon-2velem.s index 567f22892171..04841d0164f2 100644 --- a/test/MC/AArch64/neon-2velem.s +++ b/test/MC/AArch64/neon-2velem.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-3vdiff.s b/test/MC/AArch64/neon-3vdiff.s index 476f7c6abe64..3ffc38fc69c4 100644 --- a/test/MC/AArch64/neon-3vdiff.s +++ b/test/MC/AArch64/neon-3vdiff.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+crypto -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-aba-abd.s b/test/MC/AArch64/neon-aba-abd.s index 8833b3bbe956..e79648341468 100644 --- a/test/MC/AArch64/neon-aba-abd.s +++ b/test/MC/AArch64/neon-aba-abd.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-across.s b/test/MC/AArch64/neon-across.s index 1a5446b3a4f3..60b766d8c881 100644 --- a/test/MC/AArch64/neon-across.s +++ b/test/MC/AArch64/neon-across.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | 
FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-add-pairwise.s b/test/MC/AArch64/neon-add-pairwise.s index 83d443edb777..0b9e4d3146b8 100644 --- a/test/MC/AArch64/neon-add-pairwise.s +++ b/test/MC/AArch64/neon-add-pairwise.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-add-sub-instructions.s b/test/MC/AArch64/neon-add-sub-instructions.s index ad169a8ff246..7d11d70bb90c 100644 --- a/test/MC/AArch64/neon-add-sub-instructions.s +++ b/test/MC/AArch64/neon-add-sub-instructions.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-bitwise-instructions.s b/test/MC/AArch64/neon-bitwise-instructions.s index 949d1b14ffc3..ec192aa2d8af 100644 --- a/test/MC/AArch64/neon-bitwise-instructions.s +++ b/test/MC/AArch64/neon-bitwise-instructions.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-compare-instructions.s b/test/MC/AArch64/neon-compare-instructions.s index dfc3ae71515e..4d3daf066ed3 100644 --- a/test/MC/AArch64/neon-compare-instructions.s +++ b/test/MC/AArch64/neon-compare-instructions.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-crypto.s b/test/MC/AArch64/neon-crypto.s index 3f36ba9e2a11..ed1bf8882648 100644 --- a/test/MC/AArch64/neon-crypto.s +++ b/test/MC/AArch64/neon-crypto.s @@ -1,5 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -mattr=+crypto -show-encoding < %s | FileCheck %s -// RUN: not llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s 2>&1 | FileCheck -check-prefix=CHECK-NO-CRYPTO %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -mattr=+crypto -show-encoding < %s | FileCheck %s // RUN: not llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s 2>&1 | FileCheck -check-prefix=CHECK-NO-CRYPTO-ARM64 %s diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index 10fdde460284..46ae311f5f8b 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -1,5 +1,3 @@ -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon < %s 2> %t -// RUN: FileCheck --check-prefix=CHECK-ERROR --check-prefix=CHECK-AARCH64-ERROR < %t %s // RUN: not llvm-mc -triple arm64-none-linux-gnu -mattr=+neon < %s 2> %t // RUN: FileCheck --check-prefix=CHECK-ERROR --check-prefix=CHECK-ARM64-ERROR < %t %s @@ -590,12 +588,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcmgt v0.2d, v31.2s, v16.2s // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected floating-point constant #0.0 or invalid register type 
-// CHECK-AARCH64-ERROR: fcmgt v4.4s, v7.4s, v15.4h -// CHECK-AARCH64-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected floating-point constant #0.0 or invalid register type -// CHECK-AARCH64-ERROR: fcmlt v29.2d, v5.2d, v2.16b -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: invalid operand for instruction // CHECK-ARM64-ERROR: fcmgt v4.4s, v7.4s, v15.4h @@ -691,12 +683,6 @@ // CHECK-ERROR: fcmeq v0.16b, v1.16b, #0.0 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmeq v0.8b, v1.4h, #1.0 -// CHECK-AARCH64-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmeq v0.8b, v1.4h, #1 -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 // CHECK-ARM64-ERROR: fcmeq v0.8b, v1.4h, #1.0 @@ -722,12 +708,6 @@ // CHECK-ERROR: fcmge v3.8b, v8.2s, #0.0 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmle v17.8h, v15.2d, #-1.0 -// CHECK-AARCH64-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmle v17.8h, v15.2d, #2 -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 // CHECK-ARM64-ERROR: fcmle v17.8h, v15.2d, #-1.0 @@ -752,12 +732,6 @@ // CHECK-ERROR: fcmgt v4.4s, v7.4h, #0.0 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmlt v29.2d, v5.2d, #255.0 -// CHECK-AARCH64-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmlt v29.2d, v5.2d, #255 -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 // CHECK-ARM64-ERROR: fcmlt v29.2d, v5.2d, #255.0 @@ -782,12 +756,6 @@ // CHECK-ERROR: fcmge v3.8b, v8.2s, #0.0 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmle v17.2d, v15.2d, #15.0 -// CHECK-AARCH64-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmle v17.2d, v15.2d, #15 -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 // CHECK-ARM64-ERROR: fcmle v17.2d, v15.2d, #15.0 @@ -812,12 +780,6 @@ // CHECK-ERROR: fcmgt v4.4s, v7.4h, #0.0 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmlt v29.2d, v5.2d, #16.0 -// CHECK-AARCH64-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmlt v29.2d, v5.2d, #2 -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 // CHECK-ARM64-ERROR: fcmlt v29.2d, v5.2d, #16.0 @@ -1337,9 +1299,6 @@ shl v0.4s, v21.4s, #32 shl v0.2d, v1.2d, #64 -// CHECK-AARCH64-ERROR: error: expected comma before next operand -// CHECK-AARCH64-ERROR: shl v0.4s, v15,2s, #3 -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: unexpected token in argument list // CHECK-ARM64-ERROR: shl v0.4s, v15,2s, #3 @@ -2673,9 +2632,6 @@ pmull2 v0.4s, v1.8h v2.8h pmull2 v0.2d, v1.4s, v2.4s -// CHECK-AARCH64-ERROR: error: expected comma before next operand -// CHECK-AARCH64-ERROR: pmull2 v0.4s, v1.8h v2.8h -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: unexpected token in argument list // CHECK-ARM64-ERROR: pmull2 v0.4s, v1.8h v2.8h @@ -3003,22 +2959,18 @@ // CHECK-ERROR: error: invalid operand for instruction // 
CHECK-ERROR: mla v0.2d, v1.2d, v16.d[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mla v0.2h, v1.2h, v2.h[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ @@ -3041,22 +2993,18 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mls v0.2d, v1.2d, v16.d[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mls v0.2h, v1.2h, v2.h[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ @@ -3082,27 +3030,21 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmla v0.8h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v3.4s, v8.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v3.4s, v8.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3122,27 +3064,21 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmls v0.8h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2s, 
v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v3.4s, v8.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v3.4s, v8.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3163,7 +3099,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3173,18 +3108,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal v0.2s, v1.2s, v2.s[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ @@ -3194,11 +3126,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3219,7 +3149,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3229,18 +3158,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl v0.2s, v1.2s, v2.s[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl v0.2d, v1.2s, v22.s[4] // 
CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ @@ -3250,11 +3176,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3275,7 +3199,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3285,18 +3208,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal v0.2s, v1.2s, v2.s[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ @@ -3306,11 +3226,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3331,7 +3249,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3341,18 +3258,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl v0.2s, v1.2s, v2.s[3] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// 
CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ @@ -3362,11 +3276,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3387,7 +3299,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3397,18 +3308,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal v0.2s, v1.2s, v2.s[3] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ @@ -3418,11 +3326,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3443,7 +3349,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3453,18 +3358,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl v0.2s, v1.2s, v2.s[3] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer 
in range // CHECK-ERROR: sqdmlsl2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ @@ -3474,11 +3376,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3493,35 +3393,27 @@ mul v0.4s, v1.4s, v22.s[4] mul v0.2d, v1.2d, v2.d[1] -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: error: invalid operand for instruction // CHECK-ERROR: mul v0.4h, v1.4h, v16.h[8] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: invalid operand for instruction // CHECK-ERROR: mul v0.8h, v1.8h, v16.h[8] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3540,27 +3432,21 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmul v0.4h, v1.4h, v2.h[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3576,27 +3462,21 @@ // CHECK-ERROR: error: 
invalid operand for instruction // CHECK-ERROR: fmulx v0.4h, v1.4h, v2.h[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3617,7 +3497,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3627,18 +3506,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull v0.2s, v1.2s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull2 v0.4h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull2 v0.4s, v1.8h, v2.h[8] // CHECK-ERROR: ^ @@ -3648,11 +3524,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull2 v0.2s, v1.4s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull2 v0.2d, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3673,7 +3547,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3683,18 +3556,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull v0.2s, v1.2s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in 
range // CHECK-ERROR: umull v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull2 v0.4h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull2 v0.4s, v1.8h, v2.h[8] // CHECK-ERROR: ^ @@ -3704,11 +3574,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull2 v0.2s, v1.4s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull2 v0.2d, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3729,7 +3597,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3739,18 +3606,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull v0.2s, v1.2s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull2 v0.4h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull2 v0.4s, v1.8h, v2.h[8] // CHECK-ERROR: ^ @@ -3760,11 +3624,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull2 v0.2s, v1.4s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull2 v0.2d, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3779,33 +3641,27 @@ sqdmulh v0.4s, v1.4s, v22.s[4] sqdmulh v0.2d, v1.2d, v22.d[1] -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmulh v0.4h, v1.4h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmulh v0.8h, v1.8h, v16.h[2] // CHECK-ERROR: ^ -// 
CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3823,33 +3679,27 @@ sqrdmulh v0.4s, v1.4s, v22.s[4] sqrdmulh v0.2d, v1.2d, v22.d[1] -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqrdmulh v0.4h, v1.4h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqrdmulh v0.8h, v1.8h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -4068,15 +3918,12 @@ ld1 {v4}, [x0] ld1 {v32.16b}, [x0] ld1 {v15.8h}, [x32] -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: vector register expected // CHECK-ERROR: ld1 {x3}, [x2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld1 {v4}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: vector register expected // CHECK-ERROR: ld1 {v32.16b}, [x0] // CHECK-ERROR: ^ @@ -4091,14 +3938,12 @@ ld1 {v1.8h-v1.8h}, [x0] ld1 {v15.8h-v17.4h}, [x15] ld1 {v0.8b-v2.8b, [x0] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: ld1 {v0.16b, v2.16b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: ld1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: '{' expected // CHECK-ARM64-ERROR: error: unexpected token in argument list // CHECK-ERROR: ld1 v0.8b, v1.8b}, [x0] // CHECK-ERROR: ^ @@ -4108,7 +3953,6 @@ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: ld1 
{v1.8h-v1.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld1 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ @@ -4121,18 +3965,14 @@ ld2 {v15.4h, v16.4h, v17.4h}, [x32] ld2 {v15.8h-v16.4h}, [x15] ld2 {v0.2d-v2.2d}, [x0] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld2 {v15.8h, v16.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: ld2 {v0.8b, v2.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64: error: invalid operand for instruction // CHECK-ERROR: ld2 {v15.4h, v16.4h, v17.4h}, [x32] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld2 {v15.8h-v16.4h}, [x15] // CHECK-ERROR: ^ @@ -4145,19 +3985,15 @@ ld3 {v0.8b, v2.8b, v3.8b}, [x0] ld3 {v15.8h-v17.4h}, [x15] ld3 {v31.4s-v2.4s}, [sp] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v15.8h, v16.8h, v17.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: ld3 {v0.8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ @@ -4170,18 +4006,15 @@ ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] ld4 {v15.8h-v18.4h}, [x15] ld4 {v31.2s-v1.2s}, [x31] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: ld4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4 {v15.8h-v18.4h}, [x15] // CHECK-ERROR: ^ @@ -4193,15 +4026,12 @@ st1 {v4}, [x0] st1 {v32.16b}, [x0] st1 {v15.8h}, [x32] -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: vector register expected // CHECK-ERROR: st1 {x3}, [x2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: invalid operand for instruction // CHECK-ERROR: st1 {v4}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: vector register expected // CHECK-ERROR: st1 {v32.16b}, [x0] // CHECK-ERROR: ^ @@ -4216,14 +4046,12 @@ st1 {v1.8h-v1.8h}, [x0] st1 {v15.8h-v17.4h}, [x15] st1 {v0.8b-v2.8b, [x0] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be 
sequential // CHECK-ERROR: st1 {v0.16b, v2.16b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: st1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: '{' expected // CHECK-ARM64-ERROR: error: unexpected token in argument list // CHECK-ERROR: st1 v0.8b, v1.8b}, [x0] // CHECK-ERROR: ^ @@ -4233,7 +4061,6 @@ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: st1 {v1.8h-v1.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st1 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ @@ -4246,18 +4073,15 @@ st2 {v15.4h, v16.4h, v17.4h}, [x30] st2 {v15.8h-v16.4h}, [x15] st2 {v0.2d-v2.2d}, [x0] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st2 {v15.8h, v16.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: st2 {v0.8b, v2.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st2 {v15.4h, v16.4h, v17.4h}, [x30] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st2 {v15.8h-v16.4h}, [x15] // CHECK-ERROR: ^ @@ -4270,19 +4094,15 @@ st3 {v0.8b, v2.8b, v3.8b}, [x0] st3 {v15.8h-v17.4h}, [x15] st3 {v31.4s-v2.4s}, [sp] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st3 {v15.8h, v16.8h, v17.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: st3 {v0.8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st3 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ @@ -4295,18 +4115,15 @@ st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] st4 {v15.8h-v18.4h}, [x15] st4 {v31.2s-v1.2s}, [x31] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: st4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st4 {v15.8h-v18.4h}, [x15] // CHECK-ERROR: ^ @@ -4324,7 +4141,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld1 {v0.16b}, [x0], #8 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: invalid vector kind qualifier // CHECK-ERROR: ld1 {v0.8h, v1.16h}, [x0], x1 // CHECK-ERROR: ^ @@ -4341,7 +4157,6 @@ // 
CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld3 {v5.2s, v6.2s, v7.2s}, [x1], #48 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1 // CHECK-ERROR: ^ @@ -4352,7 +4167,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st1 {v0.16b}, [x0], #8 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: invalid vector kind qualifier // CHECK-ERROR: st1 {v0.8h, v1.16h}, [x0], x1 // CHECK-ERROR: ^ @@ -4369,7 +4183,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st3 {v5.2s, v6.2s, v7.2s}, [x1], #48 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1 // CHECK-ERROR: ^ @@ -4382,18 +4195,15 @@ ld2r {v31.4s, v0.2s}, [sp] ld3r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] ld4r {v31.2s, v0.2s, v1.2d, v2.2s}, [sp] -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: vector register expected // CHECK-ERROR: ld1r {x1}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld2r {v31.4s, v0.2s}, [sp] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld3r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4r {v31.2s, v0.2s, v1.2d, v2.2s}, [sp] // CHECK-ERROR: ^ @@ -4406,19 +4216,15 @@ ld2 {v15.h, v16.h}[8], [x15] ld3 {v31.s, v0.s, v1.s}[-1], [sp] ld4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] -// CHECK-AARCH64-ERROR:: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: ld1 {v0.b}[16], [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: ld2 {v15.h, v16.h}[8], [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected lane number // CHECK-ARM64-ERROR: error: vector lane must be an integer in range // CHECK-ERROR: ld3 {v31.s, v0.s, v1.s}[-1], [sp] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: ld4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] // CHECK-ERROR: ^ @@ -4427,18 +4233,15 @@ st2 {v31.s, v0.s}[3], [8] st3 {v15.h, v16.h, v17.h}[-1], [x15] st4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] -// CHECK-AARCH64-ERROR:: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: st1 {v0.d}[16], [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st2 {v31.s, v0.s}[3], [8] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected lane number // CHECK-ARM64-ERROR: error: vector lane must be an integer in range // CHECK-ERROR: st3 {v15.h, v16.h, v17.h}[-1], [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: st4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] // CHECK-ERROR: ^ @@ -4478,7 +4281,6 @@ 
// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld2 {v15.h, v16.h}[0], [x15], #3 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v31.s, v0.s, v1.d}[0], [sp], x9 // CHECK-ERROR: ^ @@ -4513,19 +4315,15 @@ ins v20.s[1], s30 ins v1.d[0], d7 -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v2.b[16], w1 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v7.h[8], w14 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v20.s[5], w30 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v1.d[2], x7 // CHECK-ERROR: ^ @@ -4553,23 +4351,18 @@ smov x14, v6.d[1] smov x20, v9.d[0] -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR smov w1, v0.b[16] // CHECK-ERROR ^ -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR smov w14, v6.h[8] // CHECK-ERROR ^ -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR smov x1, v0.b[16] // CHECK-ERROR ^ -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR smov x14, v6.h[8] // CHECK-ERROR ^ -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR smov x20, v9.s[5] // CHECK-ERROR ^ @@ -4597,19 +4390,15 @@ umov s20, v9.s[2] umov d7, v18.d[1] -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR umov w1, v0.b[16] // CHECK-ERROR ^ -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR umov w14, v6.h[8] // CHECK-ERROR ^ -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR umov w20, v9.s[5] // CHECK-ERROR ^ -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR umov x7, v18.d[3] // CHECK-ERROR ^ @@ -5026,7 +4815,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal s17, h27, s12 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: too few operands for instruction // CHECK-ARM64-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal d19, s24, d12 // CHECK-ERROR: ^ @@ -5041,7 +4829,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl s14, h12, s25 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: too few operands for instruction // CHECK-ARM64-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl d12, s23, d13 // CHECK-ERROR: ^ @@ -5056,7 +4843,6 @@ // CHECK-ERROR: error: invalid operand for instruction // 
CHECK-ERROR: sqdmull s12, h22, s12 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: too few operands for instruction // CHECK-ARM64-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull d15, s22, d12 // CHECK-ERROR: ^ @@ -7099,7 +6885,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmul h0, h1, v1.s[0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul s2, s29, v10.s[4] // CHECK-ERROR: ^ @@ -7119,7 +6904,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmulx h0, h1, v1.d[0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx d2, d29, v10.d[3] // CHECK-ERROR: ^ @@ -7139,7 +6923,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmla d30, s11, v1.d[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla s16, s22, v16.s[5] // CHECK-ERROR: ^ @@ -7159,7 +6942,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmls h7, h17, v26.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected lane number // CHECK-ARM64-ERROR: error: vector lane must be an integer in range [0, 1] // CHECK-ERROR: fmls d16, d22, v16.d[-1] // CHECK-ERROR: ^ @@ -7182,7 +6964,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal s8, s9, v14.s[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal d4, s5, v1.s[5] // CHECK-ERROR: ^ @@ -7208,7 +6989,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl d1, h1, v13.s[0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl d1, s1, v13.s[4] // CHECK-ERROR: ^ @@ -7236,7 +7016,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull s1, s1, v4.s[0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull s12, h17, v9.h[9] // CHECK-ERROR: ^ @@ -7262,7 +7041,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmulh s25, s26, v27.h[3] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh s25, s26, v27.s[4] // CHECK-ERROR: ^ @@ -7288,7 +7066,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqrdmulh s5, h6, v7.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh h31, h30, v14.h[9] // CHECK-ERROR: ^ @@ -7321,19 +7098,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: dup d0, v17.s[3] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup d0, v17.d[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // 
CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup s0, v1.s[7] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup h0, v31.h[16] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup b1, v3.b[16] // CHECK-ERROR: ^ diff --git a/test/MC/AArch64/neon-extract.s b/test/MC/AArch64/neon-extract.s index fbfc048de962..1daa46d096ee 100644 --- a/test/MC/AArch64/neon-extract.s +++ b/test/MC/AArch64/neon-extract.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-facge-facgt.s b/test/MC/AArch64/neon-facge-facgt.s index bb739fa1859e..799b85ff42f5 100644 --- a/test/MC/AArch64/neon-facge-facgt.s +++ b/test/MC/AArch64/neon-facge-facgt.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-frsqrt-frecp.s b/test/MC/AArch64/neon-frsqrt-frecp.s index ec3b64bfa59c..56bc47154a06 100644 --- a/test/MC/AArch64/neon-frsqrt-frecp.s +++ b/test/MC/AArch64/neon-frsqrt-frecp.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-halving-add-sub.s b/test/MC/AArch64/neon-halving-add-sub.s index 8e36b20386e5..19b56ced3e6a 100644 --- a/test/MC/AArch64/neon-halving-add-sub.s +++ b/test/MC/AArch64/neon-halving-add-sub.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-max-min-pairwise.s b/test/MC/AArch64/neon-max-min-pairwise.s index 4421be4ed0ab..e48f97535865 100644 --- a/test/MC/AArch64/neon-max-min-pairwise.s +++ b/test/MC/AArch64/neon-max-min-pairwise.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-max-min.s b/test/MC/AArch64/neon-max-min.s index 3700f7553941..8cc4ac86e650 100644 --- a/test/MC/AArch64/neon-max-min.s +++ b/test/MC/AArch64/neon-max-min.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-mla-mls-instructions.s b/test/MC/AArch64/neon-mla-mls-instructions.s index b82706862ec4..5c8b7d8788a4 100644 --- a/test/MC/AArch64/neon-mla-mls-instructions.s 
+++ b/test/MC/AArch64/neon-mla-mls-instructions.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-mov.s b/test/MC/AArch64/neon-mov.s index 8c420f1c013e..6231ffe49c51 100644 --- a/test/MC/AArch64/neon-mov.s +++ b/test/MC/AArch64/neon-mov.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-mul-div-instructions.s b/test/MC/AArch64/neon-mul-div-instructions.s index 6a39ad8e2e05..2601d50f1319 100644 --- a/test/MC/AArch64/neon-mul-div-instructions.s +++ b/test/MC/AArch64/neon-mul-div-instructions.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-perm.s b/test/MC/AArch64/neon-perm.s index 641415ee1ee6..4b28dd01db39 100644 --- a/test/MC/AArch64/neon-perm.s +++ b/test/MC/AArch64/neon-perm.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-rounding-halving-add.s b/test/MC/AArch64/neon-rounding-halving-add.s index 7e81b1a65ca7..55c9f921da75 100644 --- a/test/MC/AArch64/neon-rounding-halving-add.s +++ b/test/MC/AArch64/neon-rounding-halving-add.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-rounding-shift.s b/test/MC/AArch64/neon-rounding-shift.s index 5f72bafea48c..38924e7c4bd9 100644 --- a/test/MC/AArch64/neon-rounding-shift.s +++ b/test/MC/AArch64/neon-rounding-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-saturating-add-sub.s b/test/MC/AArch64/neon-saturating-add-sub.s index 1d2916a48d1b..d39997901f7b 100644 --- a/test/MC/AArch64/neon-saturating-add-sub.s +++ b/test/MC/AArch64/neon-saturating-add-sub.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-saturating-rounding-shift.s b/test/MC/AArch64/neon-saturating-rounding-shift.s index bc5c1c0a2132..702b9d2c60e7 100644 --- a/test/MC/AArch64/neon-saturating-rounding-shift.s +++ b/test/MC/AArch64/neon-saturating-rounding-shift.s @@ -1,4 +1,3 @@ -// RUN: 
llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-saturating-shift.s b/test/MC/AArch64/neon-saturating-shift.s index d35e1f3d0f08..d03172b1788e 100644 --- a/test/MC/AArch64/neon-saturating-shift.s +++ b/test/MC/AArch64/neon-saturating-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-abs.s b/test/MC/AArch64/neon-scalar-abs.s index c529cfc7522f..897c93506e1b 100644 --- a/test/MC/AArch64/neon-scalar-abs.s +++ b/test/MC/AArch64/neon-scalar-abs.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-add-sub.s b/test/MC/AArch64/neon-scalar-add-sub.s index fea1fc8ee8aa..955c30716b4e 100644 --- a/test/MC/AArch64/neon-scalar-add-sub.s +++ b/test/MC/AArch64/neon-scalar-add-sub.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-by-elem-mla.s b/test/MC/AArch64/neon-scalar-by-elem-mla.s index 7d5c6d04fd47..d4f3682dc2bc 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-mla.s +++ b/test/MC/AArch64/neon-scalar-by-elem-mla.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-by-elem-mul.s b/test/MC/AArch64/neon-scalar-by-elem-mul.s index 78c51594d170..d22aa9b15b29 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-mul.s +++ b/test/MC/AArch64/neon-scalar-by-elem-mul.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s index 007568cceb44..dadb8db99368 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s +++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //----------------------------------------------------------------------------- diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s index 727bc670e1f6..90eeb5e64c01 100644 --- 
a/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s +++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //----------------------------------------------------------------------------- diff --git a/test/MC/AArch64/neon-scalar-compare.s b/test/MC/AArch64/neon-scalar-compare.s index 1cd04fd111f9..16ba92e07974 100644 --- a/test/MC/AArch64/neon-scalar-compare.s +++ b/test/MC/AArch64/neon-scalar-compare.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-cvt.s b/test/MC/AArch64/neon-scalar-cvt.s index dc8e3165b6dd..047495276fb4 100644 --- a/test/MC/AArch64/neon-scalar-cvt.s +++ b/test/MC/AArch64/neon-scalar-cvt.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-dup.s b/test/MC/AArch64/neon-scalar-dup.s index 81bdb7c4f852..ba4f3c2ad797 100644 --- a/test/MC/AArch64/neon-scalar-dup.s +++ b/test/MC/AArch64/neon-scalar-dup.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-extract-narrow.s b/test/MC/AArch64/neon-scalar-extract-narrow.s index 7e4ff85de7d0..e6167930d1ca 100644 --- a/test/MC/AArch64/neon-scalar-extract-narrow.s +++ b/test/MC/AArch64/neon-scalar-extract-narrow.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-fp-compare.s b/test/MC/AArch64/neon-scalar-fp-compare.s index 054f923322e0..cb9e7a7a66e0 100644 --- a/test/MC/AArch64/neon-scalar-fp-compare.s +++ b/test/MC/AArch64/neon-scalar-fp-compare.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-mul.s b/test/MC/AArch64/neon-scalar-mul.s index 968793fea885..21be537cbb7c 100644 --- a/test/MC/AArch64/neon-scalar-mul.s +++ b/test/MC/AArch64/neon-scalar-mul.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-neg.s b/test/MC/AArch64/neon-scalar-neg.s index ac61f9b78aa9..e902c2307a1d 100644 --- a/test/MC/AArch64/neon-scalar-neg.s +++ 
b/test/MC/AArch64/neon-scalar-neg.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-recip.s b/test/MC/AArch64/neon-scalar-recip.s index 9dc6d069cd03..dde26b557be5 100644 --- a/test/MC/AArch64/neon-scalar-recip.s +++ b/test/MC/AArch64/neon-scalar-recip.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-reduce-pairwise.s b/test/MC/AArch64/neon-scalar-reduce-pairwise.s index bf5eb5304b8f..cb7564ac68d1 100644 --- a/test/MC/AArch64/neon-scalar-reduce-pairwise.s +++ b/test/MC/AArch64/neon-scalar-reduce-pairwise.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //---------------------------------------------------------------------- diff --git a/test/MC/AArch64/neon-scalar-rounding-shift.s b/test/MC/AArch64/neon-scalar-rounding-shift.s index 2d654958917e..2594c2f2ac54 100644 --- a/test/MC/AArch64/neon-scalar-rounding-shift.s +++ b/test/MC/AArch64/neon-scalar-rounding-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s diff --git a/test/MC/AArch64/neon-scalar-saturating-add-sub.s b/test/MC/AArch64/neon-scalar-saturating-add-sub.s index 3cdfd6204dfc..d5cd838a92bb 100644 --- a/test/MC/AArch64/neon-scalar-saturating-add-sub.s +++ b/test/MC/AArch64/neon-scalar-saturating-add-sub.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s index 17bf222661c9..83bd59f50c84 100644 --- a/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s +++ b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-saturating-shift.s b/test/MC/AArch64/neon-scalar-saturating-shift.s index 3eddabd616fa..679f1f4052c9 100644 --- a/test/MC/AArch64/neon-scalar-saturating-shift.s +++ b/test/MC/AArch64/neon-scalar-saturating-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-shift-imm.s b/test/MC/AArch64/neon-scalar-shift-imm.s index 
a0847d207a32..47a8dec212b1 100644 --- a/test/MC/AArch64/neon-scalar-shift-imm.s +++ b/test/MC/AArch64/neon-scalar-shift-imm.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-shift.s b/test/MC/AArch64/neon-scalar-shift.s index 54b42f5eab2d..98aa51a63da2 100644 --- a/test/MC/AArch64/neon-scalar-shift.s +++ b/test/MC/AArch64/neon-scalar-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-shift-left-long.s b/test/MC/AArch64/neon-shift-left-long.s index 679af09ea4a0..87204683104e 100644 --- a/test/MC/AArch64/neon-shift-left-long.s +++ b/test/MC/AArch64/neon-shift-left-long.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-shift.s b/test/MC/AArch64/neon-shift.s index d5b730c07028..dcff992a7824 100644 --- a/test/MC/AArch64/neon-shift.s +++ b/test/MC/AArch64/neon-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-copy.s b/test/MC/AArch64/neon-simd-copy.s index dc8b060b3577..917f7cb524ed 100644 --- a/test/MC/AArch64/neon-simd-copy.s +++ b/test/MC/AArch64/neon-simd-copy.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-ldst-multi-elem.s b/test/MC/AArch64/neon-simd-ldst-multi-elem.s index 85e7c28e396d..b8b3e72ff777 100644 --- a/test/MC/AArch64/neon-simd-ldst-multi-elem.s +++ b/test/MC/AArch64/neon-simd-ldst-multi-elem.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-ldst-one-elem.s b/test/MC/AArch64/neon-simd-ldst-one-elem.s index 63b7bca39850..4febf6d8fe0b 100644 --- a/test/MC/AArch64/neon-simd-ldst-one-elem.s +++ b/test/MC/AArch64/neon-simd-ldst-one-elem.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-misc.s b/test/MC/AArch64/neon-simd-misc.s index 4486dddce409..6d1aafdd7725 100644 --- a/test/MC/AArch64/neon-simd-misc.s +++ b/test/MC/AArch64/neon-simd-misc.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc 
-triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s b/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s index b8cc266cfca8..c57a122f35c8 100644 --- a/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s +++ b/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-shift.s b/test/MC/AArch64/neon-simd-shift.s index 46a75009dc40..1c1ad7489d59 100644 --- a/test/MC/AArch64/neon-simd-shift.s +++ b/test/MC/AArch64/neon-simd-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-sxtl.s b/test/MC/AArch64/neon-sxtl.s index 2efdb4dcbbd5..363796ee3341 100644 --- a/test/MC/AArch64/neon-sxtl.s +++ b/test/MC/AArch64/neon-sxtl.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-tbl.s b/test/MC/AArch64/neon-tbl.s index e8d77c75c37f..bb39fa9f22ae 100644 --- a/test/MC/AArch64/neon-tbl.s +++ b/test/MC/AArch64/neon-tbl.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-uxtl.s b/test/MC/AArch64/neon-uxtl.s index 502166b28183..46c56625c0f7 100644 --- a/test/MC/AArch64/neon-uxtl.s +++ b/test/MC/AArch64/neon-uxtl.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/noneon-diagnostics.s b/test/MC/AArch64/noneon-diagnostics.s index 3c953e3764d4..470a74d5b317 100644 --- a/test/MC/AArch64/noneon-diagnostics.s +++ b/test/MC/AArch64/noneon-diagnostics.s @@ -1,6 +1,3 @@ -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=-neon < %s 2> %t -// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s - // RUN: not llvm-mc -triple arm64-none-linux-gnu -mattr=-neon < %s 2> %t // RUN: FileCheck --check-prefix=CHECK-ARM64-ERROR < %t %s diff --git a/test/MC/AArch64/optional-hash.s b/test/MC/AArch64/optional-hash.s index a332cb091246..7ae1aa490476 100644 --- a/test/MC/AArch64/optional-hash.s +++ b/test/MC/AArch64/optional-hash.s @@ -1,7 +1,4 @@ // PR18929 -// RUN: llvm-mc < %s -triple=aarch64-linux-gnueabi -mattr=+fp-armv8,+neon -filetype=obj -o - \ -// RUN: | llvm-objdump --disassemble -arch=aarch64 -mattr=+fp-armv8,+neon - | FileCheck %s - // RUN: llvm-mc < %s -triple=arm64-linux-gnueabi -mattr=+fp-armv8,+neon -filetype=obj -o - \ // RUN: | 
llvm-objdump --disassemble -arch=arm64 -mattr=+fp-armv8,+neon - | FileCheck %s diff --git a/test/MC/AArch64/tls-relocs.s b/test/MC/AArch64/tls-relocs.s index 5b2e98875997..ae7b20cefd56 100644 --- a/test/MC/AArch64/tls-relocs.s +++ b/test/MC/AArch64/tls-relocs.s @@ -1,7 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s --check-prefix=CHECK-AARCH64 -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s -o - | \ -// RUN: llvm-readobj -r -t | FileCheck --check-prefix=CHECK-ELF %s - // RUN: llvm-mc -triple=arm64-none-linux-gnu -show-encoding < %s | FileCheck %s --check-prefix=CHECK-ARM64 // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s -o - | \ // RUN: llvm-readobj -r -t | FileCheck --check-prefix=CHECK-ELF %s @@ -11,14 +7,6 @@ movn x2, #:dtprel_g2:var movz x3, #:dtprel_g2:var movn x4, #:dtprel_g2:var -// CHECK-AARCH64: movz x1, #:dtprel_g2:var // encoding: [0x01'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2 -// CHECK-AARCH64: movn x2, #:dtprel_g2:var // encoding: [0x02'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2 -// CHECK-AARCH64: movz x3, #:dtprel_g2:var // encoding: [0x03'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2 -// CHECK-AARCH64: movn x4, #:dtprel_g2:var // encoding: [0x04'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2 // CHECK-ARM64: movz x1, #:dtprel_g2:var // encoding: [0bAAA00001,A,0b110AAAAA,0x92] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw @@ -41,14 +29,6 @@ movn x6, #:dtprel_g1:var movz w7, #:dtprel_g1:var movn w8, #:dtprel_g1:var -// CHECK-AARCH64: movz x5, #:dtprel_g1:var // encoding: [0x05'A',A,0xa0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1 -// CHECK-AARCH64: movn x6, #:dtprel_g1:var // encoding: [0x06'A',A,0xa0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1 -// CHECK-AARCH64: movz w7, #:dtprel_g1:var // encoding: [0x07'A',A,0xa0'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1 -// CHECK-AARCH64: movn w8, #:dtprel_g1:var // encoding: [0x08'A',A,0xa0'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1 // CHECK-ARM64: movz x5, #:dtprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw @@ -67,10 +47,6 @@ movk x9, #:dtprel_g1_nc:var movk w10, #:dtprel_g1_nc:var -// CHECK-AARCH64: movk x9, #:dtprel_g1_nc:var // encoding: [0x09'A',A,0xa0'A',0xf2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc -// CHECK-AARCH64: movk w10, #:dtprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc // CHECK-ARM64: movk x9, #:dtprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_arm64_movw @@ -85,13 +61,6 @@ movn x12, #:dtprel_g0:var movz w13, #:dtprel_g0:var movn w14, #:dtprel_g0:var -// 
CHECK-AARCH64: movz x11, #:dtprel_g0:var // encoding: [0x0b'A',A,0x80'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0 -// CHECK-AARCH64: movn x12, #:dtprel_g0:var // encoding: [0x0c'A',A,0x80'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0 -// CHECK-AARCH64: movz w13, #:dtprel_g0:var // encoding: [0x0d'A',A,0x80'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0 -// CHECK-AARCH64: movn w14, #:dtprel_g0:var // encoding: [0x0e'A',A,0x80'A',0x12'A'] // CHECK-ARM64: movz x11, #:dtprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw @@ -110,10 +79,6 @@ movk x15, #:dtprel_g0_nc:var movk w16, #:dtprel_g0_nc:var -// CHECK-AARCH64: movk x15, #:dtprel_g0_nc:var // encoding: [0x0f'A',A,0x80'A',0xf2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc -// CHECK-AARCH64: movk w16, #:dtprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc // CHECK-ARM64: movk x15, #:dtprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_arm64_movw @@ -126,10 +91,6 @@ add x17, x18, #:dtprel_hi12:var, lsl #12 add w19, w20, #:dtprel_hi12:var, lsl #12 -// CHECK-AARCH64: add x17, x18, #:dtprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12 -// CHECK-AARCH64: add w19, w20, #:dtprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12 // CHECK-ARM64: add x17, x18, :dtprel_hi12:var, lsl #12 // encoding: [0x51,0bAAAAAA10,0b00AAAAAA,0x91] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_arm64_add_imm12 @@ -142,10 +103,6 @@ add x21, x22, #:dtprel_lo12:var add w23, w24, #:dtprel_lo12:var -// CHECK-AARCH64: add x21, x22, #:dtprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12 -// CHECK-AARCH64: add w23, w24, #:dtprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12 // CHECK-ARM64: add x21, x22, :dtprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_add_imm12 @@ -158,10 +115,6 @@ add x25, x26, #:dtprel_lo12_nc:var add w27, w28, #:dtprel_lo12_nc:var -// CHECK-AARCH64: add x25, x26, #:dtprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc -// CHECK-AARCH64: add w27, w28, #:dtprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc // CHECK-ARM64: add x25, x26, :dtprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_add_imm12 @@ 
-174,10 +127,6 @@ ldrb w29, [x30, #:dtprel_lo12:var] ldrsb x29, [x28, #:dtprel_lo12_nc:var] -// CHECK-AARCH64: ldrb w29, [x30, #:dtprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst8_dtprel_lo12 -// CHECK-AARCH64: ldrsb x29, [x28, #:dtprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst8_dtprel_lo12_nc // CHECK-ARM64: ldrb w29, [x30, :dtprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale1 @@ -190,10 +139,6 @@ strh w27, [x26, #:dtprel_lo12:var] ldrsh x25, [x24, #:dtprel_lo12_nc:var] -// CHECK-AARCH64: strh w27, [x26, #:dtprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst16_dtprel_lo12 -// CHECK-AARCH64: ldrsh x25, [x24, #:dtprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst16_dtprel_lo12_n // CHECK-ARM64: strh w27, [x26, :dtprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale2 @@ -206,10 +151,6 @@ ldr w23, [x22, #:dtprel_lo12:var] ldrsw x21, [x20, #:dtprel_lo12_nc:var] -// CHECK-AARCH64: ldr w23, [x22, #:dtprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst32_dtprel_lo12 -// CHECK-AARCH64: ldrsw x21, [x20, #:dtprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst32_dtprel_lo12_n // CHECK-ARM64: ldr w23, [x22, :dtprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale4 @@ -222,10 +163,6 @@ ldr x19, [x18, #:dtprel_lo12:var] str x17, [x16, #:dtprel_lo12_nc:var] -// CHECK-AARCH64: ldr x19, [x18, #:dtprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst64_dtprel_lo12 -// CHECK-AARCH64: str x17, [x16, #:dtprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst64_dtprel_lo12_nc // CHECK-ARM64: ldr x19, [x18, :dtprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 @@ -239,10 +176,6 @@ // TLS initial-exec forms movz x15, #:gottprel_g1:var movz w14, #:gottprel_g1:var -// CHECK-AARCH64: movz x15, #:gottprel_g1:var // encoding: [0x0f'A',A,0xa0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1 -// CHECK-AARCH64: movz w14, #:gottprel_g1:var // encoding: [0x0e'A',A,0xa0'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1 // CHECK-ARM64: movz x15, #:gottprel_g1:var // encoding: [0bAAA01111,A,0b101AAAAA,0x92] // CHECK-ARM64: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_arm64_movw @@ -255,10 +188,6 @@ movk x13, 
#:gottprel_g0_nc:var movk w12, #:gottprel_g0_nc:var -// CHECK-AARCH64: movk x13, #:gottprel_g0_nc:var // encoding: [0x0d'A',A,0x80'A',0xf2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc -// CHECK-AARCH64: movk w12, #:gottprel_g0_nc:var // encoding: [0x0c'A',A,0x80'A',0x72'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc // CHECK-ARM64: movk x13, #:gottprel_g0_nc:var // encoding: [0bAAA01101,A,0b100AAAAA,0xf2] // CHECK-ARM64: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_arm64_movw @@ -272,12 +201,6 @@ adrp x11, :gottprel:var ldr x10, [x0, #:gottprel_lo12:var] ldr x9, :gottprel:var -// CHECK-AARCH64: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_adr_gottprel_page -// CHECK-AARCH64: ldr x10, [x0, #:gottprel_lo12:var] // encoding: [0x0a'A',A,0x40'A',0xf9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_a64_ld64_gottprel_lo12_nc -// CHECK-AARCH64: ldr x9, :gottprel:var // encoding: [0x09'A',A,A,0x58'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_ld_gottprel_prel19 // CHECK-ARM64: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A'] // CHECK-ARM64: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_arm64_pcrel_adrp_imm21 @@ -294,10 +217,6 @@ // TLS local-exec forms movz x3, #:tprel_g2:var movn x4, #:tprel_g2:var -// CHECK-AARCH64: movz x3, #:tprel_g2:var // encoding: [0x03'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2 -// CHECK-AARCH64: movn x4, #:tprel_g2:var // encoding: [0x04'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2 // CHECK-ARM64: movz x3, #:tprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_arm64_movw @@ -312,14 +231,6 @@ movn x6, #:tprel_g1:var movz w7, #:tprel_g1:var movn w8, #:tprel_g1:var -// CHECK-AARCH64: movz x5, #:tprel_g1:var // encoding: [0x05'A',A,0xa0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1 -// CHECK-AARCH64: movn x6, #:tprel_g1:var // encoding: [0x06'A',A,0xa0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1 -// CHECK-AARCH64: movz w7, #:tprel_g1:var // encoding: [0x07'A',A,0xa0'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1 -// CHECK-AARCH64: movn w8, #:tprel_g1:var // encoding: [0x08'A',A,0xa0'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1 // CHECK-ARM64: movz x5, #:tprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw @@ -338,10 +249,6 @@ movk x9, #:tprel_g1_nc:var movk w10, #:tprel_g1_nc:var -// CHECK-AARCH64: movk x9, #:tprel_g1_nc:var // encoding: [0x09'A',A,0xa0'A',0xf2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc -// CHECK-AARCH64: movk w10, #:tprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: 
fixup_a64_movw_tprel_g1_nc // CHECK-ARM64: movk x9, #:tprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_arm64_movw @@ -356,14 +263,6 @@ movn x12, #:tprel_g0:var movz w13, #:tprel_g0:var movn w14, #:tprel_g0:var -// CHECK-AARCH64: movz x11, #:tprel_g0:var // encoding: [0x0b'A',A,0x80'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0 -// CHECK-AARCH64: movn x12, #:tprel_g0:var // encoding: [0x0c'A',A,0x80'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0 -// CHECK-AARCH64: movz w13, #:tprel_g0:var // encoding: [0x0d'A',A,0x80'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0 -// CHECK-AARCH64: movn w14, #:tprel_g0:var // encoding: [0x0e'A',A,0x80'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0 // CHECK-ARM64: movz x11, #:tprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw @@ -382,10 +281,6 @@ movk x15, #:tprel_g0_nc:var movk w16, #:tprel_g0_nc:var -// CHECK-AARCH64: movk x15, #:tprel_g0_nc:var // encoding: [0x0f'A',A,0x80'A',0xf2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc -// CHECK-AARCH64: movk w16, #:tprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc // CHECK-ARM64: movk x15, #:tprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_arm64_movw @@ -398,10 +293,6 @@ add x17, x18, #:tprel_hi12:var, lsl #12 add w19, w20, #:tprel_hi12:var, lsl #12 -// CHECK-AARCH64: add x17, x18, #:tprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12 -// CHECK-AARCH64: add w19, w20, #:tprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12 // CHECK-ARM64: add x17, x18, :tprel_hi12:var, lsl #12 // encoding: [0x51,0bAAAAAA10,0b00AAAAAA,0x91] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_arm64_add_imm12 @@ -414,10 +305,6 @@ add x21, x22, #:tprel_lo12:var add w23, w24, #:tprel_lo12:var -// CHECK-AARCH64: add x21, x22, #:tprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12 -// CHECK-AARCH64: add w23, w24, #:tprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12 // CHECK-ARM64: add x21, x22, :tprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_add_imm12 @@ -430,10 +317,6 @@ add x25, x26, #:tprel_lo12_nc:var add w27, w28, #:tprel_lo12_nc:var -// CHECK-AARCH64: add x25, x26, #:tprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc -// CHECK-AARCH64: add w27, w28, 
#:tprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc // CHECK-ARM64: add x25, x26, :tprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_add_imm12 @@ -446,10 +329,6 @@ ldrb w29, [x30, #:tprel_lo12:var] ldrsb x29, [x28, #:tprel_lo12_nc:var] -// CHECK-AARCH64: ldrb w29, [x30, #:tprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst8_tprel_lo12 -// CHECK-AARCH64: ldrsb x29, [x28, #:tprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst8_tprel_lo12_nc // CHECK-ARM64: ldrb w29, [x30, :tprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale1 @@ -462,10 +341,6 @@ strh w27, [x26, #:tprel_lo12:var] ldrsh x25, [x24, #:tprel_lo12_nc:var] -// CHECK-AARCH64: strh w27, [x26, #:tprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst16_tprel_lo12 -// CHECK-AARCH64: ldrsh x25, [x24, #:tprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst16_tprel_lo12_n // CHECK-ARM64: strh w27, [x26, :tprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale2 @@ -478,10 +353,6 @@ ldr w23, [x22, #:tprel_lo12:var] ldrsw x21, [x20, #:tprel_lo12_nc:var] -// CHECK-AARCH64: ldr w23, [x22, #:tprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst32_tprel_lo12 -// CHECK-AARCH64: ldrsw x21, [x20, #:tprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst32_tprel_lo12_n // CHECK-ARM64: ldr w23, [x22, :tprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale4 @@ -493,10 +364,6 @@ ldr x19, [x18, #:tprel_lo12:var] str x17, [x16, #:tprel_lo12_nc:var] -// CHECK-AARCH64: ldr x19, [x18, #:tprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst64_tprel_lo12 -// CHECK-AARCH64: str x17, [x16, #:tprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst64_tprel_lo12_nc // CHECK-ARM64: ldr x19, [x18, :tprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 @@ -513,15 +380,6 @@ .tlsdesccall var blr x3 -// CHECK-AARCH64: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_adr_page -// CHECK-AARCH64: ldr x7, [x6, #:tlsdesc_lo12:var] // encoding: [0xc7'A',A,0x40'A',0xf9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: 
:tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_ld64_lo12_nc -// CHECK-AARCH64: add x5, x4, #:tlsdesc_lo12:var // encoding: [0x85'A',A,A,0x91'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_add_lo12_nc -// CHECK-AARCH64: .tlsdesccall var // encoding: [] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_call -// CHECK-AARCH64: blr x3 // encoding: [0x60,0x00,0x3f,0xd6] // CHECK-ARM64: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A'] // CHECK-ARM64: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_arm64_pcrel_adrp_imm21 diff --git a/test/MC/AArch64/trace-regs-diagnostics.s b/test/MC/AArch64/trace-regs-diagnostics.s index 04f9d277355c..fa57817dd38a 100644 --- a/test/MC/AArch64/trace-regs-diagnostics.s +++ b/test/MC/AArch64/trace-regs-diagnostics.s @@ -1,4 +1,3 @@ -// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s // RUN: not llvm-mc -triple arm64-none-linux-gnu < %s 2>&1 | FileCheck %s // Write-only mrs x12, trcoslar diff --git a/test/MC/AArch64/trace-regs.s b/test/MC/AArch64/trace-regs.s index b763e67c91cf..be25f08947b6 100644 --- a/test/MC/AArch64/trace-regs.s +++ b/test/MC/AArch64/trace-regs.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -show-encoding < %s | FileCheck %s mrs x8, trcstatr diff --git a/test/MC/Disassembler/AArch64/lit.local.cfg b/test/MC/Disassembler/AArch64/lit.local.cfg index c6f83453ac20..653697414894 100644 --- a/test/MC/Disassembler/AArch64/lit.local.cfg +++ b/test/MC/Disassembler/AArch64/lit.local.cfg @@ -1,4 +1,4 @@ targets = set(config.root.targets_to_build.split()) -if 'AArch64' not in targets or 'ARM64' not in targets: +if 'ARM64' not in targets: config.unsupported = True diff --git a/test/Transforms/LoopVectorize/AArch64/lit.local.cfg b/test/Transforms/LoopVectorize/AArch64/lit.local.cfg index a24a72819c35..f1d1f88cf399 100644 --- a/test/Transforms/LoopVectorize/AArch64/lit.local.cfg +++ b/test/Transforms/LoopVectorize/AArch64/lit.local.cfg @@ -1,6 +1,6 @@ config.suffixes = ['.ll'] targets = set(config.root.targets_to_build.split()) -if not 'AArch64' in targets: +if not 'ARM64' in targets: config.unsupported = True From 29f94c72014eaa5d0d3b920686e689e79759cacb Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Sat, 24 May 2014 12:50:23 +0000 Subject: [PATCH 133/906] AArch64/ARM64: move ARM64 into AArch64's place This commit starts with a "git mv ARM64 AArch64" and continues out from there, renaming the C++ classes, intrinsics, and other target-local objects for consistency. "ARM64" test directories are also moved, and tests that began their life in ARM64 use an arm64 triple, those from AArch64 use an aarch64 triple. Both should be equivalent though. This finishes the AArch64 merge, and everyone should feel free to continue committing as normal now. 
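To illustrate the triple equivalence mentioned above, a lit RUN line such as

  // RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s

and its arm64 counterpart

  // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s

exercise the same backend once this change lands; the two lines are representative of the RUN lines already present in the neon test diffs above, not additions introduced by this patch.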
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209577 91177308-0d34-0410-b5e6-96231b3b80d8 --- CMakeLists.txt | 4 +- autoconf/configure.ac | 18 +- cmake/config-ix.cmake | 2 +- configure | 18 +- docs/LangRef.rst | 2 +- include/llvm/IR/Intrinsics.td | 2 +- ...ntrinsicsARM64.td => IntrinsicsAArch64.td} | 388 +-- .../RuntimeDyld/RuntimeDyldMachO.cpp | 7 +- .../RuntimeDyld/RuntimeDyldMachO.h | 2 +- lib/LTO/LTOCodeGenerator.cpp | 3 +- lib/LTO/LTOModule.cpp | 3 +- lib/MC/MCObjectFileInfo.cpp | 10 +- lib/Target/AArch64/AArch64.h | 49 + .../{ARM64/ARM64.td => AArch64/AArch64.td} | 26 +- .../AArch64AddressTypePromotion.cpp} | 53 +- .../AArch64AdvSIMDScalarPass.cpp} | 84 +- .../AArch64AsmPrinter.cpp} | 175 +- .../AArch64BranchRelaxation.cpp} | 167 +- .../AArch64CallingConv.h} | 42 +- .../AArch64CallingConvention.td} | 34 +- .../AArch64CleanupLocalDynamicTLSPass.cpp} | 38 +- .../AArch64CollectLOH.cpp} | 166 +- .../AArch64ConditionalCompares.cpp} | 202 +- .../AArch64DeadRegisterDefinitionsPass.cpp} | 38 +- .../AArch64ExpandPseudoInsts.cpp} | 246 +- .../AArch64FastISel.cpp} | 472 ++-- .../AArch64FrameLowering.cpp} | 227 +- .../AArch64FrameLowering.h} | 18 +- .../AArch64ISelDAGToDAG.cpp} | 1163 ++++----- .../AArch64ISelLowering.cpp} | 2075 +++++++++-------- .../AArch64ISelLowering.h} | 30 +- .../AArch64InstrAtomics.td} | 40 +- .../AArch64InstrFormats.td} | 172 +- .../AArch64InstrInfo.cpp} | 1408 +++++------ .../AArch64InstrInfo.h} | 84 +- .../AArch64InstrInfo.td} | 1368 +++++------ .../AArch64LoadStoreOptimizer.cpp} | 360 ++- .../AArch64MCInstLower.cpp} | 111 +- .../AArch64MCInstLower.h} | 12 +- .../AArch64MachineFunctionInfo.h} | 20 +- .../AArch64PerfectShuffle.h} | 2 +- .../AArch64PromoteConstant.cpp} | 62 +- .../AArch64RegisterInfo.cpp} | 210 +- .../AArch64RegisterInfo.h} | 24 +- lib/Target/AArch64/AArch64RegisterInfo.td | 593 +++++ .../AArch64SchedA53.td} | 2 +- .../AArch64SchedCyclone.td} | 10 +- .../AArch64Schedule.td} | 8 +- .../AArch64SelectionDAGInfo.cpp} | 21 +- .../AArch64SelectionDAGInfo.h} | 16 +- .../AArch64StorePairSuppress.cpp} | 40 +- .../AArch64Subtarget.cpp} | 45 +- .../AArch64Subtarget.h} | 18 +- .../AArch64TargetMachine.cpp} | 136 +- lib/Target/AArch64/AArch64TargetMachine.h | 94 + .../AArch64TargetObjectFile.cpp} | 14 +- .../AArch64TargetObjectFile.h} | 14 +- .../AArch64TargetTransformInfo.cpp} | 73 +- .../AsmParser/AArch64AsmParser.cpp} | 1129 ++++----- .../AsmParser/CMakeLists.txt | 4 +- .../AsmParser/LLVMBuild.txt | 10 +- .../{ARM64 => AArch64}/AsmParser/Makefile | 4 +- lib/Target/AArch64/CMakeLists.txt | 51 + .../Disassembler/AArch64Disassembler.cpp} | 955 ++++---- .../Disassembler/AArch64Disassembler.h} | 12 +- .../AArch64ExternalSymbolizer.cpp} | 43 +- .../Disassembler/AArch64ExternalSymbolizer.h} | 23 +- .../Disassembler/CMakeLists.txt | 8 +- .../Disassembler/LLVMBuild.txt | 10 +- .../{ARM64 => AArch64}/Disassembler/Makefile | 4 +- .../InstPrinter/AArch64InstPrinter.cpp | 1316 +++++++++++ .../InstPrinter/AArch64InstPrinter.h} | 24 +- lib/Target/AArch64/InstPrinter/CMakeLists.txt | 7 + .../InstPrinter/LLVMBuild.txt | 10 +- .../{ARM64 => AArch64}/InstPrinter/Makefile | 4 +- lib/Target/{ARM64 => AArch64}/LLVMBuild.txt | 12 +- .../MCTargetDesc/AArch64AddressingModes.h} | 112 +- .../MCTargetDesc/AArch64AsmBackend.cpp} | 264 +-- .../MCTargetDesc/AArch64ELFObjectWriter.cpp} | 184 +- .../MCTargetDesc/AArch64ELFStreamer.cpp} | 20 +- .../MCTargetDesc/AArch64ELFStreamer.h} | 12 +- .../AArch64/MCTargetDesc/AArch64FixupKinds.h | 76 + .../MCTargetDesc/AArch64MCAsmInfo.cpp} | 18 
+- .../MCTargetDesc/AArch64MCAsmInfo.h} | 16 +- .../MCTargetDesc/AArch64MCCodeEmitter.cpp} | 250 +- .../MCTargetDesc/AArch64MCExpr.cpp} | 20 +- .../MCTargetDesc/AArch64MCExpr.h} | 16 +- .../MCTargetDesc/AArch64MCTargetDesc.cpp | 225 ++ .../MCTargetDesc/AArch64MCTargetDesc.h} | 40 +- .../MCTargetDesc/AArch64MachObjectWriter.cpp} | 61 +- .../AArch64/MCTargetDesc/CMakeLists.txt | 14 + .../MCTargetDesc/LLVMBuild.txt | 10 +- .../{ARM64 => AArch64}/MCTargetDesc/Makefile | 4 +- lib/Target/AArch64/Makefile | 25 + .../TargetInfo/AArch64TargetInfo.cpp} | 16 +- lib/Target/AArch64/TargetInfo/CMakeLists.txt | 7 + .../TargetInfo}/LLVMBuild.txt | 8 +- .../{ARM64 => AArch64}/TargetInfo/Makefile | 4 +- .../Utils/AArch64BaseInfo.cpp} | 102 +- .../Utils/AArch64BaseInfo.h} | 416 ++-- lib/Target/AArch64/Utils/CMakeLists.txt | 3 + .../Utils}/LLVMBuild.txt | 8 +- lib/Target/{ARM64 => AArch64}/Utils/Makefile | 7 +- lib/Target/ARM64/ARM64.h | 48 - lib/Target/ARM64/ARM64RegisterInfo.td | 593 ----- lib/Target/ARM64/ARM64TargetMachine.h | 92 - lib/Target/ARM64/CMakeLists.txt | 51 - .../ARM64/InstPrinter/ARM64InstPrinter.cpp | 1312 ----------- lib/Target/ARM64/InstPrinter/CMakeLists.txt | 7 - .../ARM64/MCTargetDesc/ARM64FixupKinds.h | 76 - .../ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp | 210 -- lib/Target/ARM64/MCTargetDesc/CMakeLists.txt | 14 - lib/Target/ARM64/Makefile | 25 - lib/Target/ARM64/TargetInfo/CMakeLists.txt | 7 - lib/Target/ARM64/Utils/CMakeLists.txt | 3 - lib/Target/LLVMBuild.txt | 2 +- .../InstCombine/InstCombineCalls.cpp | 6 +- .../{ARM64 => AArch64}/lit.local.cfg | 2 +- .../CostModel/{ARM64 => AArch64}/select.ll | 0 .../CostModel/{ARM64 => AArch64}/store.ll | 0 test/CodeGen/AArch64/128bit_load_store.ll | 10 +- .../aarch64-neon-v1i1-setcc.ll | 0 test/CodeGen/AArch64/addsub.ll | 2 +- test/CodeGen/AArch64/addsub_ext.ll | 2 +- test/CodeGen/AArch64/alloca.ll | 36 +- test/CodeGen/AArch64/analyze-branch.ll | 2 +- .../arm64-2011-03-09-CPSRSpill.ll} | 0 .../arm64-2011-03-17-AsmPrinterCrash.ll} | 0 ...arm64-2011-03-21-Unaligned-Frame-Index.ll} | 0 .../arm64-2011-04-21-CPSRBug.ll} | 0 .../arm64-2011-10-18-LdStOptBug.ll} | 0 .../arm64-2012-01-11-ComparisonDAGCrash.ll} | 0 ...m64-2012-05-07-DAGCombineVectorExtract.ll} | 0 .../arm64-2012-05-07-MemcpyAlignBug.ll} | 0 .../arm64-2012-05-09-LOADgot-bug.ll} | 0 .../arm64-2012-05-22-LdStOptBug.ll} | 0 .../arm64-2012-06-06-FPToUI.ll} | 0 .../arm64-2012-07-11-InstrEmitterBug.ll} | 0 .../arm64-2013-01-13-ffast-fcmp.ll} | 4 +- .../arm64-2013-01-23-frem-crash.ll} | 0 .../arm64-2013-01-23-sext-crash.ll} | 0 .../arm64-2013-02-12-shufv8i8.ll} | 2 +- ...-2014-04-16-AnInfiniteLoopInDAGCombine.ll} | 0 ...rm64-2014-04-28-sqshl-uqshl-i64Contant.ll} | 8 +- .../arm64-2014-04-29-EXT-undef-mask.ll} | 2 +- .../arm64-AdvSIMD-Scalar.ll} | 4 +- .../aapcs.ll => AArch64/arm64-aapcs.ll} | 0 .../arm64-abi-varargs.ll} | 0 .../{ARM64/abi.ll => AArch64/arm64-abi.ll} | 0 .../arm64-abi_align.ll} | 0 .../{ARM64/addp.ll => AArch64/arm64-addp.ll} | 2 +- .../arm64-addr-mode-folding.ll} | 0 .../arm64-addr-type-promotion.ll} | 0 .../addrmode.ll => AArch64/arm64-addrmode.ll} | 0 .../arm64-alloc-no-stack-realign.ll} | 0 .../arm64-alloca-frame-pointer-offset.ll} | 0 .../arm64-andCmpBrToTBZ.ll} | 0 .../arm64-ands-bad-peephole.ll} | 0 .../arm64-anyregcc-crash.ll} | 0 .../anyregcc.ll => AArch64/arm64-anyregcc.ll} | 0 .../arm64-arith-saturating.ll} | 60 +- .../arith.ll => AArch64/arm64-arith.ll} | 16 +- .../arm64-arm64-dead-def-elimination-flag.ll} | 2 +- .../arm64-atomic-128.ll} | 0 .../atomic.ll => 
AArch64/arm64-atomic.ll} | 0 .../arm64-basic-pic.ll} | 0 .../arm64-big-endian-bitconverts.ll} | 4 +- .../arm64-big-endian-eh.ll} | 0 .../arm64-big-endian-varargs.ll} | 0 .../arm64-big-endian-vector-callee.ll} | 4 +- .../arm64-big-endian-vector-caller.ll} | 4 +- .../arm64-big-imm-offsets.ll} | 0 .../arm64-big-stack.ll} | 0 .../arm64-bitfield-extract.ll} | 0 .../arm64-blockaddress.ll} | 0 .../arm64-build-vector.ll} | 2 +- .../arm64-call-tailcalls.ll} | 0 .../cast-opt.ll => AArch64/arm64-cast-opt.ll} | 0 .../arm64-ccmp-heuristics.ll} | 2 +- .../{ARM64/ccmp.ll => AArch64/arm64-ccmp.ll} | 2 +- .../clrsb.ll => AArch64/arm64-clrsb.ll} | 0 .../arm64-coalesce-ext.ll} | 0 .../arm64-code-model-large-abs.ll} | 0 .../arm64-collect-loh-garbage-crash.ll} | 2 +- .../arm64-collect-loh-str.ll} | 2 +- .../arm64-collect-loh.ll} | 4 +- .../arm64-complex-copy-noneon.ll} | 0 .../arm64-complex-ret.ll} | 0 .../arm64-const-addr.ll} | 0 .../arm64-convert-v2f64-v2i32.ll} | 2 +- .../arm64-convert-v2i32-v2f64.ll} | 2 +- .../arm64-copy-tuple.ll} | 58 +- .../crc32.ll => AArch64/arm64-crc32.ll} | 32 +- .../crypto.ll => AArch64/arm64-crypto.ll} | 62 +- .../{ARM64/cse.ll => AArch64/arm64-cse.ll} | 0 .../{ARM64/csel.ll => AArch64/arm64-csel.ll} | 0 .../{ARM64/cvt.ll => AArch64/arm64-cvt.ll} | 162 +- .../arm64-dagcombiner-convergence.ll} | 0 .../arm64-dagcombiner-dead-indexed-load.ll} | 0 .../arm64-dagcombiner-indexed-load.ll} | 0 .../arm64-dagcombiner-load-slicing.ll} | 0 .../arm64-dead-def-frame-index.ll} | 0 .../arm64-dead-register-def-bug.ll} | 0 .../{ARM64/dup.ll => AArch64/arm64-dup.ll} | 2 +- .../arm64-early-ifcvt.ll} | 0 .../arm64-elf-calls.ll} | 0 .../arm64-elf-constpool.ll} | 0 .../arm64-elf-globals.ll} | 0 .../{ARM64/ext.ll => AArch64/arm64-ext.ll} | 2 +- .../arm64-extend-int-to-fp.ll} | 2 +- .../extend.ll => AArch64/arm64-extend.ll} | 0 .../arm64-extern-weak.ll} | 0 .../arm64-extload-knownzero.ll} | 0 .../extract.ll => AArch64/arm64-extract.ll} | 2 +- .../arm64-extract_subvector.ll} | 2 +- .../arm64-fast-isel-addr-offset.ll} | 0 .../arm64-fast-isel-alloca.ll} | 0 .../arm64-fast-isel-br.ll} | 0 .../arm64-fast-isel-call.ll} | 0 .../arm64-fast-isel-conversion.ll} | 0 .../arm64-fast-isel-fcmp.ll} | 0 .../arm64-fast-isel-gv.ll} | 0 .../arm64-fast-isel-icmp.ll} | 0 .../arm64-fast-isel-indirectbr.ll} | 0 .../arm64-fast-isel-intrinsic.ll} | 0 .../arm64-fast-isel-materialize.ll} | 0 .../arm64-fast-isel-noconvert.ll} | 0 .../arm64-fast-isel-rem.ll} | 0 .../arm64-fast-isel-ret.ll} | 0 .../arm64-fast-isel-select.ll} | 0 .../arm64-fast-isel.ll} | 0 .../arm64-fastcc-tailcall.ll} | 0 .../arm64-fastisel-gep-promote-before-add.ll} | 0 .../fcmp-opt.ll => AArch64/arm64-fcmp-opt.ll} | 2 +- .../arm64-fcopysign.ll} | 0 ...rm64-fixed-point-scalar-cvt-dagcombine.ll} | 6 +- .../fmadd.ll => AArch64/arm64-fmadd.ll} | 0 .../{ARM64/fmax.ll => AArch64/arm64-fmax.ll} | 0 test/CodeGen/AArch64/arm64-fminv.ll | 101 + .../fmuladd.ll => AArch64/arm64-fmuladd.ll} | 2 +- .../arm64-fold-address.ll} | 0 .../fold-lsl.ll => AArch64/arm64-fold-lsl.ll} | 2 +- .../arm64-fp-contract-zero.ll} | 0 .../fp-imm.ll => AArch64/arm64-fp-imm.ll} | 0 .../{ARM64/fp.ll => AArch64/arm64-fp.ll} | 0 .../arm64-fp128-folding.ll} | 0 .../fp128.ll => AArch64/arm64-fp128.ll} | 0 .../arm64-frame-index.ll} | 0 .../arm64-frameaddr.ll} | 0 .../arm64-global-address.ll} | 0 .../hello.ll => AArch64/arm64-hello.ll} | 0 .../arm64-i16-subreg-extract.ll} | 2 +- .../icmp-opt.ll => AArch64/arm64-icmp-opt.ll} | 0 .../arm64-illegal-float-ops.ll} | 0 .../arm64-indexed-memory.ll} | 2 +- 
.../arm64-indexed-vector-ldst-2.ll} | 0 .../arm64-indexed-vector-ldst.ll} | 1518 ++++++------ .../arm64-inline-asm-error-I.ll} | 0 .../arm64-inline-asm-error-J.ll} | 0 .../arm64-inline-asm-error-K.ll} | 0 .../arm64-inline-asm-error-L.ll} | 0 .../arm64-inline-asm-error-M.ll} | 0 .../arm64-inline-asm-error-N.ll} | 0 .../arm64-inline-asm-zero-reg-error.ll} | 0 .../arm64-inline-asm.ll} | 2 +- .../arm64-join-reserved.ll} | 0 .../arm64-jumptable.ll} | 0 .../arm64-large-frame.ll} | 0 .../{ARM64/ld1.ll => AArch64/arm64-ld1.ll} | 398 ++-- .../{ARM64/ldp.ll => AArch64/arm64-ldp.ll} | 2 +- .../{ARM64/ldur.ll => AArch64/arm64-ldur.ll} | 0 .../arm64-ldxr-stxr.ll} | 84 +- .../{ARM64/leaf.ll => AArch64/arm64-leaf.ll} | 0 .../arm64-long-shift.ll} | 0 .../arm64-memcpy-inline.ll} | 0 .../arm64-memset-inline.ll} | 0 .../arm64-memset-to-bzero.ll} | 0 .../arm64-misched-basic-A53.ll} | 4 +- .../arm64-misched-forwarding-A53.ll} | 0 .../{ARM64/movi.ll => AArch64/arm64-movi.ll} | 0 .../{ARM64/mul.ll => AArch64/arm64-mul.ll} | 0 .../arm64-named-reg-alloc.ll} | 0 .../arm64-named-reg-notareg.ll} | 0 .../{ARM64/neg.ll => AArch64/arm64-neg.ll} | 0 .../arm64-neon-2velem-high.ll} | 64 +- .../arm64-neon-2velem.ll} | 386 +-- .../arm64-neon-3vdiff.ll} | 268 +-- .../arm64-neon-aba-abd.ll} | 86 +- .../arm64-neon-across.ll} | 166 +- .../arm64-neon-add-pairwise.ll} | 44 +- .../arm64-neon-add-sub.ll} | 22 +- .../arm64-neon-compare-instructions.ll} | 0 .../arm64-neon-copy.ll} | 14 +- .../arm64-neon-copyPhysReg-tuple.ll} | 18 +- .../arm64-neon-mul-div.ll} | 52 +- .../arm64-neon-scalar-by-elem-mul.ll} | 18 +- .../arm64-neon-select_cc.ll} | 0 .../arm64-neon-simd-ldst-one.ll} | 0 .../arm64-neon-simd-shift.ll} | 124 +- .../arm64-neon-simd-vget.ll} | 0 .../arm64-neon-v1i1-setcc.ll} | 0 .../arm64-neon-vector-list-spill.ll} | 36 +- .../arm64-patchpoint.ll} | 0 .../arm64-pic-local-symbol.ll} | 0 .../arm64-platform-reg.ll} | 0 .../popcnt.ll => AArch64/arm64-popcnt.ll} | 2 +- .../prefetch.ll => AArch64/arm64-prefetch.ll} | 0 .../arm64-promote-const.ll} | 4 +- .../redzone.ll => AArch64/arm64-redzone.ll} | 2 +- .../arm64-reg-copy-noneon.ll} | 0 .../arm64-register-offset-addressing.ll} | 0 .../arm64-register-pairing.ll} | 0 .../arm64-regress-f128csel-flags.ll} | 0 .../arm64-regress-interphase-shift.ll} | 0 .../arm64-return-vector.ll} | 0 .../arm64-returnaddr.ll} | 0 .../{ARM64/rev.ll => AArch64/arm64-rev.ll} | 2 +- .../rounding.ll => AArch64/arm64-rounding.ll} | 0 .../arm64-scaled_iv.ll} | 0 .../{ARM64/scvt.ll => AArch64/arm64-scvt.ll} | 2 +- .../arm64-shifted-sext.ll} | 0 .../AArch64/arm64-simd-scalar-to-vector.ll | 22 + .../arm64-simplest-elf.ll} | 0 .../sincos.ll => AArch64/arm64-sincos.ll} | 0 .../arm64-sitofp-combine-chains.ll} | 0 .../arm64-sli-sri-opt.ll} | 2 +- .../smaxv.ll => AArch64/arm64-smaxv.ll} | 26 +- .../sminv.ll => AArch64/arm64-sminv.ll} | 26 +- .../spill-lr.ll => AArch64/arm64-spill-lr.ll} | 0 .../spill.ll => AArch64/arm64-spill.ll} | 2 +- .../{ARM64/st1.ll => AArch64/arm64-st1.ll} | 290 +-- .../arm64-stack-no-frame.ll} | 0 .../stackmap.ll => AArch64/arm64-stackmap.ll} | 0 .../arm64-stackpointer.ll} | 0 .../arm64-stacksave.ll} | 0 .../{ARM64/stp.ll => AArch64/arm64-stp.ll} | 4 +- .../arm64-strict-align.ll} | 4 +- .../{ARM64/stur.ll => AArch64/arm64-stur.ll} | 2 +- .../arm64-subsections.ll} | 0 .../arm64-subvector-extend.ll} | 2 +- .../arm64-swizzle-tbl-i16-layout.ll} | 0 test/CodeGen/AArch64/arm64-tbl.ll | 132 ++ .../arm64-this-return.ll} | 0 .../arm64-tls-darwin.ll} | 0 .../arm64-tls-dynamic-together.ll} | 0 
.../arm64-tls-dynamics.ll} | 0 .../arm64-tls-execs.ll} | 0 .../{ARM64/trap.ll => AArch64/arm64-trap.ll} | 0 .../{ARM64/trn.ll => AArch64/arm64-trn.ll} | 2 +- .../arm64-trunc-store.ll} | 0 .../umaxv.ll => AArch64/arm64-umaxv.ll} | 18 +- .../uminv.ll => AArch64/arm64-uminv.ll} | 18 +- .../{ARM64/umov.ll => AArch64/arm64-umov.ll} | 2 +- .../arm64-unaligned_ldst.ll} | 0 .../{ARM64/uzp.ll => AArch64/arm64-uzp.ll} | 2 +- .../vaargs.ll => AArch64/arm64-vaargs.ll} | 0 .../{ARM64/vabs.ll => AArch64/arm64-vabs.ll} | 226 +- .../{ARM64/vadd.ll => AArch64/arm64-vadd.ll} | 158 +- .../vaddlv.ll => AArch64/arm64-vaddlv.ll} | 10 +- .../vaddv.ll => AArch64/arm64-vaddv.ll} | 76 +- .../arm64-variadic-aapcs.ll} | 0 .../vbitwise.ll => AArch64/arm64-vbitwise.ll} | 10 +- .../{ARM64/vclz.ll => AArch64/arm64-vclz.ll} | 2 +- .../{ARM64/vcmp.ll => AArch64/arm64-vcmp.ll} | 42 +- test/CodeGen/AArch64/arm64-vcnt.ll | 56 + .../vcombine.ll => AArch64/arm64-vcombine.ll} | 2 +- .../{ARM64/vcvt.ll => AArch64/arm64-vcvt.ll} | 164 +- .../vcvt_f.ll => AArch64/arm64-vcvt_f.ll} | 18 +- .../arm64-vcvt_f32_su32.ll} | 14 +- test/CodeGen/AArch64/arm64-vcvt_n.ll | 49 + .../arm64-vcvt_su32_f32.ll} | 2 +- .../arm64-vcvtxd_f32_f64.ll} | 4 +- .../vecCmpBr.ll => AArch64/arm64-vecCmpBr.ll} | 26 +- .../vecFold.ll => AArch64/arm64-vecFold.ll} | 40 +- .../arm64-vector-ext.ll} | 2 +- .../arm64-vector-imm.ll} | 2 +- .../arm64-vector-insertion.ll} | 2 +- .../arm64-vector-ldst.ll} | 2 +- .../{ARM64/vext.ll => AArch64/arm64-vext.ll} | 2 +- .../arm64-vext_reverse.ll} | 0 .../arm64-vfloatintrinsics.ll} | 2 +- .../vhadd.ll => AArch64/arm64-vhadd.ll} | 98 +- .../vhsub.ll => AArch64/arm64-vhsub.ll} | 50 +- .../arm64-virtual_base.ll} | 0 .../{ARM64/vmax.ll => AArch64/arm64-vmax.ll} | 270 +-- test/CodeGen/AArch64/arm64-vminmaxnm.ll | 68 + .../vmovn.ll => AArch64/arm64-vmovn.ll} | 56 +- .../{ARM64/vmul.ll => AArch64/arm64-vmul.ll} | 370 +-- .../volatile.ll => AArch64/arm64-volatile.ll} | 0 .../vpopcnt.ll => AArch64/arm64-vpopcnt.ll} | 0 .../vqadd.ll => AArch64/arm64-vqadd.ll} | 140 +- test/CodeGen/AArch64/arm64-vqsub.ll | 147 ++ .../vselect.ll => AArch64/arm64-vselect.ll} | 2 +- .../arm64-vsetcc_fp.ll} | 2 +- .../vshift.ll => AArch64/arm64-vshift.ll} | 484 ++-- .../{ARM64/vshr.ll => AArch64/arm64-vshr.ll} | 2 +- .../vshuffle.ll => AArch64/arm64-vshuffle.ll} | 0 .../vsqrt.ll => AArch64/arm64-vsqrt.ll} | 98 +- .../{ARM64/vsra.ll => AArch64/arm64-vsra.ll} | 2 +- .../{ARM64/vsub.ll => AArch64/arm64-vsub.ll} | 50 +- .../arm64-weak-reference.ll} | 0 .../xaluo.ll => AArch64/arm64-xaluo.ll} | 0 .../arm64-zero-cycle-regmov.ll} | 0 .../arm64-zero-cycle-zeroing.ll} | 0 .../{ARM64/zext.ll => AArch64/arm64-zext.ll} | 0 .../arm64-zextload-unscaled.ll} | 0 .../{ARM64/zip.ll => AArch64/arm64-zip.ll} | 2 +- .../AArch64/atomic-ops-not-barriers.ll | 2 +- test/CodeGen/AArch64/atomic-ops.ll | 76 +- test/CodeGen/AArch64/basic-pic.ll | 2 +- test/CodeGen/AArch64/bitfield-insert-0.ll | 2 +- test/CodeGen/AArch64/bitfield-insert.ll | 10 +- test/CodeGen/AArch64/bitfield.ll | 8 +- test/CodeGen/AArch64/blockaddress.ll | 4 +- test/CodeGen/AArch64/bool-loads.ll | 2 +- test/CodeGen/AArch64/breg.ll | 2 +- test/CodeGen/AArch64/callee-save.ll | 28 +- test/CodeGen/AArch64/code-model-large-abs.ll | 2 +- test/CodeGen/AArch64/compare-branch.ll | 2 +- test/CodeGen/AArch64/complex-copy-noneon.ll | 2 +- test/CodeGen/AArch64/cond-sel.ll | 6 +- test/CodeGen/AArch64/directcond.ll | 12 +- test/CodeGen/AArch64/dp1.ll | 2 +- test/CodeGen/AArch64/eliminate-trunc.ll | 8 +- 
test/CodeGen/AArch64/extern-weak.ll | 18 +- test/CodeGen/AArch64/fastcc-reserved.ll | 33 +- test/CodeGen/AArch64/fastcc.ll | 136 +- test/CodeGen/AArch64/fcmp.ll | 2 +- test/CodeGen/AArch64/fcvt-fixed.ll | 2 +- test/CodeGen/AArch64/flags-multiuse.ll | 2 +- test/CodeGen/AArch64/floatdp_2source.ll | 2 +- test/CodeGen/AArch64/fp-cond-sel.ll | 4 +- test/CodeGen/AArch64/fp-dp3.ll | 2 +- test/CodeGen/AArch64/fp128-folding.ll | 2 +- test/CodeGen/AArch64/fpimm.ll | 2 +- test/CodeGen/AArch64/func-argpassing.ll | 18 +- test/CodeGen/AArch64/func-calls.ll | 36 +- test/CodeGen/AArch64/global-alignment.ll | 2 +- test/CodeGen/AArch64/got-abuse.ll | 4 +- test/CodeGen/AArch64/illegal-float-ops.ll | 2 +- test/CodeGen/AArch64/init-array.ll | 4 +- .../AArch64/inline-asm-constraints-badI.ll | 2 +- .../AArch64/inline-asm-constraints-badK2.ll | 2 +- test/CodeGen/AArch64/jump-table.ll | 6 +- test/CodeGen/AArch64/large-consts.ll | 10 +- test/CodeGen/AArch64/ldst-regoffset.ll | 4 +- test/CodeGen/AArch64/ldst-unscaledimm.ll | 4 +- test/CodeGen/AArch64/ldst-unsignedimm.ll | 4 +- test/CodeGen/{ARM64 => AArch64}/lit.local.cfg | 2 +- test/CodeGen/AArch64/literal_pools_float.ll | 8 +- test/CodeGen/AArch64/local_vars.ll | 4 +- test/CodeGen/AArch64/logical_shifted_reg.ll | 2 +- test/CodeGen/AArch64/mature-mc-support.ll | 8 +- test/CodeGen/AArch64/movw-consts.ll | 28 +- test/CodeGen/AArch64/movw-shift-encoding.ll | 10 +- test/CodeGen/AArch64/neon-bitcast.ll | 2 +- .../AArch64/neon-bitwise-instructions.ll | 2 +- .../AArch64/neon-compare-instructions.ll | 2 +- test/CodeGen/AArch64/neon-diagnostics.ll | 2 +- test/CodeGen/AArch64/neon-extract.ll | 2 +- test/CodeGen/AArch64/neon-fma.ll | 2 +- test/CodeGen/AArch64/neon-fpround_f128.ll | 2 +- test/CodeGen/AArch64/neon-idiv.ll | 2 +- test/CodeGen/AArch64/neon-mla-mls.ll | 2 +- test/CodeGen/AArch64/neon-mov.ll | 34 +- test/CodeGen/AArch64/neon-or-combine.ll | 2 +- test/CodeGen/AArch64/neon-perm.ll | 110 +- .../AArch64/neon-scalar-by-elem-fma.ll | 2 +- test/CodeGen/AArch64/neon-scalar-copy.ll | 10 +- test/CodeGen/AArch64/neon-shift-left-long.ll | 2 +- .../AArch64/neon-truncStore-extLoad.ll | 2 +- test/CodeGen/AArch64/pic-eh-stubs.ll | 2 +- .../CodeGen/AArch64/regress-f128csel-flags.ll | 2 +- test/CodeGen/AArch64/regress-fp128-livein.ll | 2 +- test/CodeGen/AArch64/regress-tblgen-chains.ll | 14 +- .../AArch64/regress-w29-reserved-with-fp.ll | 2 +- test/CodeGen/AArch64/setcc-takes-i32.ll | 2 +- test/CodeGen/AArch64/sibling-call.ll | 2 +- test/CodeGen/AArch64/sincos-expansion.ll | 2 +- .../AArch64/sincospow-vector-expansion.ll | 2 +- test/CodeGen/AArch64/tail-call.ll | 64 +- test/CodeGen/AArch64/zero-reg.ll | 2 +- .../ARM64/compact-unwind-unhandled-cfi.S | 17 - test/CodeGen/ARM64/fminv.ll | 101 - test/CodeGen/ARM64/simd-scalar-to-vector.ll | 22 - test/CodeGen/ARM64/tbl.ll | 132 -- test/CodeGen/ARM64/vcnt.ll | 56 - test/CodeGen/ARM64/vcvt_n.ll | 49 - test/CodeGen/ARM64/vminmaxnm.ll | 68 - test/CodeGen/ARM64/vqsub.ll | 147 -- .../{ARM64 => AArch64}/struct_by_value.ll | 0 test/MC/AArch64/adrp-relocation.s | 2 +- test/MC/{ARM64/adr.s => AArch64/arm64-adr.s} | 8 +- .../advsimd.s => AArch64/arm64-advsimd.s} | 0 .../aliases.s => AArch64/arm64-aliases.s} | 0 .../arm64-arithmetic-encoding.s} | 0 .../arm64-arm64-fixup.s} | 4 +- .../arm64-basic-a64-instructions.s} | 0 .../arm64-be-datalayout.s} | 0 .../arm64-bitfield-encoding.s} | 0 .../arm64-branch-encoding.s} | 34 +- .../arm64-condbr-without-dots.s} | 0 .../crypto.s => AArch64/arm64-crypto.s} | 0 .../arm64-diagno-predicate.s} | 0 
.../{ARM64/diags.s => AArch64/arm64-diags.s} | 2 +- .../arm64-directive_loh.s} | 0 .../arm64-elf-reloc-condbr.s} | 0 .../arm64-elf-relocs.s} | 0 .../arm64-fp-encoding.s} | 0 .../arm64-large-relocs.s} | 14 +- .../arm64-leaf-compact-unwind.s} | 0 .../arm64-logical-encoding.s} | 0 .../arm64-mapping-across-sections.s} | 0 .../arm64-mapping-within-section.s} | 0 .../memory.s => AArch64/arm64-memory.s} | 0 .../nv-cond.s => AArch64/arm64-nv-cond.s} | 0 .../arm64-optional-hash.s} | 0 .../separator.s => AArch64/arm64-separator.s} | 0 .../simd-ldst.s => AArch64/arm64-simd-ldst.s} | 0 .../arm64-small-data-fixups.s} | 0 .../arm64-spsel-sysreg.s} | 0 .../arm64-system-encoding.s} | 0 .../arm64-target-specific-sysreg.s} | 0 .../arm64-tls-modifiers-darwin.s} | 0 .../arm64-tls-relocs.s} | 106 +- .../arm64-v128_lo-diagnostics.s} | 0 .../arm64-variable-exprs.s} | 0 .../arm64-vector-lists.s} | 0 .../arm64-verbose-vector-case.s} | 0 test/MC/AArch64/basic-a64-diagnostics.s | 2 +- test/MC/AArch64/basic-a64-instructions.s | 418 ++-- test/MC/AArch64/basic-pic.s | 2 +- test/MC/AArch64/elf-extern.s | 2 +- test/MC/AArch64/elf-objdump.s | 2 +- test/MC/AArch64/elf-reloc-addsubimm.s | 2 +- test/MC/AArch64/elf-reloc-ldrlit.s | 2 +- test/MC/AArch64/elf-reloc-ldstunsimm.s | 2 +- test/MC/AArch64/elf-reloc-movw.s | 2 +- test/MC/AArch64/elf-reloc-pcreladdressing.s | 2 +- test/MC/AArch64/elf-reloc-tstb.s | 2 +- test/MC/AArch64/elf-reloc-uncondbrimm.s | 2 +- test/MC/AArch64/gicv3-regs-diagnostics.s | 2 +- test/MC/AArch64/gicv3-regs.s | 2 +- test/MC/AArch64/inline-asm-modifiers.s | 2 +- test/MC/AArch64/jump-table.s | 2 +- test/MC/AArch64/lit.local.cfg | 2 +- test/MC/AArch64/mapping-across-sections.s | 2 +- test/MC/AArch64/mapping-within-section.s | 2 +- test/MC/AArch64/neon-3vdiff.s | 2 +- test/MC/AArch64/neon-aba-abd.s | 2 +- test/MC/AArch64/neon-add-pairwise.s | 2 +- test/MC/AArch64/neon-add-sub-instructions.s | 2 +- test/MC/AArch64/neon-bitwise-instructions.s | 2 +- test/MC/AArch64/neon-compare-instructions.s | 2 +- test/MC/AArch64/neon-diagnostics.s | 453 ++-- test/MC/AArch64/neon-facge-facgt.s | 2 +- test/MC/AArch64/neon-frsqrt-frecp.s | 2 +- test/MC/AArch64/neon-halving-add-sub.s | 2 +- test/MC/AArch64/neon-max-min-pairwise.s | 2 +- test/MC/AArch64/neon-max-min.s | 2 +- test/MC/AArch64/neon-mla-mls-instructions.s | 2 +- test/MC/AArch64/neon-mov.s | 2 +- test/MC/AArch64/neon-mul-div-instructions.s | 2 +- test/MC/AArch64/neon-rounding-halving-add.s | 2 +- test/MC/AArch64/neon-rounding-shift.s | 2 +- test/MC/AArch64/neon-saturating-add-sub.s | 2 +- .../AArch64/neon-saturating-rounding-shift.s | 2 +- test/MC/AArch64/neon-saturating-shift.s | 2 +- test/MC/AArch64/neon-scalar-abs.s | 2 +- test/MC/AArch64/neon-scalar-add-sub.s | 2 +- test/MC/AArch64/neon-scalar-by-elem-mla.s | 2 +- test/MC/AArch64/neon-scalar-by-elem-mul.s | 2 +- .../neon-scalar-by-elem-saturating-mla.s | 2 +- .../neon-scalar-by-elem-saturating-mul.s | 2 +- test/MC/AArch64/neon-scalar-compare.s | 2 +- test/MC/AArch64/neon-scalar-cvt.s | 2 +- test/MC/AArch64/neon-scalar-dup.s | 2 +- test/MC/AArch64/neon-scalar-extract-narrow.s | 2 +- test/MC/AArch64/neon-scalar-fp-compare.s | 2 +- test/MC/AArch64/neon-scalar-mul.s | 2 +- test/MC/AArch64/neon-scalar-neg.s | 2 +- test/MC/AArch64/neon-scalar-recip.s | 2 +- test/MC/AArch64/neon-scalar-reduce-pairwise.s | 2 +- test/MC/AArch64/neon-scalar-rounding-shift.s | 2 +- .../AArch64/neon-scalar-saturating-add-sub.s | 2 +- .../neon-scalar-saturating-rounding-shift.s | 2 +- .../MC/AArch64/neon-scalar-saturating-shift.s | 2 +- 
test/MC/AArch64/neon-scalar-shift-imm.s | 2 +- test/MC/AArch64/neon-scalar-shift.s | 2 +- test/MC/AArch64/neon-shift-left-long.s | 2 +- test/MC/AArch64/neon-shift.s | 2 +- test/MC/AArch64/neon-simd-copy.s | 2 +- test/MC/AArch64/neon-simd-shift.s | 2 +- test/MC/AArch64/neon-sxtl.s | 2 +- test/MC/AArch64/neon-uxtl.s | 2 +- test/MC/AArch64/noneon-diagnostics.s | 37 +- test/MC/AArch64/optional-hash.s | 2 +- test/MC/AArch64/tls-relocs.s | 282 +-- test/MC/AArch64/trace-regs-diagnostics.s | 2 +- test/MC/AArch64/trace-regs.s | 2 +- test/MC/ARM64/lit.local.cfg | 6 - .../advsimd.txt => AArch64/arm64-advsimd.txt} | 0 .../arm64-arithmetic.txt} | 0 .../arm64-basic-a64-undefined.txt} | 0 .../arm64-bitfield.txt} | 0 .../branch.txt => AArch64/arm64-branch.txt} | 0 .../arm64-canonical-form.txt} | 0 .../crc32.txt => AArch64/arm64-crc32.txt} | 0 .../crypto.txt => AArch64/arm64-crypto.txt} | 0 .../arm64-invalid-logical.txt} | 0 .../logical.txt => AArch64/arm64-logical.txt} | 0 .../memory.txt => AArch64/arm64-memory.txt} | 0 .../arm64-non-apple-fmov.txt} | 0 .../arm64-scalar-fp.txt} | 0 .../system.txt => AArch64/arm64-system.txt} | 0 test/MC/Disassembler/AArch64/lit.local.cfg | 2 +- test/MC/Disassembler/ARM64/lit.local.cfg | 5 - .../darwin-ARM64-local-label-diff.s | 0 .../{ARM64 => AArch64}/darwin-ARM64-reloc.s | 0 .../ARM64 => MC/MachO/AArch64}/lit.local.cfg | 2 +- .../{ARM64 => AArch64}/const-addr.ll | 0 .../{ARM64 => AArch64}/large-immediate.ll | 0 .../{ARM64 => AArch64}/lit.local.cfg | 2 +- .../GlobalMerge/{ARM64 => AArch64}/arm64.ll | 0 .../GlobalMerge/AArch64}/lit.local.cfg | 2 +- .../InstCombine/2012-04-23-Neon-Intrinsics.ll | 20 +- .../{ARM64 => AArch64}/lit.local.cfg | 2 +- .../{ARM64 => AArch64}/lsr-memcpy.ll | 0 .../{ARM64 => AArch64}/lsr-memset.ll | 0 .../{ARM64 => AArch64}/req-regs.ll | 0 .../{ARM64 => AArch64}/arm64-unroll.ll | 0 .../{ARM64 => AArch64}/gather-cost.ll | 0 .../LoopVectorize/ARM64/lit.local.cfg | 6 - .../SLPVectorizer/AArch64}/lit.local.cfg | 3 +- .../mismatched-intrinsics.ll | 0 .../SLPVectorizer/ARM64/lit.local.cfg | 3 - 636 files changed, 14412 insertions(+), 14518 deletions(-) rename include/llvm/IR/{IntrinsicsARM64.td => IntrinsicsAArch64.td} (55%) create mode 100644 lib/Target/AArch64/AArch64.h rename lib/Target/{ARM64/ARM64.td => AArch64/AArch64.td} (90%) rename lib/Target/{ARM64/ARM64AddressTypePromotion.cpp => AArch64/AArch64AddressTypePromotion.cpp} (90%) rename lib/Target/{ARM64/ARM64AdvSIMDScalarPass.cpp => AArch64/AArch64AdvSIMDScalarPass.cpp} (85%) rename lib/Target/{ARM64/ARM64AsmPrinter.cpp => AArch64/AArch64AsmPrinter.cpp} (74%) rename lib/Target/{ARM64/ARM64BranchRelaxation.cpp => AArch64/AArch64BranchRelaxation.cpp} (78%) rename lib/Target/{ARM64/ARM64CallingConv.h => AArch64/AArch64CallingConv.h} (65%) rename lib/Target/{ARM64/ARM64CallingConvention.td => AArch64/AArch64CallingConvention.td} (92%) rename lib/Target/{ARM64/ARM64CleanupLocalDynamicTLSPass.cpp => AArch64/AArch64CleanupLocalDynamicTLSPass.cpp} (81%) rename lib/Target/{ARM64/ARM64CollectLOH.cpp => AArch64/AArch64CollectLOH.cpp} (91%) rename lib/Target/{ARM64/ARM64ConditionalCompares.cpp => AArch64/AArch64ConditionalCompares.cpp} (85%) rename lib/Target/{ARM64/ARM64DeadRegisterDefinitionsPass.cpp => AArch64/AArch64DeadRegisterDefinitionsPass.cpp} (79%) rename lib/Target/{ARM64/ARM64ExpandPseudoInsts.cpp => AArch64/AArch64ExpandPseudoInsts.cpp} (76%) rename lib/Target/{ARM64/ARM64FastISel.cpp => AArch64/AArch64FastISel.cpp} (78%) rename lib/Target/{ARM64/ARM64FrameLowering.cpp => 
AArch64/AArch64FrameLowering.cpp} (82%) rename lib/Target/{ARM64/ARM64FrameLowering.h => AArch64/AArch64FrameLowering.h} (86%) rename lib/Target/{ARM64/ARM64ISelDAGToDAG.cpp => AArch64/AArch64ISelDAGToDAG.cpp} (70%) rename lib/Target/{ARM64/ARM64ISelLowering.cpp => AArch64/AArch64ISelLowering.cpp} (80%) rename lib/Target/{ARM64/ARM64ISelLowering.h => AArch64/AArch64ISelLowering.h} (96%) rename lib/Target/{ARM64/ARM64InstrAtomics.td => AArch64/AArch64InstrAtomics.td} (92%) rename lib/Target/{ARM64/ARM64InstrFormats.td => AArch64/AArch64InstrFormats.td} (98%) rename lib/Target/{ARM64/ARM64InstrInfo.cpp => AArch64/AArch64InstrInfo.cpp} (55%) rename lib/Target/{ARM64/ARM64InstrInfo.h => AArch64/AArch64InstrInfo.h} (81%) rename lib/Target/{ARM64/ARM64InstrInfo.td => AArch64/AArch64InstrInfo.td} (83%) rename lib/Target/{ARM64/ARM64LoadStoreOptimizer.cpp => AArch64/AArch64LoadStoreOptimizer.cpp} (80%) rename lib/Target/{ARM64/ARM64MCInstLower.cpp => AArch64/AArch64MCInstLower.cpp} (58%) rename lib/Target/{ARM64/ARM64MCInstLower.h => AArch64/AArch64MCInstLower.h} (78%) rename lib/Target/{ARM64/ARM64MachineFunctionInfo.h => AArch64/AArch64MachineFunctionInfo.h} (90%) rename lib/Target/{ARM64/ARM64PerfectShuffle.h => AArch64/AArch64PerfectShuffle.h} (99%) rename lib/Target/{ARM64/ARM64PromoteConstant.cpp => AArch64/AArch64PromoteConstant.cpp} (92%) rename lib/Target/{ARM64/ARM64RegisterInfo.cpp => AArch64/AArch64RegisterInfo.cpp} (66%) rename lib/Target/{ARM64/ARM64RegisterInfo.h => AArch64/AArch64RegisterInfo.h} (85%) create mode 100644 lib/Target/AArch64/AArch64RegisterInfo.td rename lib/Target/{ARM64/ARM64SchedA53.td => AArch64/AArch64SchedA53.td} (99%) rename lib/Target/{ARM64/ARM64SchedCyclone.td => AArch64/AArch64SchedCyclone.td} (98%) rename lib/Target/{ARM64/ARM64Schedule.td => AArch64/AArch64Schedule.td} (95%) rename lib/Target/{ARM64/ARM64SelectionDAGInfo.cpp => AArch64/AArch64SelectionDAGInfo.cpp} (74%) rename lib/Target/{ARM64/ARM64SelectionDAGInfo.h => AArch64/AArch64SelectionDAGInfo.h} (69%) rename lib/Target/{ARM64/ARM64StorePairSuppress.cpp => AArch64/AArch64StorePairSuppress.cpp} (82%) rename lib/Target/{ARM64/ARM64Subtarget.cpp => AArch64/AArch64Subtarget.cpp} (71%) rename lib/Target/{ARM64/ARM64Subtarget.h => AArch64/AArch64Subtarget.h} (88%) rename lib/Target/{ARM64/ARM64TargetMachine.cpp => AArch64/AArch64TargetMachine.cpp} (51%) create mode 100644 lib/Target/AArch64/AArch64TargetMachine.h rename lib/Target/{ARM64/ARM64TargetObjectFile.cpp => AArch64/AArch64TargetObjectFile.cpp} (80%) rename lib/Target/{ARM64/ARM64TargetObjectFile.h => AArch64/AArch64TargetObjectFile.h} (73%) rename lib/Target/{ARM64/ARM64TargetTransformInfo.cpp => AArch64/AArch64TargetTransformInfo.cpp} (87%) rename lib/Target/{ARM64/AsmParser/ARM64AsmParser.cpp => AArch64/AsmParser/AArch64AsmParser.cpp} (78%) rename lib/Target/{ARM64 => AArch64}/AsmParser/CMakeLists.txt (59%) rename lib/Target/{ARM64 => AArch64}/AsmParser/LLVMBuild.txt (70%) rename lib/Target/{ARM64 => AArch64}/AsmParser/Makefile (82%) create mode 100644 lib/Target/AArch64/CMakeLists.txt rename lib/Target/{ARM64/Disassembler/ARM64Disassembler.cpp => AArch64/Disassembler/AArch64Disassembler.cpp} (69%) rename lib/Target/{ARM64/Disassembler/ARM64Disassembler.h => AArch64/Disassembler/AArch64Disassembler.h} (75%) rename lib/Target/{ARM64/Disassembler/ARM64ExternalSymbolizer.cpp => AArch64/Disassembler/AArch64ExternalSymbolizer.cpp} (86%) rename lib/Target/{ARM64/Disassembler/ARM64ExternalSymbolizer.h => 
AArch64/Disassembler/AArch64ExternalSymbolizer.h} (50%) rename lib/Target/{ARM64 => AArch64}/Disassembler/CMakeLists.txt (66%) rename lib/Target/{ARM64 => AArch64}/Disassembler/LLVMBuild.txt (71%) rename lib/Target/{ARM64 => AArch64}/Disassembler/Makefile (81%) create mode 100644 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp rename lib/Target/{ARM64/InstPrinter/ARM64InstPrinter.h => AArch64/InstPrinter/AArch64InstPrinter.h} (88%) create mode 100644 lib/Target/AArch64/InstPrinter/CMakeLists.txt rename lib/Target/{ARM64 => AArch64}/InstPrinter/LLVMBuild.txt (73%) rename lib/Target/{ARM64 => AArch64}/InstPrinter/Makefile (82%) rename lib/Target/{ARM64 => AArch64}/LLVMBuild.txt (70%) rename lib/Target/{ARM64/MCTargetDesc/ARM64AddressingModes.h => AArch64/MCTargetDesc/AArch64AddressingModes.h} (89%) rename lib/Target/{ARM64/MCTargetDesc/ARM64AsmBackend.cpp => AArch64/MCTargetDesc/AArch64AsmBackend.cpp} (66%) rename lib/Target/{ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp => AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp} (56%) rename lib/Target/{ARM64/MCTargetDesc/ARM64ELFStreamer.cpp => AArch64/MCTargetDesc/AArch64ELFStreamer.cpp} (86%) rename lib/Target/{ARM64/MCTargetDesc/ARM64ELFStreamer.h => AArch64/MCTargetDesc/AArch64ELFStreamer.h} (55%) create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h rename lib/Target/{ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp => AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp} (82%) rename lib/Target/{ARM64/MCTargetDesc/ARM64MCAsmInfo.h => AArch64/MCTargetDesc/AArch64MCAsmInfo.h} (61%) rename lib/Target/{ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp => AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp} (73%) rename lib/Target/{ARM64/MCTargetDesc/ARM64MCExpr.cpp => AArch64/MCTargetDesc/AArch64MCExpr.cpp} (89%) rename lib/Target/{ARM64/MCTargetDesc/ARM64MCExpr.h => AArch64/MCTargetDesc/AArch64MCExpr.h} (92%) create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp rename lib/Target/{ARM64/MCTargetDesc/ARM64MCTargetDesc.h => AArch64/MCTargetDesc/AArch64MCTargetDesc.h} (51%) rename lib/Target/{ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp => AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp} (88%) create mode 100644 lib/Target/AArch64/MCTargetDesc/CMakeLists.txt rename lib/Target/{ARM64 => AArch64}/MCTargetDesc/LLVMBuild.txt (72%) rename lib/Target/{ARM64 => AArch64}/MCTargetDesc/Makefile (82%) create mode 100644 lib/Target/AArch64/Makefile rename lib/Target/{ARM64/TargetInfo/ARM64TargetInfo.cpp => AArch64/TargetInfo/AArch64TargetInfo.cpp} (64%) create mode 100644 lib/Target/AArch64/TargetInfo/CMakeLists.txt rename lib/Target/{ARM64/Utils => AArch64/TargetInfo}/LLVMBuild.txt (79%) rename lib/Target/{ARM64 => AArch64}/TargetInfo/Makefile (82%) rename lib/Target/{ARM64/Utils/ARM64BaseInfo.cpp => AArch64/Utils/AArch64BaseInfo.cpp} (89%) rename lib/Target/{ARM64/Utils/ARM64BaseInfo.h => AArch64/Utils/AArch64BaseInfo.h} (84%) create mode 100644 lib/Target/AArch64/Utils/CMakeLists.txt rename lib/Target/{ARM64/TargetInfo => AArch64/Utils}/LLVMBuild.txt (79%) rename lib/Target/{ARM64 => AArch64}/Utils/Makefile (64%) delete mode 100644 lib/Target/ARM64/ARM64.h delete mode 100644 lib/Target/ARM64/ARM64RegisterInfo.td delete mode 100644 lib/Target/ARM64/ARM64TargetMachine.h delete mode 100644 lib/Target/ARM64/CMakeLists.txt delete mode 100644 lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp delete mode 100644 lib/Target/ARM64/InstPrinter/CMakeLists.txt delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h delete mode 100644 
lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp delete mode 100644 lib/Target/ARM64/MCTargetDesc/CMakeLists.txt delete mode 100644 lib/Target/ARM64/Makefile delete mode 100644 lib/Target/ARM64/TargetInfo/CMakeLists.txt delete mode 100644 lib/Target/ARM64/Utils/CMakeLists.txt rename test/Analysis/CostModel/{ARM64 => AArch64}/lit.local.cfg (73%) rename test/Analysis/CostModel/{ARM64 => AArch64}/select.ll (100%) rename test/Analysis/CostModel/{ARM64 => AArch64}/store.ll (100%) rename test/CodeGen/{ARM64 => AArch64}/aarch64-neon-v1i1-setcc.ll (100%) rename test/CodeGen/{ARM64/2011-03-09-CPSRSpill.ll => AArch64/arm64-2011-03-09-CPSRSpill.ll} (100%) rename test/CodeGen/{ARM64/2011-03-17-AsmPrinterCrash.ll => AArch64/arm64-2011-03-17-AsmPrinterCrash.ll} (100%) rename test/CodeGen/{ARM64/2011-03-21-Unaligned-Frame-Index.ll => AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll} (100%) rename test/CodeGen/{ARM64/2011-04-21-CPSRBug.ll => AArch64/arm64-2011-04-21-CPSRBug.ll} (100%) rename test/CodeGen/{ARM64/2011-10-18-LdStOptBug.ll => AArch64/arm64-2011-10-18-LdStOptBug.ll} (100%) rename test/CodeGen/{ARM64/2012-01-11-ComparisonDAGCrash.ll => AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll} (100%) rename test/CodeGen/{ARM64/2012-05-07-DAGCombineVectorExtract.ll => AArch64/arm64-2012-05-07-DAGCombineVectorExtract.ll} (100%) rename test/CodeGen/{ARM64/2012-05-07-MemcpyAlignBug.ll => AArch64/arm64-2012-05-07-MemcpyAlignBug.ll} (100%) rename test/CodeGen/{ARM64/2012-05-09-LOADgot-bug.ll => AArch64/arm64-2012-05-09-LOADgot-bug.ll} (100%) rename test/CodeGen/{ARM64/2012-05-22-LdStOptBug.ll => AArch64/arm64-2012-05-22-LdStOptBug.ll} (100%) rename test/CodeGen/{ARM64/2012-06-06-FPToUI.ll => AArch64/arm64-2012-06-06-FPToUI.ll} (100%) rename test/CodeGen/{ARM64/2012-07-11-InstrEmitterBug.ll => AArch64/arm64-2012-07-11-InstrEmitterBug.ll} (100%) rename test/CodeGen/{ARM64/2013-01-13-ffast-fcmp.ll => AArch64/arm64-2013-01-13-ffast-fcmp.ll} (72%) rename test/CodeGen/{ARM64/2013-01-23-frem-crash.ll => AArch64/arm64-2013-01-23-frem-crash.ll} (100%) rename test/CodeGen/{ARM64/2013-01-23-sext-crash.ll => AArch64/arm64-2013-01-23-sext-crash.ll} (100%) rename test/CodeGen/{ARM64/2013-02-12-shufv8i8.ll => AArch64/arm64-2013-02-12-shufv8i8.ll} (83%) rename test/CodeGen/{ARM64/2014-04-16-AnInfiniteLoopInDAGCombine.ll => AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll} (100%) rename test/CodeGen/{ARM64/2014-04-28-sqshl-uqshl-i64Contant.ll => AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll} (63%) rename test/CodeGen/{ARM64/2014-04-29-EXT-undef-mask.ll => AArch64/arm64-2014-04-29-EXT-undef-mask.ll} (94%) rename test/CodeGen/{ARM64/AdvSIMD-Scalar.ll => AArch64/arm64-AdvSIMD-Scalar.ll} (89%) rename test/CodeGen/{ARM64/aapcs.ll => AArch64/arm64-aapcs.ll} (100%) rename test/CodeGen/{ARM64/abi-varargs.ll => AArch64/arm64-abi-varargs.ll} (100%) rename test/CodeGen/{ARM64/abi.ll => AArch64/arm64-abi.ll} (100%) rename test/CodeGen/{ARM64/abi_align.ll => AArch64/arm64-abi_align.ll} (100%) rename test/CodeGen/{ARM64/addp.ll => AArch64/arm64-addp.ll} (90%) rename test/CodeGen/{ARM64/addr-mode-folding.ll => AArch64/arm64-addr-mode-folding.ll} (100%) rename test/CodeGen/{ARM64/addr-type-promotion.ll => AArch64/arm64-addr-type-promotion.ll} (100%) rename test/CodeGen/{ARM64/addrmode.ll => AArch64/arm64-addrmode.ll} (100%) rename test/CodeGen/{ARM64/alloc-no-stack-realign.ll => AArch64/arm64-alloc-no-stack-realign.ll} (100%) rename test/CodeGen/{ARM64/alloca-frame-pointer-offset.ll => 
AArch64/arm64-alloca-frame-pointer-offset.ll} (100%) rename test/CodeGen/{ARM64/andCmpBrToTBZ.ll => AArch64/arm64-andCmpBrToTBZ.ll} (100%) rename test/CodeGen/{ARM64/ands-bad-peephole.ll => AArch64/arm64-ands-bad-peephole.ll} (100%) rename test/CodeGen/{ARM64/anyregcc-crash.ll => AArch64/arm64-anyregcc-crash.ll} (100%) rename test/CodeGen/{ARM64/anyregcc.ll => AArch64/arm64-anyregcc.ll} (100%) rename test/CodeGen/{ARM64/arith-saturating.ll => AArch64/arm64-arith-saturating.ll} (58%) rename test/CodeGen/{ARM64/arith.ll => AArch64/arm64-arith.ll} (90%) rename test/CodeGen/{ARM64/arm64-dead-def-elimination-flag.ll => AArch64/arm64-arm64-dead-def-elimination-flag.ll} (80%) rename test/CodeGen/{ARM64/atomic-128.ll => AArch64/arm64-atomic-128.ll} (100%) rename test/CodeGen/{ARM64/atomic.ll => AArch64/arm64-atomic.ll} (100%) rename test/CodeGen/{ARM64/basic-pic.ll => AArch64/arm64-basic-pic.ll} (100%) rename test/CodeGen/{ARM64/big-endian-bitconverts.ll => AArch64/arm64-big-endian-bitconverts.ll} (99%) rename test/CodeGen/{ARM64/big-endian-eh.ll => AArch64/arm64-big-endian-eh.ll} (100%) rename test/CodeGen/{ARM64/big-endian-varargs.ll => AArch64/arm64-big-endian-varargs.ll} (100%) rename test/CodeGen/{ARM64/big-endian-vector-callee.ll => AArch64/arm64-big-endian-vector-callee.ll} (99%) rename test/CodeGen/{ARM64/big-endian-vector-caller.ll => AArch64/arm64-big-endian-vector-caller.ll} (99%) rename test/CodeGen/{ARM64/big-imm-offsets.ll => AArch64/arm64-big-imm-offsets.ll} (100%) rename test/CodeGen/{ARM64/big-stack.ll => AArch64/arm64-big-stack.ll} (100%) rename test/CodeGen/{ARM64/bitfield-extract.ll => AArch64/arm64-bitfield-extract.ll} (100%) rename test/CodeGen/{ARM64/blockaddress.ll => AArch64/arm64-blockaddress.ll} (100%) rename test/CodeGen/{ARM64/build-vector.ll => AArch64/arm64-build-vector.ll} (94%) rename test/CodeGen/{ARM64/call-tailcalls.ll => AArch64/arm64-call-tailcalls.ll} (100%) rename test/CodeGen/{ARM64/cast-opt.ll => AArch64/arm64-cast-opt.ll} (100%) rename test/CodeGen/{ARM64/ccmp-heuristics.ll => AArch64/arm64-ccmp-heuristics.ll} (99%) rename test/CodeGen/{ARM64/ccmp.ll => AArch64/arm64-ccmp.ll} (98%) rename test/CodeGen/{ARM64/clrsb.ll => AArch64/arm64-clrsb.ll} (100%) rename test/CodeGen/{ARM64/coalesce-ext.ll => AArch64/arm64-coalesce-ext.ll} (100%) rename test/CodeGen/{ARM64/code-model-large-abs.ll => AArch64/arm64-code-model-large-abs.ll} (100%) rename test/CodeGen/{ARM64/collect-loh-garbage-crash.ll => AArch64/arm64-collect-loh-garbage-crash.ll} (91%) rename test/CodeGen/{ARM64/collect-loh-str.ll => AArch64/arm64-collect-loh-str.ll} (86%) rename test/CodeGen/{ARM64/collect-loh.ll => AArch64/arm64-collect-loh.ll} (85%) rename test/CodeGen/{ARM64/complex-copy-noneon.ll => AArch64/arm64-complex-copy-noneon.ll} (100%) rename test/CodeGen/{ARM64/complex-ret.ll => AArch64/arm64-complex-ret.ll} (100%) rename test/CodeGen/{ARM64/const-addr.ll => AArch64/arm64-const-addr.ll} (100%) rename test/CodeGen/{ARM64/convert-v2f64-v2i32.ll => AArch64/arm64-convert-v2f64-v2i32.ll} (86%) rename test/CodeGen/{ARM64/convert-v2i32-v2f64.ll => AArch64/arm64-convert-v2i32-v2f64.ll} (90%) rename test/CodeGen/{ARM64/copy-tuple.ll => AArch64/arm64-copy-tuple.ll} (69%) rename test/CodeGen/{ARM64/crc32.ll => AArch64/arm64-crc32.ll} (58%) rename test/CodeGen/{ARM64/crypto.ll => AArch64/arm64-crypto.ll} (50%) rename test/CodeGen/{ARM64/cse.ll => AArch64/arm64-cse.ll} (100%) rename test/CodeGen/{ARM64/csel.ll => AArch64/arm64-csel.ll} (100%) rename test/CodeGen/{ARM64/cvt.ll => AArch64/arm64-cvt.ll} 
(52%) rename test/CodeGen/{ARM64/dagcombiner-convergence.ll => AArch64/arm64-dagcombiner-convergence.ll} (100%) rename test/CodeGen/{ARM64/dagcombiner-dead-indexed-load.ll => AArch64/arm64-dagcombiner-dead-indexed-load.ll} (100%) rename test/CodeGen/{ARM64/dagcombiner-indexed-load.ll => AArch64/arm64-dagcombiner-indexed-load.ll} (100%) rename test/CodeGen/{ARM64/dagcombiner-load-slicing.ll => AArch64/arm64-dagcombiner-load-slicing.ll} (100%) rename test/CodeGen/{ARM64/dead-def-frame-index.ll => AArch64/arm64-dead-def-frame-index.ll} (100%) rename test/CodeGen/{ARM64/dead-register-def-bug.ll => AArch64/arm64-dead-register-def-bug.ll} (100%) rename test/CodeGen/{ARM64/dup.ll => AArch64/arm64-dup.ll} (99%) rename test/CodeGen/{ARM64/early-ifcvt.ll => AArch64/arm64-early-ifcvt.ll} (100%) rename test/CodeGen/{ARM64/elf-calls.ll => AArch64/arm64-elf-calls.ll} (100%) rename test/CodeGen/{ARM64/elf-constpool.ll => AArch64/arm64-elf-constpool.ll} (100%) rename test/CodeGen/{ARM64/elf-globals.ll => AArch64/arm64-elf-globals.ll} (100%) rename test/CodeGen/{ARM64/ext.ll => AArch64/arm64-ext.ll} (98%) rename test/CodeGen/{ARM64/extend-int-to-fp.ll => AArch64/arm64-extend-int-to-fp.ll} (86%) rename test/CodeGen/{ARM64/extend.ll => AArch64/arm64-extend.ll} (100%) rename test/CodeGen/{ARM64/extern-weak.ll => AArch64/arm64-extern-weak.ll} (100%) rename test/CodeGen/{ARM64/extload-knownzero.ll => AArch64/arm64-extload-knownzero.ll} (100%) rename test/CodeGen/{ARM64/extract.ll => AArch64/arm64-extract.ll} (95%) rename test/CodeGen/{ARM64/extract_subvector.ll => AArch64/arm64-extract_subvector.ll} (95%) rename test/CodeGen/{ARM64/fast-isel-addr-offset.ll => AArch64/arm64-fast-isel-addr-offset.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-alloca.ll => AArch64/arm64-fast-isel-alloca.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-br.ll => AArch64/arm64-fast-isel-br.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-call.ll => AArch64/arm64-fast-isel-call.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-conversion.ll => AArch64/arm64-fast-isel-conversion.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-fcmp.ll => AArch64/arm64-fast-isel-fcmp.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-gv.ll => AArch64/arm64-fast-isel-gv.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-icmp.ll => AArch64/arm64-fast-isel-icmp.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-indirectbr.ll => AArch64/arm64-fast-isel-indirectbr.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-intrinsic.ll => AArch64/arm64-fast-isel-intrinsic.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-materialize.ll => AArch64/arm64-fast-isel-materialize.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-noconvert.ll => AArch64/arm64-fast-isel-noconvert.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-rem.ll => AArch64/arm64-fast-isel-rem.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-ret.ll => AArch64/arm64-fast-isel-ret.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-select.ll => AArch64/arm64-fast-isel-select.ll} (100%) rename test/CodeGen/{ARM64/fast-isel.ll => AArch64/arm64-fast-isel.ll} (100%) rename test/CodeGen/{ARM64/fastcc-tailcall.ll => AArch64/arm64-fastcc-tailcall.ll} (100%) rename test/CodeGen/{ARM64/fastisel-gep-promote-before-add.ll => AArch64/arm64-fastisel-gep-promote-before-add.ll} (100%) rename test/CodeGen/{ARM64/fcmp-opt.ll => AArch64/arm64-fcmp-opt.ll} (98%) rename test/CodeGen/{ARM64/fcopysign.ll => AArch64/arm64-fcopysign.ll} (100%) rename test/CodeGen/{ARM64/fixed-point-scalar-cvt-dagcombine.ll => 
AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll} (62%) rename test/CodeGen/{ARM64/fmadd.ll => AArch64/arm64-fmadd.ll} (100%) rename test/CodeGen/{ARM64/fmax.ll => AArch64/arm64-fmax.ll} (100%) create mode 100644 test/CodeGen/AArch64/arm64-fminv.ll rename test/CodeGen/{ARM64/fmuladd.ll => AArch64/arm64-fmuladd.ll} (97%) rename test/CodeGen/{ARM64/fold-address.ll => AArch64/arm64-fold-address.ll} (100%) rename test/CodeGen/{ARM64/fold-lsl.ll => AArch64/arm64-fold-lsl.ll} (97%) rename test/CodeGen/{ARM64/fp-contract-zero.ll => AArch64/arm64-fp-contract-zero.ll} (100%) rename test/CodeGen/{ARM64/fp-imm.ll => AArch64/arm64-fp-imm.ll} (100%) rename test/CodeGen/{ARM64/fp.ll => AArch64/arm64-fp.ll} (100%) rename test/CodeGen/{ARM64/fp128-folding.ll => AArch64/arm64-fp128-folding.ll} (100%) rename test/CodeGen/{ARM64/fp128.ll => AArch64/arm64-fp128.ll} (100%) rename test/CodeGen/{ARM64/frame-index.ll => AArch64/arm64-frame-index.ll} (100%) rename test/CodeGen/{ARM64/frameaddr.ll => AArch64/arm64-frameaddr.ll} (100%) rename test/CodeGen/{ARM64/global-address.ll => AArch64/arm64-global-address.ll} (100%) rename test/CodeGen/{ARM64/hello.ll => AArch64/arm64-hello.ll} (100%) rename test/CodeGen/{ARM64/i16-subreg-extract.ll => AArch64/arm64-i16-subreg-extract.ll} (80%) rename test/CodeGen/{ARM64/icmp-opt.ll => AArch64/arm64-icmp-opt.ll} (100%) rename test/CodeGen/{ARM64/illegal-float-ops.ll => AArch64/arm64-illegal-float-ops.ll} (100%) rename test/CodeGen/{ARM64/indexed-memory.ll => AArch64/arm64-indexed-memory.ll} (99%) rename test/CodeGen/{ARM64/indexed-vector-ldst-2.ll => AArch64/arm64-indexed-vector-ldst-2.ll} (100%) rename test/CodeGen/{ARM64/indexed-vector-ldst.ll => AArch64/arm64-indexed-vector-ldst.ll} (75%) rename test/CodeGen/{ARM64/inline-asm-error-I.ll => AArch64/arm64-inline-asm-error-I.ll} (100%) rename test/CodeGen/{ARM64/inline-asm-error-J.ll => AArch64/arm64-inline-asm-error-J.ll} (100%) rename test/CodeGen/{ARM64/inline-asm-error-K.ll => AArch64/arm64-inline-asm-error-K.ll} (100%) rename test/CodeGen/{ARM64/inline-asm-error-L.ll => AArch64/arm64-inline-asm-error-L.ll} (100%) rename test/CodeGen/{ARM64/inline-asm-error-M.ll => AArch64/arm64-inline-asm-error-M.ll} (100%) rename test/CodeGen/{ARM64/inline-asm-error-N.ll => AArch64/arm64-inline-asm-error-N.ll} (100%) rename test/CodeGen/{ARM64/inline-asm-zero-reg-error.ll => AArch64/arm64-inline-asm-zero-reg-error.ll} (100%) rename test/CodeGen/{ARM64/inline-asm.ll => AArch64/arm64-inline-asm.ll} (98%) rename test/CodeGen/{ARM64/join-reserved.ll => AArch64/arm64-join-reserved.ll} (100%) rename test/CodeGen/{ARM64/jumptable.ll => AArch64/arm64-jumptable.ll} (100%) rename test/CodeGen/{ARM64/aarch64-large-frame.ll => AArch64/arm64-large-frame.ll} (100%) rename test/CodeGen/{ARM64/ld1.ll => AArch64/arm64-ld1.ll} (67%) rename test/CodeGen/{ARM64/ldp.ll => AArch64/arm64-ldp.ll} (98%) rename test/CodeGen/{ARM64/ldur.ll => AArch64/arm64-ldur.ll} (100%) rename test/CodeGen/{ARM64/ldxr-stxr.ll => AArch64/arm64-ldxr-stxr.ll} (69%) rename test/CodeGen/{ARM64/leaf.ll => AArch64/arm64-leaf.ll} (100%) rename test/CodeGen/{ARM64/long-shift.ll => AArch64/arm64-long-shift.ll} (100%) rename test/CodeGen/{ARM64/memcpy-inline.ll => AArch64/arm64-memcpy-inline.ll} (100%) rename test/CodeGen/{ARM64/memset-inline.ll => AArch64/arm64-memset-inline.ll} (100%) rename test/CodeGen/{ARM64/memset-to-bzero.ll => AArch64/arm64-memset-to-bzero.ll} (100%) rename test/CodeGen/{ARM64/misched-basic-A53.ll => AArch64/arm64-misched-basic-A53.ll} (96%) rename 
test/CodeGen/{ARM64/misched-forwarding-A53.ll => AArch64/arm64-misched-forwarding-A53.ll} (100%) rename test/CodeGen/{ARM64/movi.ll => AArch64/arm64-movi.ll} (100%) rename test/CodeGen/{ARM64/mul.ll => AArch64/arm64-mul.ll} (100%) rename test/CodeGen/{ARM64/named-reg-alloc.ll => AArch64/arm64-named-reg-alloc.ll} (100%) rename test/CodeGen/{ARM64/named-reg-notareg.ll => AArch64/arm64-named-reg-notareg.ll} (100%) rename test/CodeGen/{ARM64/neg.ll => AArch64/arm64-neg.ll} (100%) rename test/CodeGen/{ARM64/aarch64-neon-2velem-high.ll => AArch64/arm64-neon-2velem-high.ll} (82%) rename test/CodeGen/{ARM64/aarch64-neon-2velem.ll => AArch64/arm64-neon-2velem.ll} (84%) rename test/CodeGen/{ARM64/aarch64-neon-3vdiff.ll => AArch64/arm64-neon-3vdiff.ll} (85%) rename test/CodeGen/{ARM64/aarch64-neon-aba-abd.ll => AArch64/arm64-neon-aba-abd.ll} (58%) rename test/CodeGen/{ARM64/aarch64-neon-across.ll => AArch64/arm64-neon-across.ll} (56%) rename test/CodeGen/{ARM64/aarch64-neon-add-pairwise.ll => AArch64/arm64-neon-add-pairwise.ll} (51%) rename test/CodeGen/{ARM64/aarch64-neon-add-sub.ll => AArch64/arm64-neon-add-sub.ll} (89%) rename test/CodeGen/{ARM64/neon-compare-instructions.ll => AArch64/arm64-neon-compare-instructions.ll} (100%) rename test/CodeGen/{ARM64/aarch64-neon-copy.ll => AArch64/arm64-neon-copy.ll} (99%) rename test/CodeGen/{ARM64/aarch64-neon-copyPhysReg-tuple.ll => AArch64/arm64-neon-copyPhysReg-tuple.ll} (56%) rename test/CodeGen/{ARM64/aarch64-neon-mul-div.ll => AArch64/arm64-neon-mul-div.ll} (92%) rename test/CodeGen/{ARM64/aarch64-neon-scalar-by-elem-mul.ll => AArch64/arm64-neon-scalar-by-elem-mul.ll} (85%) rename test/CodeGen/{ARM64/aarch64-neon-select_cc.ll => AArch64/arm64-neon-select_cc.ll} (100%) rename test/CodeGen/{ARM64/aarch64-neon-simd-ldst-one.ll => AArch64/arm64-neon-simd-ldst-one.ll} (100%) rename test/CodeGen/{ARM64/aarch64-neon-simd-shift.ll => AArch64/arm64-neon-simd-shift.ll} (81%) rename test/CodeGen/{ARM64/aarch64-neon-simd-vget.ll => AArch64/arm64-neon-simd-vget.ll} (100%) rename test/CodeGen/{ARM64/neon-v1i1-setcc.ll => AArch64/arm64-neon-v1i1-setcc.ll} (100%) rename test/CodeGen/{ARM64/aarch64-neon-vector-list-spill.ll => AArch64/arm64-neon-vector-list-spill.ll} (77%) rename test/CodeGen/{ARM64/patchpoint.ll => AArch64/arm64-patchpoint.ll} (100%) rename test/CodeGen/{ARM64/pic-local-symbol.ll => AArch64/arm64-pic-local-symbol.ll} (100%) rename test/CodeGen/{ARM64/platform-reg.ll => AArch64/arm64-platform-reg.ll} (100%) rename test/CodeGen/{ARM64/popcnt.ll => AArch64/arm64-popcnt.ll} (93%) rename test/CodeGen/{ARM64/prefetch.ll => AArch64/arm64-prefetch.ll} (100%) rename test/CodeGen/{ARM64/promote-const.ll => AArch64/arm64-promote-const.ll} (98%) rename test/CodeGen/{ARM64/redzone.ll => AArch64/arm64-redzone.ll} (88%) rename test/CodeGen/{ARM64/reg-copy-noneon.ll => AArch64/arm64-reg-copy-noneon.ll} (100%) rename test/CodeGen/{ARM64/register-offset-addressing.ll => AArch64/arm64-register-offset-addressing.ll} (100%) rename test/CodeGen/{ARM64/register-pairing.ll => AArch64/arm64-register-pairing.ll} (100%) rename test/CodeGen/{ARM64/regress-f128csel-flags.ll => AArch64/arm64-regress-f128csel-flags.ll} (100%) rename test/CodeGen/{ARM64/regress-interphase-shift.ll => AArch64/arm64-regress-interphase-shift.ll} (100%) rename test/CodeGen/{ARM64/return-vector.ll => AArch64/arm64-return-vector.ll} (100%) rename test/CodeGen/{ARM64/returnaddr.ll => AArch64/arm64-returnaddr.ll} (100%) rename test/CodeGen/{ARM64/rev.ll => AArch64/arm64-rev.ll} (99%) rename 
test/CodeGen/{ARM64/rounding.ll => AArch64/arm64-rounding.ll} (100%) rename test/CodeGen/{ARM64/scaled_iv.ll => AArch64/arm64-scaled_iv.ll} (100%) rename test/CodeGen/{ARM64/scvt.ll => AArch64/arm64-scvt.ll} (99%) rename test/CodeGen/{ARM64/shifted-sext.ll => AArch64/arm64-shifted-sext.ll} (100%) create mode 100644 test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll rename test/CodeGen/{ARM64/simplest-elf.ll => AArch64/arm64-simplest-elf.ll} (100%) rename test/CodeGen/{ARM64/sincos.ll => AArch64/arm64-sincos.ll} (100%) rename test/CodeGen/{ARM64/sitofp-combine-chains.ll => AArch64/arm64-sitofp-combine-chains.ll} (100%) rename test/CodeGen/{ARM64/sli-sri-opt.ll => AArch64/arm64-sli-sri-opt.ll} (95%) rename test/CodeGen/{ARM64/smaxv.ll => AArch64/arm64-smaxv.ll} (61%) rename test/CodeGen/{ARM64/sminv.ll => AArch64/arm64-sminv.ll} (61%) rename test/CodeGen/{ARM64/spill-lr.ll => AArch64/arm64-spill-lr.ll} (100%) rename test/CodeGen/{ARM64/spill.ll => AArch64/arm64-spill.ll} (88%) rename test/CodeGen/{ARM64/st1.ll => AArch64/arm64-st1.ll} (50%) rename test/CodeGen/{ARM64/stack-no-frame.ll => AArch64/arm64-stack-no-frame.ll} (100%) rename test/CodeGen/{ARM64/stackmap.ll => AArch64/arm64-stackmap.ll} (100%) rename test/CodeGen/{ARM64/stackpointer.ll => AArch64/arm64-stackpointer.ll} (100%) rename test/CodeGen/{ARM64/stacksave.ll => AArch64/arm64-stacksave.ll} (100%) rename test/CodeGen/{ARM64/stp.ll => AArch64/arm64-stp.ll} (94%) rename test/CodeGen/{ARM64/strict-align.ll => AArch64/arm64-strict-align.ll} (75%) rename test/CodeGen/{ARM64/stur.ll => AArch64/arm64-stur.ll} (96%) rename test/CodeGen/{ARM64/subsections.ll => AArch64/arm64-subsections.ll} (100%) rename test/CodeGen/{ARM64/subvector-extend.ll => AArch64/arm64-subvector-extend.ll} (97%) rename test/CodeGen/{ARM64/swizzle-tbl-i16-layout.ll => AArch64/arm64-swizzle-tbl-i16-layout.ll} (100%) create mode 100644 test/CodeGen/AArch64/arm64-tbl.ll rename test/CodeGen/{ARM64/this-return.ll => AArch64/arm64-this-return.ll} (100%) rename test/CodeGen/{ARM64/tls-darwin.ll => AArch64/arm64-tls-darwin.ll} (100%) rename test/CodeGen/{ARM64/tls-dynamic-together.ll => AArch64/arm64-tls-dynamic-together.ll} (100%) rename test/CodeGen/{ARM64/tls-dynamics.ll => AArch64/arm64-tls-dynamics.ll} (100%) rename test/CodeGen/{ARM64/tls-execs.ll => AArch64/arm64-tls-execs.ll} (100%) rename test/CodeGen/{ARM64/trap.ll => AArch64/arm64-trap.ll} (100%) rename test/CodeGen/{ARM64/trn.ll => AArch64/arm64-trn.ll} (98%) rename test/CodeGen/{ARM64/trunc-store.ll => AArch64/arm64-trunc-store.ll} (100%) rename test/CodeGen/{ARM64/umaxv.ll => AArch64/arm64-umaxv.ll} (75%) rename test/CodeGen/{ARM64/uminv.ll => AArch64/arm64-uminv.ll} (75%) rename test/CodeGen/{ARM64/umov.ll => AArch64/arm64-umov.ll} (90%) rename test/CodeGen/{ARM64/unaligned_ldst.ll => AArch64/arm64-unaligned_ldst.ll} (100%) rename test/CodeGen/{ARM64/uzp.ll => AArch64/arm64-uzp.ll} (98%) rename test/CodeGen/{ARM64/vaargs.ll => AArch64/arm64-vaargs.ll} (100%) rename test/CodeGen/{ARM64/vabs.ll => AArch64/arm64-vabs.ll} (68%) rename test/CodeGen/{ARM64/vadd.ll => AArch64/arm64-vadd.ll} (80%) rename test/CodeGen/{ARM64/vaddlv.ll => AArch64/arm64-vaddlv.ll} (55%) rename test/CodeGen/{ARM64/vaddv.ll => AArch64/arm64-vaddv.ll} (63%) rename test/CodeGen/{ARM64/variadic-aapcs.ll => AArch64/arm64-variadic-aapcs.ll} (100%) rename test/CodeGen/{ARM64/vbitwise.ll => AArch64/arm64-vbitwise.ll} (86%) rename test/CodeGen/{ARM64/vclz.ll => AArch64/arm64-vclz.ll} (98%) rename test/CodeGen/{ARM64/vcmp.ll => 
AArch64/arm64-vcmp.ll} (76%) create mode 100644 test/CodeGen/AArch64/arm64-vcnt.ll rename test/CodeGen/{ARM64/vcombine.ll => AArch64/arm64-vcombine.ll} (90%) rename test/CodeGen/{ARM64/vcvt.ll => AArch64/arm64-vcvt.ll} (67%) rename test/CodeGen/{ARM64/vcvt_f.ll => AArch64/arm64-vcvt_f.ll} (73%) rename test/CodeGen/{ARM64/vcvt_f32_su32.ll => AArch64/arm64-vcvt_f32_su32.ll} (75%) create mode 100644 test/CodeGen/AArch64/arm64-vcvt_n.ll rename test/CodeGen/{ARM64/vcvt_su32_f32.ll => AArch64/arm64-vcvt_su32_f32.ll} (91%) rename test/CodeGen/{ARM64/vcvtxd_f32_f64.ll => AArch64/arm64-vcvtxd_f32_f64.ll} (54%) rename test/CodeGen/{ARM64/vecCmpBr.ll => AArch64/arm64-vecCmpBr.ll} (87%) rename test/CodeGen/{ARM64/vecFold.ll => AArch64/arm64-vecFold.ll} (74%) rename test/CodeGen/{ARM64/vector-ext.ll => AArch64/arm64-vector-ext.ll} (81%) rename test/CodeGen/{ARM64/vector-imm.ll => AArch64/arm64-vector-imm.ll} (98%) rename test/CodeGen/{ARM64/vector-insertion.ll => AArch64/arm64-vector-insertion.ll} (91%) rename test/CodeGen/{ARM64/vector-ldst.ll => AArch64/arm64-vector-ldst.ll} (99%) rename test/CodeGen/{ARM64/vext.ll => AArch64/arm64-vext.ll} (99%) rename test/CodeGen/{ARM64/vext_reverse.ll => AArch64/arm64-vext_reverse.ll} (100%) rename test/CodeGen/{ARM64/vfloatintrinsics.ll => AArch64/arm64-vfloatintrinsics.ll} (99%) rename test/CodeGen/{ARM64/vhadd.ll => AArch64/arm64-vhadd.ll} (51%) rename test/CodeGen/{ARM64/vhsub.ll => AArch64/arm64-vhsub.ll} (50%) rename test/CodeGen/{ARM64/virtual_base.ll => AArch64/arm64-virtual_base.ll} (100%) rename test/CodeGen/{ARM64/vmax.ll => AArch64/arm64-vmax.ll} (52%) create mode 100644 test/CodeGen/AArch64/arm64-vminmaxnm.ll rename test/CodeGen/{ARM64/vmovn.ll => AArch64/arm64-vmovn.ll} (74%) rename test/CodeGen/{ARM64/vmul.ll => AArch64/arm64-vmul.ll} (80%) rename test/CodeGen/{ARM64/volatile.ll => AArch64/arm64-volatile.ll} (100%) rename test/CodeGen/{ARM64/vpopcnt.ll => AArch64/arm64-vpopcnt.ll} (100%) rename test/CodeGen/{ARM64/vqadd.ll => AArch64/arm64-vqadd.ll} (50%) create mode 100644 test/CodeGen/AArch64/arm64-vqsub.ll rename test/CodeGen/{ARM64/vselect.ll => AArch64/arm64-vselect.ll} (89%) rename test/CodeGen/{ARM64/vsetcc_fp.ll => AArch64/arm64-vsetcc_fp.ll} (80%) rename test/CodeGen/{ARM64/vshift.ll => AArch64/arm64-vshift.ll} (68%) rename test/CodeGen/{ARM64/vshr.ll => AArch64/arm64-vshr.ll} (95%) rename test/CodeGen/{ARM64/vshuffle.ll => AArch64/arm64-vshuffle.ll} (100%) rename test/CodeGen/{ARM64/vsqrt.ll => AArch64/arm64-vsqrt.ll} (51%) rename test/CodeGen/{ARM64/vsra.ll => AArch64/arm64-vsra.ll} (98%) rename test/CodeGen/{ARM64/vsub.ll => AArch64/arm64-vsub.ll} (85%) rename test/CodeGen/{ARM64/weak-reference.ll => AArch64/arm64-weak-reference.ll} (100%) rename test/CodeGen/{ARM64/xaluo.ll => AArch64/arm64-xaluo.ll} (100%) rename test/CodeGen/{ARM64/zero-cycle-regmov.ll => AArch64/arm64-zero-cycle-regmov.ll} (100%) rename test/CodeGen/{ARM64/zero-cycle-zeroing.ll => AArch64/arm64-zero-cycle-zeroing.ll} (100%) rename test/CodeGen/{ARM64/zext.ll => AArch64/arm64-zext.ll} (100%) rename test/CodeGen/{ARM64/zextload-unscaled.ll => AArch64/arm64-zextload-unscaled.ll} (100%) rename test/CodeGen/{ARM64/zip.ll => AArch64/arm64-zip.ll} (98%) rename test/CodeGen/{ARM64 => AArch64}/lit.local.cfg (89%) delete mode 100644 test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S delete mode 100644 test/CodeGen/ARM64/fminv.ll delete mode 100644 test/CodeGen/ARM64/simd-scalar-to-vector.ll delete mode 100644 test/CodeGen/ARM64/tbl.ll delete mode 100644 
test/CodeGen/ARM64/vcnt.ll delete mode 100644 test/CodeGen/ARM64/vcvt_n.ll delete mode 100644 test/CodeGen/ARM64/vminmaxnm.ll delete mode 100644 test/CodeGen/ARM64/vqsub.ll rename test/DebugInfo/{ARM64 => AArch64}/struct_by_value.ll (100%) rename test/MC/{ARM64/adr.s => AArch64/arm64-adr.s} (88%) rename test/MC/{ARM64/advsimd.s => AArch64/arm64-advsimd.s} (100%) rename test/MC/{ARM64/aliases.s => AArch64/arm64-aliases.s} (100%) rename test/MC/{ARM64/arithmetic-encoding.s => AArch64/arm64-arithmetic-encoding.s} (100%) rename test/MC/{ARM64/arm64-fixup.s => AArch64/arm64-arm64-fixup.s} (76%) rename test/MC/{ARM64/basic-a64-instructions.s => AArch64/arm64-basic-a64-instructions.s} (100%) rename test/MC/{ARM64/be-datalayout.s => AArch64/arm64-be-datalayout.s} (100%) rename test/MC/{ARM64/bitfield-encoding.s => AArch64/arm64-bitfield-encoding.s} (100%) rename test/MC/{ARM64/branch-encoding.s => AArch64/arm64-branch-encoding.s} (77%) rename test/MC/{ARM64/condbr-without-dots.s => AArch64/arm64-condbr-without-dots.s} (100%) rename test/MC/{ARM64/crypto.s => AArch64/arm64-crypto.s} (100%) rename test/MC/{ARM64/diagno-predicate.s => AArch64/arm64-diagno-predicate.s} (100%) rename test/MC/{ARM64/diags.s => AArch64/arm64-diags.s} (99%) rename test/MC/{ARM64/directive_loh.s => AArch64/arm64-directive_loh.s} (100%) rename test/MC/{ARM64/elf-reloc-condbr.s => AArch64/arm64-elf-reloc-condbr.s} (100%) rename test/MC/{ARM64/elf-relocs.s => AArch64/arm64-elf-relocs.s} (100%) rename test/MC/{ARM64/fp-encoding.s => AArch64/arm64-fp-encoding.s} (100%) rename test/MC/{ARM64/large-relocs.s => AArch64/arm64-large-relocs.s} (83%) rename test/MC/{ARM64/leaf-compact-unwind.s => AArch64/arm64-leaf-compact-unwind.s} (100%) rename test/MC/{ARM64/logical-encoding.s => AArch64/arm64-logical-encoding.s} (100%) rename test/MC/{ARM64/mapping-across-sections.s => AArch64/arm64-mapping-across-sections.s} (100%) rename test/MC/{ARM64/mapping-within-section.s => AArch64/arm64-mapping-within-section.s} (100%) rename test/MC/{ARM64/memory.s => AArch64/arm64-memory.s} (100%) rename test/MC/{ARM64/nv-cond.s => AArch64/arm64-nv-cond.s} (100%) rename test/MC/{ARM64/optional-hash.s => AArch64/arm64-optional-hash.s} (100%) rename test/MC/{ARM64/separator.s => AArch64/arm64-separator.s} (100%) rename test/MC/{ARM64/simd-ldst.s => AArch64/arm64-simd-ldst.s} (100%) rename test/MC/{ARM64/small-data-fixups.s => AArch64/arm64-small-data-fixups.s} (100%) rename test/MC/{ARM64/spsel-sysreg.s => AArch64/arm64-spsel-sysreg.s} (100%) rename test/MC/{ARM64/system-encoding.s => AArch64/arm64-system-encoding.s} (100%) rename test/MC/{ARM64/target-specific-sysreg.s => AArch64/arm64-target-specific-sysreg.s} (100%) rename test/MC/{ARM64/tls-modifiers-darwin.s => AArch64/arm64-tls-modifiers-darwin.s} (100%) rename test/MC/{ARM64/tls-relocs.s => AArch64/arm64-tls-relocs.s} (82%) rename test/MC/{ARM64/v128_lo-diagnostics.s => AArch64/arm64-v128_lo-diagnostics.s} (100%) rename test/MC/{ARM64/variable-exprs.s => AArch64/arm64-variable-exprs.s} (100%) rename test/MC/{ARM64/vector-lists.s => AArch64/arm64-vector-lists.s} (100%) rename test/MC/{ARM64/verbose-vector-case.s => AArch64/arm64-verbose-vector-case.s} (100%) delete mode 100644 test/MC/ARM64/lit.local.cfg rename test/MC/Disassembler/{ARM64/advsimd.txt => AArch64/arm64-advsimd.txt} (100%) rename test/MC/Disassembler/{ARM64/arithmetic.txt => AArch64/arm64-arithmetic.txt} (100%) rename test/MC/Disassembler/{ARM64/basic-a64-undefined.txt => AArch64/arm64-basic-a64-undefined.txt} (100%) rename 
test/MC/Disassembler/{ARM64/bitfield.txt => AArch64/arm64-bitfield.txt} (100%) rename test/MC/Disassembler/{ARM64/branch.txt => AArch64/arm64-branch.txt} (100%) rename test/MC/Disassembler/{ARM64/canonical-form.txt => AArch64/arm64-canonical-form.txt} (100%) rename test/MC/Disassembler/{ARM64/crc32.txt => AArch64/arm64-crc32.txt} (100%) rename test/MC/Disassembler/{ARM64/crypto.txt => AArch64/arm64-crypto.txt} (100%) rename test/MC/Disassembler/{ARM64/invalid-logical.txt => AArch64/arm64-invalid-logical.txt} (100%) rename test/MC/Disassembler/{ARM64/logical.txt => AArch64/arm64-logical.txt} (100%) rename test/MC/Disassembler/{ARM64/memory.txt => AArch64/arm64-memory.txt} (100%) rename test/MC/Disassembler/{ARM64/non-apple-fmov.txt => AArch64/arm64-non-apple-fmov.txt} (100%) rename test/MC/Disassembler/{ARM64/scalar-fp.txt => AArch64/arm64-scalar-fp.txt} (100%) rename test/MC/Disassembler/{ARM64/system.txt => AArch64/arm64-system.txt} (100%) delete mode 100644 test/MC/Disassembler/ARM64/lit.local.cfg rename test/MC/MachO/{ARM64 => AArch64}/darwin-ARM64-local-label-diff.s (100%) rename test/MC/MachO/{ARM64 => AArch64}/darwin-ARM64-reloc.s (100%) rename test/{Transforms/GlobalMerge/ARM64 => MC/MachO/AArch64}/lit.local.cfg (74%) rename test/Transforms/ConstantHoisting/{ARM64 => AArch64}/const-addr.ll (100%) rename test/Transforms/ConstantHoisting/{ARM64 => AArch64}/large-immediate.ll (100%) rename test/Transforms/ConstantHoisting/{ARM64 => AArch64}/lit.local.cfg (73%) rename test/Transforms/GlobalMerge/{ARM64 => AArch64}/arm64.ll (100%) rename test/{DebugInfo/ARM64 => Transforms/GlobalMerge/AArch64}/lit.local.cfg (74%) rename test/Transforms/LoopStrengthReduce/{ARM64 => AArch64}/lit.local.cfg (78%) rename test/Transforms/LoopStrengthReduce/{ARM64 => AArch64}/lsr-memcpy.ll (100%) rename test/Transforms/LoopStrengthReduce/{ARM64 => AArch64}/lsr-memset.ll (100%) rename test/Transforms/LoopStrengthReduce/{ARM64 => AArch64}/req-regs.ll (100%) rename test/Transforms/LoopVectorize/{ARM64 => AArch64}/arm64-unroll.ll (100%) rename test/Transforms/LoopVectorize/{ARM64 => AArch64}/gather-cost.ll (100%) delete mode 100644 test/Transforms/LoopVectorize/ARM64/lit.local.cfg rename test/{MC/MachO/ARM64 => Transforms/SLPVectorizer/AArch64}/lit.local.cfg (73%) rename test/Transforms/SLPVectorizer/{ARM64 => AArch64}/mismatched-intrinsics.ll (100%) delete mode 100644 test/Transforms/SLPVectorizer/ARM64/lit.local.cfg diff --git a/CMakeLists.txt b/CMakeLists.txt index b19ab0271ab9..0d6eead42f67 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,7 +127,7 @@ set(LLVM_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include) set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" ) set(LLVM_ALL_TARGETS - ARM64 + AArch64 ARM CppBackend Hexagon @@ -143,7 +143,7 @@ set(LLVM_ALL_TARGETS ) # List of targets with JIT support: -set(LLVM_TARGETS_WITH_JIT X86 PowerPC ARM64 ARM Mips SystemZ) +set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM Mips SystemZ) set(LLVM_TARGETS_TO_BUILD "all" CACHE STRING "Semicolon-separated list of targets to build, or \"all\".") diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 344e66af65d7..08f756c92148 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -419,9 +419,9 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch], amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;; sparc*-*) llvm_cv_target_arch="Sparc" ;; powerpc*-*) llvm_cv_target_arch="PowerPC" ;; - arm64*-*) llvm_cv_target_arch="ARM64" ;; + arm64*-*) 
llvm_cv_target_arch="AArch64" ;; arm*-*) llvm_cv_target_arch="ARM" ;; - aarch64*-*) llvm_cv_target_arch="ARM64" ;; + aarch64*-*) llvm_cv_target_arch="AArch64" ;; mips-* | mips64-*) llvm_cv_target_arch="Mips" ;; mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;; xcore-*) llvm_cv_target_arch="XCore" ;; @@ -455,9 +455,9 @@ case $host in amd64-* | x86_64-*) host_arch="x86_64" ;; sparc*-*) host_arch="Sparc" ;; powerpc*-*) host_arch="PowerPC" ;; - arm64*-*) host_arch="ARM64" ;; + arm64*-*) host_arch="AArch64" ;; arm*-*) host_arch="ARM" ;; - aarch64*-*) host_arch="ARM64" ;; + aarch64*-*) host_arch="AArch64" ;; mips-* | mips64-*) host_arch="Mips" ;; mipsel-* | mips64el-*) host_arch="Mips" ;; xcore-*) host_arch="XCore" ;; @@ -796,7 +796,7 @@ else esac fi -TARGETS_WITH_JIT="ARM ARM64 Mips PowerPC SystemZ X86" +TARGETS_WITH_JIT="ARM AArch64 Mips PowerPC SystemZ X86" AC_SUBST(TARGETS_WITH_JIT,$TARGETS_WITH_JIT) dnl Allow enablement of building and installing docs @@ -949,7 +949,7 @@ if test "$llvm_cv_enable_crash_overrides" = "yes" ; then fi dnl List all possible targets -ALL_TARGETS="X86 Sparc PowerPC ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600" +ALL_TARGETS="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600" AC_SUBST(ALL_TARGETS,$ALL_TARGETS) dnl Allow specific targets to be specified for building (or not) @@ -970,8 +970,8 @@ case "$enableval" in x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; - aarch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; - arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; + aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; + arm64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; @@ -989,7 +989,7 @@ case "$enableval" in x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; - AArch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; + AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index ca4af73d92c8..1325e790c803 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -372,7 +372,7 @@ elseif (LLVM_NATIVE_ARCH MATCHES "powerpc") elseif (LLVM_NATIVE_ARCH MATCHES "aarch64") set(LLVM_NATIVE_ARCH AArch64) elseif (LLVM_NATIVE_ARCH MATCHES "arm64") - set(LLVM_NATIVE_ARCH ARM64) + set(LLVM_NATIVE_ARCH AArch64) elseif (LLVM_NATIVE_ARCH MATCHES "arm") set(LLVM_NATIVE_ARCH ARM) elseif (LLVM_NATIVE_ARCH MATCHES "mips") diff --git a/configure b/configure index a5babe9c2301..e1959dfee6c0 100755 --- a/configure +++ b/configure @@ -4151,9 +4151,9 @@ else amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;; sparc*-*) llvm_cv_target_arch="Sparc" ;; powerpc*-*) llvm_cv_target_arch="PowerPC" ;; - arm64*-*) llvm_cv_target_arch="ARM64" ;; + arm64*-*) llvm_cv_target_arch="AArch64" ;; arm*-*) llvm_cv_target_arch="ARM" ;; - aarch64*-*) llvm_cv_target_arch="ARM64" ;; + aarch64*-*) llvm_cv_target_arch="AArch64" ;; mips-* | mips64-*) llvm_cv_target_arch="Mips" ;; mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;; xcore-*) 
llvm_cv_target_arch="XCore" ;; @@ -4188,9 +4188,9 @@ case $host in amd64-* | x86_64-*) host_arch="x86_64" ;; sparc*-*) host_arch="Sparc" ;; powerpc*-*) host_arch="PowerPC" ;; - arm64*-*) host_arch="ARM64" ;; + arm64*-*) host_arch="AArch64" ;; arm*-*) host_arch="ARM" ;; - aarch64*-*) host_arch="ARM64" ;; + aarch64*-*) host_arch="AArch64" ;; mips-* | mips64-*) host_arch="Mips" ;; mipsel-* | mips64el-*) host_arch="Mips" ;; xcore-*) host_arch="XCore" ;; @@ -5120,7 +5120,7 @@ else esac fi -TARGETS_WITH_JIT="ARM ARM64 Mips PowerPC SystemZ X86" +TARGETS_WITH_JIT="ARM AArch64 Mips PowerPC SystemZ X86" TARGETS_WITH_JIT=$TARGETS_WITH_JIT @@ -5357,7 +5357,7 @@ _ACEOF fi -ALL_TARGETS="X86 Sparc PowerPC ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600" +ALL_TARGETS="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600" ALL_TARGETS=$ALL_TARGETS @@ -5380,8 +5380,8 @@ case "$enableval" in x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; - aarch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; - arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; + aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; + arm64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; @@ -5399,7 +5399,7 @@ case "$enableval" in x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; - AArch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; + AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; diff --git a/docs/LangRef.rst b/docs/LangRef.rst index fa8d3c0b75fe..9b72eca7de5d 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -6877,7 +6877,7 @@ register in surrounding code, including inline assembly. Because of that, allocatable registers are not supported. Warning: So far it only works with the stack pointer on selected -architectures (ARM, ARM64, AArch64, PowerPC and x86_64). Significant amount of +architectures (ARM, AArch64, PowerPC and x86_64). Significant amount of work is needed to support other registers and even more so, allocatable registers. 
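For readers unfamiliar with the named-register intrinsics referenced in the LangRef.rst hunk above, here is a minimal, illustrative sketch (not part of the patch) of reading the stack pointer. It assumes the llvm.read_register.i64 overload and the metadata string syntax of this era; the function name is made up for the example:

    define i64 @read_sp() nounwind {
    entry:
      ; Read the register named by !0 (here the stack pointer).
      %sp = call i64 @llvm.read_register.i64(metadata !0)
      ret i64 %sp
    }

    declare i64 @llvm.read_register.i64(metadata) nounwind

    ; Register name passed as a metadata string, per the named-register docs.
    !0 = metadata !{metadata !"sp\00"}
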
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index b133b4e40962..edd1621ef25d 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -533,7 +533,7 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], include "llvm/IR/IntrinsicsPowerPC.td" include "llvm/IR/IntrinsicsX86.td" include "llvm/IR/IntrinsicsARM.td" -include "llvm/IR/IntrinsicsARM64.td" +include "llvm/IR/IntrinsicsAArch64.td" include "llvm/IR/IntrinsicsXCore.td" include "llvm/IR/IntrinsicsHexagon.td" include "llvm/IR/IntrinsicsNVVM.td" diff --git a/include/llvm/IR/IntrinsicsARM64.td b/include/llvm/IR/IntrinsicsAArch64.td similarity index 55% rename from include/llvm/IR/IntrinsicsARM64.td rename to include/llvm/IR/IntrinsicsAArch64.td index 146ea5d970cf..23757aaef5cc 100644 --- a/include/llvm/IR/IntrinsicsARM64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -1,4 +1,4 @@ -//===- IntrinsicsARM64.td - Defines ARM64 intrinsics -------*- tablegen -*-===// +//===- IntrinsicsAARCH64.td - Defines AARCH64 intrinsics ---*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,36 +7,36 @@ // //===----------------------------------------------------------------------===// // -// This file defines all of the ARM64-specific intrinsics. +// This file defines all of the AARCH64-specific intrinsics. // //===----------------------------------------------------------------------===// -let TargetPrefix = "arm64" in { +let TargetPrefix = "aarch64" in { -def int_arm64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>; -def int_arm64_ldaxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>; -def int_arm64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>; -def int_arm64_stlxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>; +def int_aarch64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>; +def int_aarch64_ldaxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>; +def int_aarch64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>; +def int_aarch64_stlxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>; -def int_arm64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>; -def int_arm64_ldaxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>; -def int_arm64_stxp : Intrinsic<[llvm_i32_ty], +def int_aarch64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>; +def int_aarch64_ldaxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>; +def int_aarch64_stxp : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty]>; -def int_arm64_stlxp : Intrinsic<[llvm_i32_ty], +def int_aarch64_stlxp : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty]>; -def int_arm64_clrex : Intrinsic<[]>; +def int_aarch64_clrex : Intrinsic<[]>; -def int_arm64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, +def int_aarch64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; -def int_arm64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, +def int_aarch64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // Advanced SIMD (NEON) -let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". +let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
class AdvSIMD_2Scalar_Float_Intrinsic : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; @@ -139,269 +139,269 @@ let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". let Properties = [IntrNoMem] in { // Vector Add Across Lanes - def int_arm64_neon_saddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; - def int_arm64_neon_uaddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; - def int_arm64_neon_faddv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; + def int_aarch64_neon_saddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_uaddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_faddv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; // Vector Long Add Across Lanes - def int_arm64_neon_saddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; - def int_arm64_neon_uaddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_saddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_uaddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; // Vector Halving Add - def int_arm64_neon_shadd : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_uhadd : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_shadd : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_uhadd : AdvSIMD_2VectorArg_Intrinsic; // Vector Rounding Halving Add - def int_arm64_neon_srhadd : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_urhadd : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_srhadd : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_urhadd : AdvSIMD_2VectorArg_Intrinsic; // Vector Saturating Add - def int_arm64_neon_sqadd : AdvSIMD_2IntArg_Intrinsic; - def int_arm64_neon_suqadd : AdvSIMD_2IntArg_Intrinsic; - def int_arm64_neon_usqadd : AdvSIMD_2IntArg_Intrinsic; - def int_arm64_neon_uqadd : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sqadd : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_suqadd : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_usqadd : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_uqadd : AdvSIMD_2IntArg_Intrinsic; // Vector Add High-Half // FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that // header is no longer supported. - def int_arm64_neon_addhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; + def int_aarch64_neon_addhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; // Vector Rounding Add High-Half - def int_arm64_neon_raddhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; + def int_aarch64_neon_raddhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; // Vector Saturating Doubling Multiply High - def int_arm64_neon_sqdmulh : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sqdmulh : AdvSIMD_2IntArg_Intrinsic; // Vector Saturating Rounding Doubling Multiply High - def int_arm64_neon_sqrdmulh : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sqrdmulh : AdvSIMD_2IntArg_Intrinsic; // Vector Polynominal Multiply - def int_arm64_neon_pmul : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_pmul : AdvSIMD_2VectorArg_Intrinsic; // Vector Long Multiply - def int_arm64_neon_smull : AdvSIMD_2VectorArg_Long_Intrinsic; - def int_arm64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic; - def int_arm64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic; + def int_aarch64_neon_smull : AdvSIMD_2VectorArg_Long_Intrinsic; + def int_aarch64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic; + def int_aarch64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic; // 64-bit polynomial multiply really returns an i128, which is not legal. Fake // it with a v16i8. 
- def int_arm64_neon_pmull64 : + def int_aarch64_neon_pmull64 : Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; // Vector Extending Multiply - def int_arm64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic { + def int_aarch64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic { let Properties = [IntrNoMem, Commutative]; } // Vector Saturating Doubling Long Multiply - def int_arm64_neon_sqdmull : AdvSIMD_2VectorArg_Long_Intrinsic; - def int_arm64_neon_sqdmulls_scalar + def int_aarch64_neon_sqdmull : AdvSIMD_2VectorArg_Long_Intrinsic; + def int_aarch64_neon_sqdmulls_scalar : Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Vector Halving Subtract - def int_arm64_neon_shsub : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_uhsub : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_shsub : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_uhsub : AdvSIMD_2VectorArg_Intrinsic; // Vector Saturating Subtract - def int_arm64_neon_sqsub : AdvSIMD_2IntArg_Intrinsic; - def int_arm64_neon_uqsub : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sqsub : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_uqsub : AdvSIMD_2IntArg_Intrinsic; // Vector Subtract High-Half // FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that // header is no longer supported. - def int_arm64_neon_subhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; + def int_aarch64_neon_subhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; // Vector Rounding Subtract High-Half - def int_arm64_neon_rsubhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; + def int_aarch64_neon_rsubhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; // Vector Compare Absolute Greater-than-or-equal - def int_arm64_neon_facge : AdvSIMD_2Arg_FloatCompare_Intrinsic; + def int_aarch64_neon_facge : AdvSIMD_2Arg_FloatCompare_Intrinsic; // Vector Compare Absolute Greater-than - def int_arm64_neon_facgt : AdvSIMD_2Arg_FloatCompare_Intrinsic; + def int_aarch64_neon_facgt : AdvSIMD_2Arg_FloatCompare_Intrinsic; // Vector Absolute Difference - def int_arm64_neon_sabd : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_uabd : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_fabd : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_sabd : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_uabd : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fabd : AdvSIMD_2VectorArg_Intrinsic; // Scalar Absolute Difference - def int_arm64_sisd_fabd : AdvSIMD_2Scalar_Float_Intrinsic; + def int_aarch64_sisd_fabd : AdvSIMD_2Scalar_Float_Intrinsic; // Vector Max - def int_arm64_neon_smax : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_umax : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_fmax : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_fmaxnmp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_smax : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_umax : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fmax : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fmaxnmp : AdvSIMD_2VectorArg_Intrinsic; // Vector Max Across Lanes - def int_arm64_neon_smaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; - def int_arm64_neon_umaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; - def int_arm64_neon_fmaxv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; - def int_arm64_neon_fmaxnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; + def int_aarch64_neon_smaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_umaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_fmaxv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; + def 
int_aarch64_neon_fmaxnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; // Vector Min - def int_arm64_neon_smin : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_umin : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_fmin : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_fminnmp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_smin : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_umin : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fmin : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fminnmp : AdvSIMD_2VectorArg_Intrinsic; // Vector Min/Max Number - def int_arm64_neon_fminnm : AdvSIMD_2FloatArg_Intrinsic; - def int_arm64_neon_fmaxnm : AdvSIMD_2FloatArg_Intrinsic; + def int_aarch64_neon_fminnm : AdvSIMD_2FloatArg_Intrinsic; + def int_aarch64_neon_fmaxnm : AdvSIMD_2FloatArg_Intrinsic; // Vector Min Across Lanes - def int_arm64_neon_sminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; - def int_arm64_neon_uminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; - def int_arm64_neon_fminv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; - def int_arm64_neon_fminnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; + def int_aarch64_neon_sminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_uminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_fminv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; + def int_aarch64_neon_fminnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; // Pairwise Add - def int_arm64_neon_addp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_addp : AdvSIMD_2VectorArg_Intrinsic; // Long Pairwise Add // FIXME: In theory, we shouldn't need intrinsics for saddlp or // uaddlp, but tblgen's type inference currently can't handle the // pattern fragments this ends up generating. - def int_arm64_neon_saddlp : AdvSIMD_1VectorArg_Expand_Intrinsic; - def int_arm64_neon_uaddlp : AdvSIMD_1VectorArg_Expand_Intrinsic; + def int_aarch64_neon_saddlp : AdvSIMD_1VectorArg_Expand_Intrinsic; + def int_aarch64_neon_uaddlp : AdvSIMD_1VectorArg_Expand_Intrinsic; // Folding Maximum - def int_arm64_neon_smaxp : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_umaxp : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_fmaxp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_smaxp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_umaxp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fmaxp : AdvSIMD_2VectorArg_Intrinsic; // Folding Minimum - def int_arm64_neon_sminp : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_uminp : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_fminp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_sminp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_uminp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fminp : AdvSIMD_2VectorArg_Intrinsic; // Reciprocal Estimate/Step - def int_arm64_neon_frecps : AdvSIMD_2FloatArg_Intrinsic; - def int_arm64_neon_frsqrts : AdvSIMD_2FloatArg_Intrinsic; + def int_aarch64_neon_frecps : AdvSIMD_2FloatArg_Intrinsic; + def int_aarch64_neon_frsqrts : AdvSIMD_2FloatArg_Intrinsic; // Reciprocal Exponent - def int_arm64_neon_frecpx : AdvSIMD_1FloatArg_Intrinsic; + def int_aarch64_neon_frecpx : AdvSIMD_1FloatArg_Intrinsic; // Vector Saturating Shift Left - def int_arm64_neon_sqshl : AdvSIMD_2IntArg_Intrinsic; - def int_arm64_neon_uqshl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sqshl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_uqshl : AdvSIMD_2IntArg_Intrinsic; // Vector Rounding Shift Left - def int_arm64_neon_srshl : AdvSIMD_2IntArg_Intrinsic; 
- def int_arm64_neon_urshl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_srshl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_urshl : AdvSIMD_2IntArg_Intrinsic; // Vector Saturating Rounding Shift Left - def int_arm64_neon_sqrshl : AdvSIMD_2IntArg_Intrinsic; - def int_arm64_neon_uqrshl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sqrshl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_uqrshl : AdvSIMD_2IntArg_Intrinsic; // Vector Signed->Unsigned Shift Left by Constant - def int_arm64_neon_sqshlu : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sqshlu : AdvSIMD_2IntArg_Intrinsic; // Vector Signed->Unsigned Narrowing Saturating Shift Right by Constant - def int_arm64_neon_sqshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; + def int_aarch64_neon_sqshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; // Vector Signed->Unsigned Rounding Narrowing Saturating Shift Right by Const - def int_arm64_neon_sqrshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; + def int_aarch64_neon_sqrshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; // Vector Narrowing Shift Right by Constant - def int_arm64_neon_sqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; - def int_arm64_neon_uqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; + def int_aarch64_neon_sqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; + def int_aarch64_neon_uqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; // Vector Rounding Narrowing Shift Right by Constant - def int_arm64_neon_rshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; + def int_aarch64_neon_rshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; // Vector Rounding Narrowing Saturating Shift Right by Constant - def int_arm64_neon_sqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; - def int_arm64_neon_uqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; + def int_aarch64_neon_sqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; + def int_aarch64_neon_uqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; // Vector Shift Left - def int_arm64_neon_sshl : AdvSIMD_2IntArg_Intrinsic; - def int_arm64_neon_ushl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sshl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_ushl : AdvSIMD_2IntArg_Intrinsic; // Vector Widening Shift Left by Constant - def int_arm64_neon_shll : AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic; - def int_arm64_neon_sshll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic; - def int_arm64_neon_ushll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic; + def int_aarch64_neon_shll : AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic; + def int_aarch64_neon_sshll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic; + def int_aarch64_neon_ushll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic; // Vector Shift Right by Constant and Insert - def int_arm64_neon_vsri : AdvSIMD_3VectorArg_Scalar_Intrinsic; + def int_aarch64_neon_vsri : AdvSIMD_3VectorArg_Scalar_Intrinsic; // Vector Shift Left by Constant and Insert - def int_arm64_neon_vsli : AdvSIMD_3VectorArg_Scalar_Intrinsic; + def int_aarch64_neon_vsli : AdvSIMD_3VectorArg_Scalar_Intrinsic; // Vector Saturating Narrow - def int_arm64_neon_scalar_sqxtn: AdvSIMD_1IntArg_Narrow_Intrinsic; - def int_arm64_neon_scalar_uqxtn : AdvSIMD_1IntArg_Narrow_Intrinsic; - def int_arm64_neon_sqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic; - def int_arm64_neon_uqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic; + def int_aarch64_neon_scalar_sqxtn: AdvSIMD_1IntArg_Narrow_Intrinsic; + def int_aarch64_neon_scalar_uqxtn : AdvSIMD_1IntArg_Narrow_Intrinsic; + def int_aarch64_neon_sqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic; + def int_aarch64_neon_uqxtn : 
AdvSIMD_1VectorArg_Narrow_Intrinsic; // Vector Saturating Extract and Unsigned Narrow - def int_arm64_neon_scalar_sqxtun : AdvSIMD_1IntArg_Narrow_Intrinsic; - def int_arm64_neon_sqxtun : AdvSIMD_1VectorArg_Narrow_Intrinsic; + def int_aarch64_neon_scalar_sqxtun : AdvSIMD_1IntArg_Narrow_Intrinsic; + def int_aarch64_neon_sqxtun : AdvSIMD_1VectorArg_Narrow_Intrinsic; // Vector Absolute Value - def int_arm64_neon_abs : AdvSIMD_1IntArg_Intrinsic; + def int_aarch64_neon_abs : AdvSIMD_1IntArg_Intrinsic; // Vector Saturating Absolute Value - def int_arm64_neon_sqabs : AdvSIMD_1IntArg_Intrinsic; + def int_aarch64_neon_sqabs : AdvSIMD_1IntArg_Intrinsic; // Vector Saturating Negation - def int_arm64_neon_sqneg : AdvSIMD_1IntArg_Intrinsic; + def int_aarch64_neon_sqneg : AdvSIMD_1IntArg_Intrinsic; // Vector Count Leading Sign Bits - def int_arm64_neon_cls : AdvSIMD_1VectorArg_Intrinsic; + def int_aarch64_neon_cls : AdvSIMD_1VectorArg_Intrinsic; // Vector Reciprocal Estimate - def int_arm64_neon_urecpe : AdvSIMD_1VectorArg_Intrinsic; - def int_arm64_neon_frecpe : AdvSIMD_1FloatArg_Intrinsic; + def int_aarch64_neon_urecpe : AdvSIMD_1VectorArg_Intrinsic; + def int_aarch64_neon_frecpe : AdvSIMD_1FloatArg_Intrinsic; // Vector Square Root Estimate - def int_arm64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic; - def int_arm64_neon_frsqrte : AdvSIMD_1FloatArg_Intrinsic; + def int_aarch64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic; + def int_aarch64_neon_frsqrte : AdvSIMD_1FloatArg_Intrinsic; // Vector Bitwise Reverse - def int_arm64_neon_rbit : AdvSIMD_1VectorArg_Intrinsic; + def int_aarch64_neon_rbit : AdvSIMD_1VectorArg_Intrinsic; // Vector Conversions Between Half-Precision and Single-Precision. - def int_arm64_neon_vcvtfp2hf + def int_aarch64_neon_vcvtfp2hf : Intrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_arm64_neon_vcvthf2fp + def int_aarch64_neon_vcvthf2fp : Intrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>; // Vector Conversions Between Floating-point and Fixed-point. 
- def int_arm64_neon_vcvtfp2fxs : AdvSIMD_CvtFPToFx_Intrinsic; - def int_arm64_neon_vcvtfp2fxu : AdvSIMD_CvtFPToFx_Intrinsic; - def int_arm64_neon_vcvtfxs2fp : AdvSIMD_CvtFxToFP_Intrinsic; - def int_arm64_neon_vcvtfxu2fp : AdvSIMD_CvtFxToFP_Intrinsic; + def int_aarch64_neon_vcvtfp2fxs : AdvSIMD_CvtFPToFx_Intrinsic; + def int_aarch64_neon_vcvtfp2fxu : AdvSIMD_CvtFPToFx_Intrinsic; + def int_aarch64_neon_vcvtfxs2fp : AdvSIMD_CvtFxToFP_Intrinsic; + def int_aarch64_neon_vcvtfxu2fp : AdvSIMD_CvtFxToFP_Intrinsic; // Vector FP->Int Conversions - def int_arm64_neon_fcvtas : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtau : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtms : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtmu : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtns : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtnu : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtps : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtpu : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtzs : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtzu : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtas : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtau : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtms : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtmu : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtns : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtnu : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtps : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtpu : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtzs : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtzu : AdvSIMD_FPToIntRounding_Intrinsic; // Vector FP Rounding: only ties to even is unrepresented by a normal // intrinsic. - def int_arm64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic; + def int_aarch64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic; // Scalar FP->Int conversions // Vector FP Inexact Narrowing - def int_arm64_neon_fcvtxn : AdvSIMD_1VectorArg_Expand_Intrinsic; + def int_aarch64_neon_fcvtxn : AdvSIMD_1VectorArg_Expand_Intrinsic; // Scalar FP Inexact Narrowing - def int_arm64_sisd_fcvtxn : Intrinsic<[llvm_float_ty], [llvm_double_ty], + def int_aarch64_sisd_fcvtxn : Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; } -let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". +let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_2Vector2Index_Intrinsic : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i64_ty, LLVMMatchType<0>, llvm_i64_ty], @@ -409,9 +409,9 @@ let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". } // Vector element to element moves -def int_arm64_neon_vcopy_lane: AdvSIMD_2Vector2Index_Intrinsic; +def int_aarch64_neon_vcopy_lane: AdvSIMD_2Vector2Index_Intrinsic; -let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". +let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_1Vec_Load_Intrinsic : Intrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType>], [IntrReadArgMem]>; @@ -482,35 +482,35 @@ let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". 
// Memory ops -def int_arm64_neon_ld1x2 : AdvSIMD_2Vec_Load_Intrinsic; -def int_arm64_neon_ld1x3 : AdvSIMD_3Vec_Load_Intrinsic; -def int_arm64_neon_ld1x4 : AdvSIMD_4Vec_Load_Intrinsic; +def int_aarch64_neon_ld1x2 : AdvSIMD_2Vec_Load_Intrinsic; +def int_aarch64_neon_ld1x3 : AdvSIMD_3Vec_Load_Intrinsic; +def int_aarch64_neon_ld1x4 : AdvSIMD_4Vec_Load_Intrinsic; -def int_arm64_neon_st1x2 : AdvSIMD_2Vec_Store_Intrinsic; -def int_arm64_neon_st1x3 : AdvSIMD_3Vec_Store_Intrinsic; -def int_arm64_neon_st1x4 : AdvSIMD_4Vec_Store_Intrinsic; +def int_aarch64_neon_st1x2 : AdvSIMD_2Vec_Store_Intrinsic; +def int_aarch64_neon_st1x3 : AdvSIMD_3Vec_Store_Intrinsic; +def int_aarch64_neon_st1x4 : AdvSIMD_4Vec_Store_Intrinsic; -def int_arm64_neon_ld2 : AdvSIMD_2Vec_Load_Intrinsic; -def int_arm64_neon_ld3 : AdvSIMD_3Vec_Load_Intrinsic; -def int_arm64_neon_ld4 : AdvSIMD_4Vec_Load_Intrinsic; +def int_aarch64_neon_ld2 : AdvSIMD_2Vec_Load_Intrinsic; +def int_aarch64_neon_ld3 : AdvSIMD_3Vec_Load_Intrinsic; +def int_aarch64_neon_ld4 : AdvSIMD_4Vec_Load_Intrinsic; -def int_arm64_neon_ld2lane : AdvSIMD_2Vec_Load_Lane_Intrinsic; -def int_arm64_neon_ld3lane : AdvSIMD_3Vec_Load_Lane_Intrinsic; -def int_arm64_neon_ld4lane : AdvSIMD_4Vec_Load_Lane_Intrinsic; +def int_aarch64_neon_ld2lane : AdvSIMD_2Vec_Load_Lane_Intrinsic; +def int_aarch64_neon_ld3lane : AdvSIMD_3Vec_Load_Lane_Intrinsic; +def int_aarch64_neon_ld4lane : AdvSIMD_4Vec_Load_Lane_Intrinsic; -def int_arm64_neon_ld2r : AdvSIMD_2Vec_Load_Intrinsic; -def int_arm64_neon_ld3r : AdvSIMD_3Vec_Load_Intrinsic; -def int_arm64_neon_ld4r : AdvSIMD_4Vec_Load_Intrinsic; +def int_aarch64_neon_ld2r : AdvSIMD_2Vec_Load_Intrinsic; +def int_aarch64_neon_ld3r : AdvSIMD_3Vec_Load_Intrinsic; +def int_aarch64_neon_ld4r : AdvSIMD_4Vec_Load_Intrinsic; -def int_arm64_neon_st2 : AdvSIMD_2Vec_Store_Intrinsic; -def int_arm64_neon_st3 : AdvSIMD_3Vec_Store_Intrinsic; -def int_arm64_neon_st4 : AdvSIMD_4Vec_Store_Intrinsic; +def int_aarch64_neon_st2 : AdvSIMD_2Vec_Store_Intrinsic; +def int_aarch64_neon_st3 : AdvSIMD_3Vec_Store_Intrinsic; +def int_aarch64_neon_st4 : AdvSIMD_4Vec_Store_Intrinsic; -def int_arm64_neon_st2lane : AdvSIMD_2Vec_Store_Lane_Intrinsic; -def int_arm64_neon_st3lane : AdvSIMD_3Vec_Store_Lane_Intrinsic; -def int_arm64_neon_st4lane : AdvSIMD_4Vec_Store_Lane_Intrinsic; +def int_aarch64_neon_st2lane : AdvSIMD_2Vec_Store_Lane_Intrinsic; +def int_aarch64_neon_st3lane : AdvSIMD_3Vec_Store_Lane_Intrinsic; +def int_aarch64_neon_st4lane : AdvSIMD_4Vec_Store_Lane_Intrinsic; -let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". +let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_Tbl1_Intrinsic : Intrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; @@ -548,17 +548,17 @@ let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". 
llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; } -def int_arm64_neon_tbl1 : AdvSIMD_Tbl1_Intrinsic; -def int_arm64_neon_tbl2 : AdvSIMD_Tbl2_Intrinsic; -def int_arm64_neon_tbl3 : AdvSIMD_Tbl3_Intrinsic; -def int_arm64_neon_tbl4 : AdvSIMD_Tbl4_Intrinsic; +def int_aarch64_neon_tbl1 : AdvSIMD_Tbl1_Intrinsic; +def int_aarch64_neon_tbl2 : AdvSIMD_Tbl2_Intrinsic; +def int_aarch64_neon_tbl3 : AdvSIMD_Tbl3_Intrinsic; +def int_aarch64_neon_tbl4 : AdvSIMD_Tbl4_Intrinsic; -def int_arm64_neon_tbx1 : AdvSIMD_Tbx1_Intrinsic; -def int_arm64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic; -def int_arm64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic; -def int_arm64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic; +def int_aarch64_neon_tbx1 : AdvSIMD_Tbx1_Intrinsic; +def int_aarch64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic; +def int_aarch64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic; +def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic; -let TargetPrefix = "arm64" in { +let TargetPrefix = "aarch64" in { class Crypto_AES_DataKey_Intrinsic : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -592,45 +592,45 @@ let TargetPrefix = "arm64" in { } // AES -def int_arm64_crypto_aese : Crypto_AES_DataKey_Intrinsic; -def int_arm64_crypto_aesd : Crypto_AES_DataKey_Intrinsic; -def int_arm64_crypto_aesmc : Crypto_AES_Data_Intrinsic; -def int_arm64_crypto_aesimc : Crypto_AES_Data_Intrinsic; +def int_aarch64_crypto_aese : Crypto_AES_DataKey_Intrinsic; +def int_aarch64_crypto_aesd : Crypto_AES_DataKey_Intrinsic; +def int_aarch64_crypto_aesmc : Crypto_AES_Data_Intrinsic; +def int_aarch64_crypto_aesimc : Crypto_AES_Data_Intrinsic; // SHA1 -def int_arm64_crypto_sha1c : Crypto_SHA_5Hash4Schedule_Intrinsic; -def int_arm64_crypto_sha1p : Crypto_SHA_5Hash4Schedule_Intrinsic; -def int_arm64_crypto_sha1m : Crypto_SHA_5Hash4Schedule_Intrinsic; -def int_arm64_crypto_sha1h : Crypto_SHA_1Hash_Intrinsic; +def int_aarch64_crypto_sha1c : Crypto_SHA_5Hash4Schedule_Intrinsic; +def int_aarch64_crypto_sha1p : Crypto_SHA_5Hash4Schedule_Intrinsic; +def int_aarch64_crypto_sha1m : Crypto_SHA_5Hash4Schedule_Intrinsic; +def int_aarch64_crypto_sha1h : Crypto_SHA_1Hash_Intrinsic; -def int_arm64_crypto_sha1su0 : Crypto_SHA_12Schedule_Intrinsic; -def int_arm64_crypto_sha1su1 : Crypto_SHA_8Schedule_Intrinsic; +def int_aarch64_crypto_sha1su0 : Crypto_SHA_12Schedule_Intrinsic; +def int_aarch64_crypto_sha1su1 : Crypto_SHA_8Schedule_Intrinsic; // SHA256 -def int_arm64_crypto_sha256h : Crypto_SHA_8Hash4Schedule_Intrinsic; -def int_arm64_crypto_sha256h2 : Crypto_SHA_8Hash4Schedule_Intrinsic; -def int_arm64_crypto_sha256su0 : Crypto_SHA_8Schedule_Intrinsic; -def int_arm64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic; +def int_aarch64_crypto_sha256h : Crypto_SHA_8Hash4Schedule_Intrinsic; +def int_aarch64_crypto_sha256h2 : Crypto_SHA_8Hash4Schedule_Intrinsic; +def int_aarch64_crypto_sha256su0 : Crypto_SHA_8Schedule_Intrinsic; +def int_aarch64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic; //===----------------------------------------------------------------------===// // CRC32 -let TargetPrefix = "arm64" in { +let TargetPrefix = "aarch64" in { -def int_arm64_crc32b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm64_crc32cb : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32cb : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm64_crc32h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def 
int_aarch64_crc32h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm64_crc32ch : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32ch : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm64_crc32w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm64_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm64_crc32x : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], +def int_aarch64_crc32x : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; -def int_arm64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], +def int_aarch64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index a70b03d95cf8..2b425fbdd339 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -168,8 +168,9 @@ void RuntimeDyldMachO::resolveRelocation(const RelocationEntry &RE, case Triple::thumb: resolveARMRelocation(RE, Value); break; + case Triple::aarch64: case Triple::arm64: - resolveARM64Relocation(RE, Value); + resolveAArch64Relocation(RE, Value); break; } } @@ -289,8 +290,8 @@ bool RuntimeDyldMachO::resolveARMRelocation(const RelocationEntry &RE, return false; } -bool RuntimeDyldMachO::resolveARM64Relocation(const RelocationEntry &RE, - uint64_t Value) { +bool RuntimeDyldMachO::resolveAArch64Relocation(const RelocationEntry &RE, + uint64_t Value) { const SectionEntry &Section = Sections[RE.SectionID]; uint8_t* LocalAddress = Section.Address + RE.Offset; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 08573eed5c87..060eb8c29a2b 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -41,7 +41,7 @@ class RuntimeDyldMachO : public RuntimeDyldImpl { bool resolveI386Relocation(const RelocationEntry &RE, uint64_t Value); bool resolveX86_64Relocation(const RelocationEntry &RE, uint64_t Value); bool resolveARMRelocation(const RelocationEntry &RE, uint64_t Value); - bool resolveARM64Relocation(const RelocationEntry &RE, uint64_t Value); + bool resolveAArch64Relocation(const RelocationEntry &RE, uint64_t Value); // Populate stubs in __jump_table section. 
void populateJumpTable(MachOObjectFile &Obj, const SectionRef &JTSection, diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 028c19127170..99236bd24eaa 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -312,7 +312,8 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) { MCpu = "core2"; else if (Triple.getArch() == llvm::Triple::x86) MCpu = "yonah"; - else if (Triple.getArch() == llvm::Triple::arm64) + else if (Triple.getArch() == llvm::Triple::arm64 || + Triple.getArch() == llvm::Triple::aarch64) MCpu = "cyclone"; } diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 255951a70706..d1175142651b 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -168,7 +168,8 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, CPU = "core2"; else if (Triple.getArch() == llvm::Triple::x86) CPU = "yonah"; - else if (Triple.getArch() == llvm::Triple::arm64) + else if (Triple.getArch() == llvm::Triple::arm64 || + Triple.getArch() == llvm::Triple::aarch64) CPU = "cyclone"; } diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index bb1327995047..9d413afe5db1 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -23,7 +23,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { IsFunctionEHFrameSymbolPrivate = false; SupportsWeakOmittedEHFrame = false; - if (T.isOSDarwin() && T.getArch() == Triple::arm64) + if (T.isOSDarwin() && + (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64)) SupportsCompactUnwindWithoutEHFrame = true; PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel @@ -151,7 +152,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { COFFDebugSymbolsSection = nullptr; if ((T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) || - (T.isOSDarwin() && T.getArch() == Triple::arm64)) { + (T.isOSDarwin() && + (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64))) { CompactUnwindSection = Ctx->getMachOSection("__LD", "__compact_unwind", MachO::S_ATTR_DEBUG, @@ -159,7 +161,7 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86) CompactUnwindDwarfEHFrameOnly = 0x04000000; - else if (T.getArch() == Triple::arm64) + else if (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64) CompactUnwindDwarfEHFrameOnly = 0x03000000; } @@ -785,7 +787,7 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm, // cellspu-apple-darwin. Perhaps we should fix in Triple? if ((Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || Arch == Triple::thumb || - Arch == Triple::arm64 || + Arch == Triple::arm64 || Arch == Triple::aarch64 || Arch == Triple::ppc || Arch == Triple::ppc64 || Arch == Triple::UnknownArch) && (T.isOSDarwin() || T.isOSBinFormatMachO())) { diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h new file mode 100644 index 000000000000..1c022aaf86bd --- /dev/null +++ b/lib/Target/AArch64/AArch64.h @@ -0,0 +1,49 @@ +//==-- AArch64.h - Top-level interface for AArch64 --------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// AArch64 back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_AArch64_H +#define TARGET_AArch64_H + +#include "Utils/AArch64BaseInfo.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class AArch64TargetMachine; +class FunctionPass; +class MachineFunctionPass; + +FunctionPass *createAArch64DeadRegisterDefinitions(); +FunctionPass *createAArch64ConditionalCompares(); +FunctionPass *createAArch64AdvSIMDScalar(); +FunctionPass *createAArch64BranchRelaxation(); +FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM, + CodeGenOpt::Level OptLevel); +FunctionPass *createAArch64StorePairSuppressPass(); +FunctionPass *createAArch64ExpandPseudoPass(); +FunctionPass *createAArch64LoadStoreOptimizationPass(); +ModulePass *createAArch64PromoteConstantPass(); +FunctionPass *createAArch64AddressTypePromotionPass(); +/// \brief Creates an ARM-specific Target Transformation Info pass. +ImmutablePass * +createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM); + +FunctionPass *createAArch64CleanupLocalDynamicTLSPass(); + +FunctionPass *createAArch64CollectLOHPass(); +} // end namespace llvm + +#endif diff --git a/lib/Target/ARM64/ARM64.td b/lib/Target/AArch64/AArch64.td similarity index 90% rename from lib/Target/ARM64/ARM64.td rename to lib/Target/AArch64/AArch64.td index c473205f17ca..1ad5ac8c6f38 100644 --- a/lib/Target/ARM64/ARM64.td +++ b/lib/Target/AArch64/AArch64.td @@ -1,4 +1,4 @@ -//===- ARM64.td - Describe the ARM64 Target Machine --------*- tablegen -*-===// +//=- AArch64.td - Describe the AArch64 Target Machine --------*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -17,7 +17,7 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// -// ARM64 Subtarget features. +// AArch64 Subtarget features. // def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", @@ -44,23 +44,23 @@ def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", // Register File Description //===----------------------------------------------------------------------===// -include "ARM64RegisterInfo.td" -include "ARM64CallingConvention.td" +include "AArch64RegisterInfo.td" +include "AArch64CallingConvention.td" //===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// -include "ARM64Schedule.td" -include "ARM64InstrInfo.td" +include "AArch64Schedule.td" +include "AArch64InstrInfo.td" -def ARM64InstrInfo : InstrInfo; +def AArch64InstrInfo : InstrInfo; //===----------------------------------------------------------------------===// -// ARM64 Processors supported. +// AArch64 Processors supported. 
// -include "ARM64SchedA53.td" -include "ARM64SchedCyclone.td" +include "AArch64SchedA53.td" +include "AArch64SchedCyclone.td" def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", "Cortex-A53 ARM processors", @@ -109,7 +109,7 @@ def AppleAsmParserVariant : AsmParserVariant { //===----------------------------------------------------------------------===// // Assembly printer //===----------------------------------------------------------------------===// -// ARM64 Uses the MC printer for asm output, so make sure the TableGen +// AArch64 Uses the MC printer for asm output, so make sure the TableGen // AsmWriter bits get associated with the correct class. def GenericAsmWriter : AsmWriter { string AsmWriterClassName = "InstPrinter"; @@ -127,8 +127,8 @@ def AppleAsmWriter : AsmWriter { // Target Declaration //===----------------------------------------------------------------------===// -def ARM64 : Target { - let InstructionSet = ARM64InstrInfo; +def AArch64 : Target { + let InstructionSet = AArch64InstrInfo; let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant]; let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter]; } diff --git a/lib/Target/ARM64/ARM64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp similarity index 90% rename from lib/Target/ARM64/ARM64AddressTypePromotion.cpp rename to lib/Target/AArch64/AArch64AddressTypePromotion.cpp index be2b5eed2ad2..04906f6078f8 100644 --- a/lib/Target/ARM64/ARM64AddressTypePromotion.cpp +++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp @@ -1,5 +1,4 @@ - -//===-- ARM64AddressTypePromotion.cpp --- Promote type for addr accesses -===// +//===-- AArch64AddressTypePromotion.cpp --- Promote type for addr accesses -==// // // The LLVM Compiler Infrastructure // @@ -29,7 +28,7 @@ // FIXME: This pass may be useful for other targets too. 
// ===---------------------------------------------------------------------===// -#include "ARM64.h" +#include "AArch64.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -45,38 +44,38 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-type-promotion" +#define DEBUG_TYPE "aarch64-type-promotion" static cl::opt -EnableAddressTypePromotion("arm64-type-promotion", cl::Hidden, +EnableAddressTypePromotion("aarch64-type-promotion", cl::Hidden, cl::desc("Enable the type promotion pass"), cl::init(true)); static cl::opt -EnableMerge("arm64-type-promotion-merge", cl::Hidden, +EnableMerge("aarch64-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true)); //===----------------------------------------------------------------------===// -// ARM64AddressTypePromotion +// AArch64AddressTypePromotion //===----------------------------------------------------------------------===// namespace llvm { -void initializeARM64AddressTypePromotionPass(PassRegistry &); +void initializeAArch64AddressTypePromotionPass(PassRegistry &); } namespace { -class ARM64AddressTypePromotion : public FunctionPass { +class AArch64AddressTypePromotion : public FunctionPass { public: static char ID; - ARM64AddressTypePromotion() + AArch64AddressTypePromotion() : FunctionPass(ID), Func(nullptr), ConsideredSExtType(nullptr) { - initializeARM64AddressTypePromotionPass(*PassRegistry::getPassRegistry()); + initializeAArch64AddressTypePromotionPass(*PassRegistry::getPassRegistry()); } const char *getPassName() const override { - return "ARM64 Address Type Promotion"; + return "AArch64 Address Type Promotion"; } /// Iterate over the functions and promote the computation of interesting @@ -140,19 +139,19 @@ class ARM64AddressTypePromotion : public FunctionPass { }; } // end anonymous namespace. -char ARM64AddressTypePromotion::ID = 0; +char AArch64AddressTypePromotion::ID = 0; -INITIALIZE_PASS_BEGIN(ARM64AddressTypePromotion, "arm64-type-promotion", - "ARM64 Type Promotion Pass", false, false) +INITIALIZE_PASS_BEGIN(AArch64AddressTypePromotion, "aarch64-type-promotion", + "AArch64 Type Promotion Pass", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(ARM64AddressTypePromotion, "arm64-type-promotion", - "ARM64 Type Promotion Pass", false, false) +INITIALIZE_PASS_END(AArch64AddressTypePromotion, "aarch64-type-promotion", + "AArch64 Type Promotion Pass", false, false) -FunctionPass *llvm::createARM64AddressTypePromotionPass() { - return new ARM64AddressTypePromotion(); +FunctionPass *llvm::createAArch64AddressTypePromotionPass() { + return new AArch64AddressTypePromotion(); } -bool ARM64AddressTypePromotion::canGetThrough(const Instruction *Inst) { +bool AArch64AddressTypePromotion::canGetThrough(const Instruction *Inst) { if (isa(Inst)) return true; @@ -175,7 +174,7 @@ bool ARM64AddressTypePromotion::canGetThrough(const Instruction *Inst) { return false; } -bool ARM64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) { +bool AArch64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) { // If the type of the sext is the same as the considered one, this sext // will become useless. 
// Otherwise, we will have to do something to preserve the original value, @@ -211,7 +210,7 @@ static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) { } bool -ARM64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const { +AArch64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const { if (SExt->getType() != ConsideredSExtType) return false; @@ -249,7 +248,7 @@ ARM64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const { // = a // Iterate on 'c'. bool -ARM64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { +AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { DEBUG(dbgs() << "*** Propagate Sign Extension ***\n"); bool LocalChange = false; @@ -375,8 +374,8 @@ ARM64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { return LocalChange; } -void ARM64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses, - SetOfInstructions &ToRemove) { +void AArch64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses, + SetOfInstructions &ToRemove) { DominatorTree &DT = getAnalysis().getDomTree(); for (auto &Entry : ValToSExtendedUses) { @@ -414,7 +413,7 @@ void ARM64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses, } } -void ARM64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) { +void AArch64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) { DEBUG(dbgs() << "*** Analyze Sign Extensions ***\n"); DenseMap SeenChains; @@ -479,7 +478,7 @@ void ARM64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) { } } -bool ARM64AddressTypePromotion::runOnFunction(Function &F) { +bool AArch64AddressTypePromotion::runOnFunction(Function &F) { if (!EnableAddressTypePromotion || F.isDeclaration()) return false; Func = &F; diff --git a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp similarity index 85% rename from lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp rename to lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp index 5950a8f18e1e..734fb215e6ee 100644 --- a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp +++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp @@ -1,4 +1,4 @@ -//===-- ARM64AdvSIMDScalar.cpp - Replace dead defs w/ zero reg --===// +//===-- AArch64AdvSIMDScalar.cpp - Replace dead defs w/ zero reg --===// // // The LLVM Compiler Infrastructure // @@ -33,9 +33,9 @@ // solution. //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64RegisterInfo.h" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64RegisterInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunction.h" @@ -47,12 +47,12 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define DEBUG_TYPE "arm64-simd-scalar" +#define DEBUG_TYPE "aarch64-simd-scalar" // Allow forcing all i64 operations with equivalent SIMD instructions to use // them. For stress-testing the transformation function. 
static cl::opt -TransformAll("arm64-simd-scalar-force-all", +TransformAll("aarch64-simd-scalar-force-all", cl::desc("Force use of AdvSIMD scalar instructions everywhere"), cl::init(false), cl::Hidden); @@ -61,9 +61,9 @@ STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted"); STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted"); namespace { -class ARM64AdvSIMDScalar : public MachineFunctionPass { +class AArch64AdvSIMDScalar : public MachineFunctionPass { MachineRegisterInfo *MRI; - const ARM64InstrInfo *TII; + const AArch64InstrInfo *TII; private: // isProfitableToTransform - Predicate function to determine whether an @@ -81,7 +81,7 @@ class ARM64AdvSIMDScalar : public MachineFunctionPass { public: static char ID; // Pass identification, replacement for typeid. - explicit ARM64AdvSIMDScalar() : MachineFunctionPass(ID) {} + explicit AArch64AdvSIMDScalar() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &F) override; @@ -94,7 +94,7 @@ class ARM64AdvSIMDScalar : public MachineFunctionPass { MachineFunctionPass::getAnalysisUsage(AU); } }; -char ARM64AdvSIMDScalar::ID = 0; +char AArch64AdvSIMDScalar::ID = 0; } // end anonymous namespace static bool isGPR64(unsigned Reg, unsigned SubReg, @@ -102,20 +102,20 @@ static bool isGPR64(unsigned Reg, unsigned SubReg, if (SubReg) return false; if (TargetRegisterInfo::isVirtualRegister(Reg)) - return MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::GPR64RegClass); - return ARM64::GPR64RegClass.contains(Reg); + return MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::GPR64RegClass); + return AArch64::GPR64RegClass.contains(Reg); } static bool isFPR64(unsigned Reg, unsigned SubReg, const MachineRegisterInfo *MRI) { if (TargetRegisterInfo::isVirtualRegister(Reg)) - return (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR64RegClass) && + return (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR64RegClass) && SubReg == 0) || - (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR128RegClass) && - SubReg == ARM64::dsub); + (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR128RegClass) && + SubReg == AArch64::dsub); // Physical register references just check the register class directly. - return (ARM64::FPR64RegClass.contains(Reg) && SubReg == 0) || - (ARM64::FPR128RegClass.contains(Reg) && SubReg == ARM64::dsub); + return (AArch64::FPR64RegClass.contains(Reg) && SubReg == 0) || + (AArch64::FPR128RegClass.contains(Reg) && SubReg == AArch64::dsub); } // getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64 @@ -125,17 +125,18 @@ static unsigned getSrcFromCopy(const MachineInstr *MI, unsigned &SubReg) { SubReg = 0; // The "FMOV Xd, Dn" instruction is the typical form. - if (MI->getOpcode() == ARM64::FMOVDXr || MI->getOpcode() == ARM64::FMOVXDr) + if (MI->getOpcode() == AArch64::FMOVDXr || + MI->getOpcode() == AArch64::FMOVXDr) return MI->getOperand(1).getReg(); // A lane zero extract "UMOV.d Xd, Vn[0]" is equivalent. We shouldn't see // these at this stage, but it's easy to check for. - if (MI->getOpcode() == ARM64::UMOVvi64 && MI->getOperand(2).getImm() == 0) { - SubReg = ARM64::dsub; + if (MI->getOpcode() == AArch64::UMOVvi64 && MI->getOperand(2).getImm() == 0) { + SubReg = AArch64::dsub; return MI->getOperand(1).getReg(); } // Or just a plain COPY instruction. This can be directly to/from FPR64, // or it can be a dsub subreg reference to an FPR128. 
- if (MI->getOpcode() == ARM64::COPY) { + if (MI->getOpcode() == AArch64::COPY) { if (isFPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(), MRI) && isGPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), MRI)) @@ -161,10 +162,10 @@ static int getTransformOpcode(unsigned Opc) { default: break; // FIXME: Lots more possibilities. - case ARM64::ADDXrr: - return ARM64::ADDv1i64; - case ARM64::SUBXrr: - return ARM64::SUBv1i64; + case AArch64::ADDXrr: + return AArch64::ADDv1i64; + case AArch64::SUBXrr: + return AArch64::SUBv1i64; } // No AdvSIMD equivalent, so just return the original opcode. return Opc; @@ -178,7 +179,8 @@ static bool isTransformable(const MachineInstr *MI) { // isProfitableToTransform - Predicate function to determine whether an // instruction should be transformed to its equivalent AdvSIMD scalar // instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example. -bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const { +bool +AArch64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const { // If this instruction isn't eligible to be transformed (no SIMD equivalent), // early exit since that's the common case. if (!isTransformable(MI)) @@ -238,8 +240,8 @@ bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const { // preferable to have it use the FPR64 in most cases, as if the source // vector is an IMPLICIT_DEF, the INSERT_SUBREG just goes away entirely. // Ditto for a lane insert. - else if (Use->getOpcode() == ARM64::INSERT_SUBREG || - Use->getOpcode() == ARM64::INSvi64gpr) + else if (Use->getOpcode() == AArch64::INSERT_SUBREG || + Use->getOpcode() == AArch64::INSvi64gpr) ; else AllUsesAreCopies = false; @@ -259,10 +261,10 @@ bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const { return TransformAll; } -static MachineInstr *insertCopy(const ARM64InstrInfo *TII, MachineInstr *MI, +static MachineInstr *insertCopy(const AArch64InstrInfo *TII, MachineInstr *MI, unsigned Dst, unsigned Src, bool IsKill) { MachineInstrBuilder MIB = - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(ARM64::COPY), + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AArch64::COPY), Dst) .addReg(Src, getKillRegState(IsKill)); DEBUG(dbgs() << " adding copy: " << *MIB); @@ -273,7 +275,7 @@ static MachineInstr *insertCopy(const ARM64InstrInfo *TII, MachineInstr *MI, // transformInstruction - Perform the transformation of an instruction // to its equivalant AdvSIMD scalar instruction. Update inputs and outputs // to be the correct register class, minimizing cross-class copies. -void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) { +void AArch64AdvSIMDScalar::transformInstruction(MachineInstr *MI) { DEBUG(dbgs() << "Scalar transform: " << *MI); MachineBasicBlock *MBB = MI->getParent(); @@ -316,19 +318,19 @@ void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) { // copy. if (!Src0) { SubReg0 = 0; - Src0 = MRI->createVirtualRegister(&ARM64::FPR64RegClass); + Src0 = MRI->createVirtualRegister(&AArch64::FPR64RegClass); insertCopy(TII, MI, Src0, OrigSrc0, true); } if (!Src1) { SubReg1 = 0; - Src1 = MRI->createVirtualRegister(&ARM64::FPR64RegClass); + Src1 = MRI->createVirtualRegister(&AArch64::FPR64RegClass); insertCopy(TII, MI, Src1, OrigSrc1, true); } // Create a vreg for the destination. // FIXME: No need to do this if the ultimate user expects an FPR64. // Check for that and avoid the copy if possible. 
- unsigned Dst = MRI->createVirtualRegister(&ARM64::FPR64RegClass); + unsigned Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass); // For now, all of the new instructions have the same simple three-register // form, so no need to special case based on what instruction we're @@ -349,7 +351,7 @@ void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) { } // processMachineBasicBlock - Main optimzation loop. -bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) { +bool AArch64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) { bool Changed = false; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { MachineInstr *MI = I; @@ -363,13 +365,13 @@ bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) { } // runOnMachineFunction - Pass entry point from PassManager. -bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { +bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { bool Changed = false; - DEBUG(dbgs() << "***** ARM64AdvSIMDScalar *****\n"); + DEBUG(dbgs() << "***** AArch64AdvSIMDScalar *****\n"); const TargetMachine &TM = mf.getTarget(); MRI = &mf.getRegInfo(); - TII = static_cast(TM.getInstrInfo()); + TII = static_cast(TM.getInstrInfo()); // Just check things on a one-block-at-a-time basis. for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) @@ -378,8 +380,8 @@ bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { return Changed; } -// createARM64AdvSIMDScalar - Factory function used by ARM64TargetMachine +// createAArch64AdvSIMDScalar - Factory function used by AArch64TargetMachine // to add the pass to the PassManager. -FunctionPass *llvm::createARM64AdvSIMDScalar() { - return new ARM64AdvSIMDScalar(); +FunctionPass *llvm::createAArch64AdvSIMDScalar() { + return new AArch64AdvSIMDScalar(); } diff --git a/lib/Target/ARM64/ARM64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp similarity index 74% rename from lib/Target/ARM64/ARM64AsmPrinter.cpp rename to lib/Target/AArch64/AArch64AsmPrinter.cpp index 7e17985bf4a2..8553a591fee1 100644 --- a/lib/Target/ARM64/ARM64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1,4 +1,4 @@ -//===-- ARM64AsmPrinter.cpp - ARM64 LLVM assembly writer ------------------===// +//===-- AArch64AsmPrinter.cpp - AArch64 LLVM assembly writer --------------===// // // The LLVM Compiler Infrastructure // @@ -8,16 +8,16 @@ //===----------------------------------------------------------------------===// // // This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to the ARM64 assembly language. +// of machine-dependent LLVM code to the AArch64 assembly language. 
// //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64MCInstLower.h" -#include "ARM64RegisterInfo.h" -#include "ARM64Subtarget.h" -#include "InstPrinter/ARM64InstPrinter.h" +#include "AArch64.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64MCInstLower.h" +#include "AArch64RegisterInfo.h" +#include "AArch64Subtarget.h" +#include "InstPrinter/AArch64InstPrinter.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" @@ -42,21 +42,24 @@ using namespace llvm; namespace { -class ARM64AsmPrinter : public AsmPrinter { - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can +class AArch64AsmPrinter : public AsmPrinter { + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when printing asm code for different targets. - const ARM64Subtarget *Subtarget; + const AArch64Subtarget *Subtarget; - ARM64MCInstLower MCInstLowering; + AArch64MCInstLower MCInstLowering; StackMaps SM; public: - ARM64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), Subtarget(&TM.getSubtarget()), - MCInstLowering(OutContext, *Mang, *this), SM(*this), ARM64FI(nullptr), + AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer), + Subtarget(&TM.getSubtarget()), + MCInstLowering(OutContext, *Mang, *this), SM(*this), AArch64FI(nullptr), LOHLabelCounter(0) {} - const char *getPassName() const override { return "ARM64 Assembly Printer"; } + const char *getPassName() const override { + return "AArch64 Assembly Printer"; + } /// \brief Wrapper for MCInstLowering.lowerOperand() for the /// tblgen'erated pseudo lowering. @@ -81,7 +84,7 @@ class ARM64AsmPrinter : public AsmPrinter { } bool runOnMachineFunction(MachineFunction &F) override { - ARM64FI = F.getInfo(); + AArch64FI = F.getInfo(); return AsmPrinter::runOnMachineFunction(F); } @@ -106,9 +109,9 @@ class ARM64AsmPrinter : public AsmPrinter { MCSymbol *GetCPISymbol(unsigned CPID) const override; void EmitEndOfAsmFile(Module &M) override; - ARM64FunctionInfo *ARM64FI; + AArch64FunctionInfo *AArch64FI; - /// \brief Emit the LOHs contained in ARM64FI. + /// \brief Emit the LOHs contained in AArch64FI. void EmitLOHs(); typedef std::map MInstToMCSymbol; @@ -120,7 +123,7 @@ class ARM64AsmPrinter : public AsmPrinter { //===----------------------------------------------------------------------===// -void ARM64AsmPrinter::EmitEndOfAsmFile(Module &M) { +void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetMachO()) { // Funny Darwin hack: This flag tells the linker that no global symbols // contain code that falls through to other global symbols (e.g. the obvious @@ -156,7 +159,7 @@ void ARM64AsmPrinter::EmitEndOfAsmFile(Module &M) { } MachineLocation -ARM64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { +AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { MachineLocation Location; assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); // Frame address. Currently handles register +- offset only. 
@@ -168,10 +171,10 @@ ARM64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { return Location; } -void ARM64AsmPrinter::EmitLOHs() { +void AArch64AsmPrinter::EmitLOHs() { SmallVector MCArgs; - for (const auto &D : ARM64FI->getLOHContainer()) { + for (const auto &D : AArch64FI->getLOHContainer()) { for (const MachineInstr *MI : D.getArgs()) { MInstToMCSymbol::iterator LabelIt = LOHInstToLabel.find(MI); assert(LabelIt != LOHInstToLabel.end() && @@ -183,13 +186,13 @@ void ARM64AsmPrinter::EmitLOHs() { } } -void ARM64AsmPrinter::EmitFunctionBodyEnd() { - if (!ARM64FI->getLOHRelated().empty()) +void AArch64AsmPrinter::EmitFunctionBodyEnd() { + if (!AArch64FI->getLOHRelated().empty()) EmitLOHs(); } /// GetCPISymbol - Return the symbol for the specified constant pool entry. -MCSymbol *ARM64AsmPrinter::GetCPISymbol(unsigned CPID) const { +MCSymbol *AArch64AsmPrinter::GetCPISymbol(unsigned CPID) const { // Darwin uses a linker-private symbol name for constant-pools (to // avoid addends on the relocation?), ELF has no such concept and // uses a normal private symbol. @@ -203,8 +206,8 @@ MCSymbol *ARM64AsmPrinter::GetCPISymbol(unsigned CPID) const { Twine(getFunctionNumber()) + "_" + Twine(CPID)); } -void ARM64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, - raw_ostream &O) { +void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, + raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { default: @@ -213,7 +216,7 @@ void ARM64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); assert(!MO.getSubReg() && "Subregs should be eliminated!"); - O << ARM64InstPrinter::getRegisterName(Reg); + O << AArch64InstPrinter::getRegisterName(Reg); break; } case MachineOperand::MO_Immediate: { @@ -224,8 +227,8 @@ void ARM64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, } } -bool ARM64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, - raw_ostream &O) { +bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, + raw_ostream &O) { unsigned Reg = MO.getReg(); switch (Mode) { default: @@ -238,30 +241,30 @@ bool ARM64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, break; } - O << ARM64InstPrinter::getRegisterName(Reg); + O << AArch64InstPrinter::getRegisterName(Reg); return false; } // Prints the register in MO using class RC using the offset in the // new register class. This should not be used for cross class // printing. -bool ARM64AsmPrinter::printAsmRegInClass(const MachineOperand &MO, - const TargetRegisterClass *RC, - bool isVector, raw_ostream &O) { +bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO, + const TargetRegisterClass *RC, + bool isVector, raw_ostream &O) { assert(MO.isReg() && "Should only get here with a register!"); - const ARM64RegisterInfo *RI = - static_cast(TM.getRegisterInfo()); + const AArch64RegisterInfo *RI = + static_cast(TM.getRegisterInfo()); unsigned Reg = MO.getReg(); unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg)); assert(RI->regsOverlap(RegToPrint, Reg)); - O << ARM64InstPrinter::getRegisterName( - RegToPrint, isVector ? ARM64::vreg : ARM64::NoRegAltName); + O << AArch64InstPrinter::getRegisterName( + RegToPrint, isVector ? 
AArch64::vreg : AArch64::NoRegAltName); return false; } -bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O) { +bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNum); // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { @@ -276,8 +279,8 @@ bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (MO.isReg()) return printAsmMRegister(MO, ExtraCode[0], O); if (MO.isImm() && MO.getImm() == 0) { - unsigned Reg = ExtraCode[0] == 'w' ? ARM64::WZR : ARM64::XZR; - O << ARM64InstPrinter::getRegisterName(Reg); + unsigned Reg = ExtraCode[0] == 'w' ? AArch64::WZR : AArch64::XZR; + O << AArch64InstPrinter::getRegisterName(Reg); return false; } printOperand(MI, OpNum, O); @@ -291,19 +294,19 @@ bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const TargetRegisterClass *RC; switch (ExtraCode[0]) { case 'b': - RC = &ARM64::FPR8RegClass; + RC = &AArch64::FPR8RegClass; break; case 'h': - RC = &ARM64::FPR16RegClass; + RC = &AArch64::FPR16RegClass; break; case 's': - RC = &ARM64::FPR32RegClass; + RC = &AArch64::FPR32RegClass; break; case 'd': - RC = &ARM64::FPR64RegClass; + RC = &AArch64::FPR64RegClass; break; case 'q': - RC = &ARM64::FPR128RegClass; + RC = &AArch64::FPR128RegClass; break; default: return true; @@ -321,33 +324,35 @@ bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned Reg = MO.getReg(); // If this is a w or x register, print an x register. - if (ARM64::GPR32allRegClass.contains(Reg) || - ARM64::GPR64allRegClass.contains(Reg)) + if (AArch64::GPR32allRegClass.contains(Reg) || + AArch64::GPR64allRegClass.contains(Reg)) return printAsmMRegister(MO, 'x', O); // If this is a b, h, s, d, or q register, print it as a v register. - return printAsmRegInClass(MO, &ARM64::FPR128RegClass, true /* vector */, O); + return printAsmRegInClass(MO, &AArch64::FPR128RegClass, true /* vector */, + O); } printOperand(MI, OpNum, O); return false; } -bool ARM64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNum, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { +bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNum, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { if (ExtraCode && ExtraCode[0]) return true; // Unknown modifier. 
const MachineOperand &MO = MI->getOperand(OpNum); assert(MO.isReg() && "unexpected inline asm memory operand"); - O << "[" << ARM64InstPrinter::getRegisterName(MO.getReg()) << "]"; + O << "[" << AArch64InstPrinter::getRegisterName(MO.getReg()) << "]"; return false; } -void ARM64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &OS) { +void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &OS) { unsigned NOps = MI->getNumOperands(); assert(NOps == 4); OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; @@ -366,21 +371,21 @@ void ARM64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, printOperand(MI, NOps - 2, OS); } -void ARM64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { +void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI) { unsigned NumNOPBytes = MI.getOperand(1).getImm(); SM.recordStackMap(MI); // Emit padding. assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); for (unsigned i = 0; i < NumNOPBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0)); + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); } // Lower a patchpoint of the form: // [], , , , -void ARM64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { +void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI) { SM.recordPatchPoint(MI); PatchPointOpers Opers(&MI); @@ -393,21 +398,21 @@ void ARM64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); EncodedBytes = 16; // Materialize the jump address: - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVZWi) + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVZWi) .addReg(ScratchReg) .addImm((CallTarget >> 32) & 0xFFFF) .addImm(32)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi) + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVKWi) .addReg(ScratchReg) .addReg(ScratchReg) .addImm((CallTarget >> 16) & 0xFFFF) .addImm(16)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi) + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVKWi) .addReg(ScratchReg) .addReg(ScratchReg) .addImm(CallTarget & 0xFFFF) .addImm(0)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::BLR).addReg(ScratchReg)); + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::BLR).addReg(ScratchReg)); } // Emit padding. unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); @@ -416,19 +421,19 @@ void ARM64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, assert((NumBytes - EncodedBytes) % 4 == 0 && "Invalid number of NOP bytes requested!"); for (unsigned i = EncodedBytes; i < NumBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0)); + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); } // Simple pseudo-instructions have their lowering (with expansion to real // instructions) auto-generated. -#include "ARM64GenMCPseudoLowering.inc" +#include "AArch64GenMCPseudoLowering.inc" -void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { +void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { // Do any auto-generated pseudo lowerings. 
if (emitPseudoExpansionLowering(OutStreamer, MI)) return; - if (ARM64FI->getLOHRelated().count(MI)) { + if (AArch64FI->getLOHRelated().count(MI)) { // Generate a label for LOH related instruction MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++); // Associate the instruction with the label @@ -440,7 +445,7 @@ void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { switch (MI->getOpcode()) { default: break; - case ARM64::DBG_VALUE: { + case AArch64::DBG_VALUE: { if (isVerbose() && OutStreamer.hasRawTextSupport()) { SmallString<128> TmpStr; raw_svector_ostream OS(TmpStr); @@ -453,23 +458,23 @@ void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { // Tail calls use pseudo instructions so they have the proper code-gen // attributes (isCall, isReturn, etc.). We lower them to the real // instruction here. - case ARM64::TCRETURNri: { + case AArch64::TCRETURNri: { MCInst TmpInst; - TmpInst.setOpcode(ARM64::BR); + TmpInst.setOpcode(AArch64::BR); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); EmitToStreamer(OutStreamer, TmpInst); return; } - case ARM64::TCRETURNdi: { + case AArch64::TCRETURNdi: { MCOperand Dest; MCInstLowering.lowerOperand(MI->getOperand(0), Dest); MCInst TmpInst; - TmpInst.setOpcode(ARM64::B); + TmpInst.setOpcode(AArch64::B); TmpInst.addOperand(Dest); EmitToStreamer(OutStreamer, TmpInst); return; } - case ARM64::TLSDESC_BLR: { + case AArch64::TLSDESC_BLR: { MCOperand Callee, Sym; MCInstLowering.lowerOperand(MI->getOperand(0), Callee); MCInstLowering.lowerOperand(MI->getOperand(1), Sym); @@ -477,14 +482,14 @@ void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { // First emit a relocation-annotation. This expands to no code, but requests // the following instruction gets an R_AARCH64_TLSDESC_CALL. MCInst TLSDescCall; - TLSDescCall.setOpcode(ARM64::TLSDESCCALL); + TLSDescCall.setOpcode(AArch64::TLSDESCCALL); TLSDescCall.addOperand(Sym); EmitToStreamer(OutStreamer, TLSDescCall); // Other than that it's just a normal indirect call to the function loaded // from the descriptor. MCInst BLR; - BLR.setOpcode(ARM64::BLR); + BLR.setOpcode(AArch64::BLR); BLR.addOperand(Callee); EmitToStreamer(OutStreamer, BLR); @@ -505,10 +510,10 @@ void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { } // Force static initialization. 
-extern "C" void LLVMInitializeARM64AsmPrinter() { - RegisterAsmPrinter X(TheARM64leTarget); - RegisterAsmPrinter Y(TheARM64beTarget); +extern "C" void LLVMInitializeAArch64AsmPrinter() { + RegisterAsmPrinter X(TheAArch64leTarget); + RegisterAsmPrinter Y(TheAArch64beTarget); - RegisterAsmPrinter Z(TheAArch64leTarget); - RegisterAsmPrinter W(TheAArch64beTarget); + RegisterAsmPrinter Z(TheARM64leTarget); + RegisterAsmPrinter W(TheARM64beTarget); } diff --git a/lib/Target/ARM64/ARM64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp similarity index 78% rename from lib/Target/ARM64/ARM64BranchRelaxation.cpp rename to lib/Target/AArch64/AArch64BranchRelaxation.cpp index 73be3504790e..52094526727d 100644 --- a/lib/Target/ARM64/ARM64BranchRelaxation.cpp +++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp @@ -1,4 +1,4 @@ -//===-- ARM64BranchRelaxation.cpp - ARM64 branch relaxation ---------------===// +//===-- AArch64BranchRelaxation.cpp - AArch64 branch relaxation -----------===// // // The LLVM Compiler Infrastructure // @@ -9,9 +9,9 @@ // //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -23,29 +23,29 @@ #include "llvm/Support/CommandLine.h" using namespace llvm; -#define DEBUG_TYPE "arm64-branch-relax" +#define DEBUG_TYPE "aarch64-branch-relax" static cl::opt -BranchRelaxation("arm64-branch-relax", cl::Hidden, cl::init(true), +BranchRelaxation("aarch64-branch-relax", cl::Hidden, cl::init(true), cl::desc("Relax out of range conditional branches")); static cl::opt -TBZDisplacementBits("arm64-tbz-offset-bits", cl::Hidden, cl::init(14), +TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); static cl::opt -CBZDisplacementBits("arm64-cbz-offset-bits", cl::Hidden, cl::init(19), +CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)")); static cl::opt -BCCDisplacementBits("arm64-bcc-offset-bits", cl::Hidden, cl::init(19), +BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)")); STATISTIC(NumSplit, "Number of basic blocks split"); STATISTIC(NumRelaxed, "Number of conditional branches relaxed"); namespace { -class ARM64BranchRelaxation : public MachineFunctionPass { +class AArch64BranchRelaxation : public MachineFunctionPass { /// BasicBlockInfo - Information about the offset and size of a single /// basic block. 
struct BasicBlockInfo { @@ -77,7 +77,7 @@ class ARM64BranchRelaxation : public MachineFunctionPass { SmallVector BlockInfo; MachineFunction *MF; - const ARM64InstrInfo *TII; + const AArch64InstrInfo *TII; bool relaxBranchInstructions(); void scanFunction(); @@ -92,19 +92,19 @@ class ARM64BranchRelaxation : public MachineFunctionPass { public: static char ID; - ARM64BranchRelaxation() : MachineFunctionPass(ID) {} + AArch64BranchRelaxation() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { - return "ARM64 branch relaxation pass"; + return "AArch64 branch relaxation pass"; } }; -char ARM64BranchRelaxation::ID = 0; +char AArch64BranchRelaxation::ID = 0; } /// verify - check BBOffsets, BBSizes, alignment of islands -void ARM64BranchRelaxation::verify() { +void AArch64BranchRelaxation::verify() { #ifndef NDEBUG unsigned PrevNum = MF->begin()->getNumber(); for (MachineBasicBlock &MBB : *MF) { @@ -118,7 +118,7 @@ void ARM64BranchRelaxation::verify() { } /// print block size and offset information - debugging -void ARM64BranchRelaxation::dumpBBs() { +void AArch64BranchRelaxation::dumpBBs() { for (auto &MBB : *MF) { const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()]; dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) @@ -145,7 +145,7 @@ static bool BBHasFallthrough(MachineBasicBlock *MBB) { /// scanFunction - Do the initial scan of the function, building up /// information about each block. -void ARM64BranchRelaxation::scanFunction() { +void AArch64BranchRelaxation::scanFunction() { BlockInfo.clear(); BlockInfo.resize(MF->getNumBlockIDs()); @@ -162,7 +162,7 @@ void ARM64BranchRelaxation::scanFunction() { /// computeBlockSize - Compute the size for MBB. /// This function updates BlockInfo directly. -void ARM64BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) { +void AArch64BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) { unsigned Size = 0; for (const MachineInstr &MI : MBB) Size += TII->GetInstSizeInBytes(&MI); @@ -172,7 +172,7 @@ void ARM64BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) { /// getInstrOffset - Return the current offset of the specified machine /// instruction from the start of the function. This offset changes as stuff is /// moved around inside the function. -unsigned ARM64BranchRelaxation::getInstrOffset(MachineInstr *MI) const { +unsigned AArch64BranchRelaxation::getInstrOffset(MachineInstr *MI) const { MachineBasicBlock *MBB = MI->getParent(); // The offset is composed of two things: the sum of the sizes of all MBB's @@ -188,7 +188,7 @@ unsigned ARM64BranchRelaxation::getInstrOffset(MachineInstr *MI) const { return Offset; } -void ARM64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) { +void AArch64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) { unsigned PrevNum = Start.getNumber(); for (auto &MBB : make_range(MachineFunction::iterator(Start), MF->end())) { unsigned Num = MBB.getNumber(); @@ -209,7 +209,7 @@ void ARM64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) { /// and must be updated by the caller! Other transforms follow using this /// utility function, so no point updating now rather than waiting. MachineBasicBlock * -ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) { +AArch64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) { MachineBasicBlock *OrigBB = MI->getParent(); // Create a new MBB for the code after the OrigBB. 
@@ -226,7 +226,7 @@ ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) { // Note the new unconditional branch is not being recorded. // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond to anything in the source. - BuildMI(OrigBB, DebugLoc(), TII->get(ARM64::B)).addMBB(NewBB); + BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::B)).addMBB(NewBB); // Insert an entry into BlockInfo to align it properly with the block numbers. BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); @@ -252,9 +252,9 @@ ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) { /// isBlockInRange - Returns true if the distance between specific MI and /// specific BB can fit in MI's displacement field. -bool ARM64BranchRelaxation::isBlockInRange(MachineInstr *MI, - MachineBasicBlock *DestBB, - unsigned Bits) { +bool AArch64BranchRelaxation::isBlockInRange(MachineInstr *MI, + MachineBasicBlock *DestBB, + unsigned Bits) { unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2; unsigned BrOffset = getInstrOffset(MI); unsigned DestOffset = BlockInfo[DestBB->getNumber()].Offset; @@ -275,15 +275,15 @@ static bool isConditionalBranch(unsigned Opc) { switch (Opc) { default: return false; - case ARM64::TBZW: - case ARM64::TBNZW: - case ARM64::TBZX: - case ARM64::TBNZX: - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: - case ARM64::Bcc: + case AArch64::TBZW: + case AArch64::TBNZW: + case AArch64::TBZX: + case AArch64::TBNZX: + case AArch64::CBZW: + case AArch64::CBNZW: + case AArch64::CBZX: + case AArch64::CBNZX: + case AArch64::Bcc: return true; } } @@ -292,16 +292,16 @@ static MachineBasicBlock *getDestBlock(MachineInstr *MI) { switch (MI->getOpcode()) { default: assert(0 && "unexpected opcode!"); - case ARM64::TBZW: - case ARM64::TBNZW: - case ARM64::TBZX: - case ARM64::TBNZX: + case AArch64::TBZW: + case AArch64::TBNZW: + case AArch64::TBZX: + case AArch64::TBNZX: return MI->getOperand(2).getMBB(); - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: - case ARM64::Bcc: + case AArch64::CBZW: + case AArch64::CBNZW: + case AArch64::CBZX: + case AArch64::CBNZX: + case AArch64::Bcc: return MI->getOperand(1).getMBB(); } } @@ -310,15 +310,15 @@ static unsigned getOppositeConditionOpcode(unsigned Opc) { switch (Opc) { default: assert(0 && "unexpected opcode!"); - case ARM64::TBNZW: return ARM64::TBZW; - case ARM64::TBNZX: return ARM64::TBZX; - case ARM64::TBZW: return ARM64::TBNZW; - case ARM64::TBZX: return ARM64::TBNZX; - case ARM64::CBNZW: return ARM64::CBZW; - case ARM64::CBNZX: return ARM64::CBZX; - case ARM64::CBZW: return ARM64::CBNZW; - case ARM64::CBZX: return ARM64::CBNZX; - case ARM64::Bcc: return ARM64::Bcc; // Condition is an operand for Bcc. + case AArch64::TBNZW: return AArch64::TBZW; + case AArch64::TBNZX: return AArch64::TBZX; + case AArch64::TBZW: return AArch64::TBNZW; + case AArch64::TBZX: return AArch64::TBNZX; + case AArch64::CBNZW: return AArch64::CBZW; + case AArch64::CBNZX: return AArch64::CBZX; + case AArch64::CBZW: return AArch64::CBNZW; + case AArch64::CBZX: return AArch64::CBNZX; + case AArch64::Bcc: return AArch64::Bcc; // Condition is an operand for Bcc. 
} } @@ -326,32 +326,32 @@ static unsigned getBranchDisplacementBits(unsigned Opc) { switch (Opc) { default: assert(0 && "unexpected opcode!"); - case ARM64::TBNZW: - case ARM64::TBZW: - case ARM64::TBNZX: - case ARM64::TBZX: + case AArch64::TBNZW: + case AArch64::TBZW: + case AArch64::TBNZX: + case AArch64::TBZX: return TBZDisplacementBits; - case ARM64::CBNZW: - case ARM64::CBZW: - case ARM64::CBNZX: - case ARM64::CBZX: + case AArch64::CBNZW: + case AArch64::CBZW: + case AArch64::CBNZX: + case AArch64::CBZX: return CBZDisplacementBits; - case ARM64::Bcc: + case AArch64::Bcc: return BCCDisplacementBits; } } static inline void invertBccCondition(MachineInstr *MI) { - assert(MI->getOpcode() == ARM64::Bcc && "Unexpected opcode!"); - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(0).getImm(); - CC = ARM64CC::getInvertedCondCode(CC); + assert(MI->getOpcode() == AArch64::Bcc && "Unexpected opcode!"); + AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(0).getImm(); + CC = AArch64CC::getInvertedCondCode(CC); MI->getOperand(0).setImm((int64_t)CC); } /// fixupConditionalBranch - Fix up a conditional branch whose destination is /// too far away to fit in its displacement field. It is converted to an inverse /// conditional branch + an unconditional branch to the destination. -bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { +bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { MachineBasicBlock *DestBB = getDestBlock(MI); // Add an unconditional branch to the destination and invert the branch @@ -372,7 +372,7 @@ bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { if (BMI != MI) { if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->getLastNonDebugInstr()) && - BMI->getOpcode() == ARM64::B) { + BMI->getOpcode() == AArch64::B) { // Last MI in the BB is an unconditional branch. Can we simply invert the // condition and swap destinations: // beq L1 @@ -386,14 +386,15 @@ bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { DEBUG(dbgs() << " Invert condition and swap its destination with " << *BMI); BMI->getOperand(0).setMBB(DestBB); - unsigned OpNum = - (MI->getOpcode() == ARM64::TBZW || MI->getOpcode() == ARM64::TBNZW || - MI->getOpcode() == ARM64::TBZX || MI->getOpcode() == ARM64::TBNZX) - ? 2 - : 1; + unsigned OpNum = (MI->getOpcode() == AArch64::TBZW || + MI->getOpcode() == AArch64::TBNZW || + MI->getOpcode() == AArch64::TBZX || + MI->getOpcode() == AArch64::TBNZX) + ? 
2 + : 1; MI->getOperand(OpNum).setMBB(NewDest); MI->setDesc(TII->get(getOppositeConditionOpcode(MI->getOpcode()))); - if (MI->getOpcode() == ARM64::Bcc) + if (MI->getOpcode() == AArch64::Bcc) invertBccCondition(MI); return true; } @@ -429,14 +430,14 @@ bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { MachineInstrBuilder MIB = BuildMI( MBB, DebugLoc(), TII->get(getOppositeConditionOpcode(MI->getOpcode()))) .addOperand(MI->getOperand(0)); - if (MI->getOpcode() == ARM64::TBZW || MI->getOpcode() == ARM64::TBNZW || - MI->getOpcode() == ARM64::TBZX || MI->getOpcode() == ARM64::TBNZX) + if (MI->getOpcode() == AArch64::TBZW || MI->getOpcode() == AArch64::TBNZW || + MI->getOpcode() == AArch64::TBZX || MI->getOpcode() == AArch64::TBNZX) MIB.addOperand(MI->getOperand(1)); - if (MI->getOpcode() == ARM64::Bcc) + if (MI->getOpcode() == AArch64::Bcc) invertBccCondition(MIB); MIB.addMBB(NextBB); BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); - BuildMI(MBB, DebugLoc(), TII->get(ARM64::B)).addMBB(DestBB); + BuildMI(MBB, DebugLoc(), TII->get(AArch64::B)).addMBB(DestBB); BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); // Remove the old conditional branch. It may or may not still be in MBB. @@ -448,7 +449,7 @@ bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { return true; } -bool ARM64BranchRelaxation::relaxBranchInstructions() { +bool AArch64BranchRelaxation::relaxBranchInstructions() { bool Changed = false; // Relaxing branches involves creating new basic blocks, so re-eval // end() for termination. @@ -465,16 +466,16 @@ bool ARM64BranchRelaxation::relaxBranchInstructions() { return Changed; } -bool ARM64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { +bool AArch64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { MF = &mf; // If the pass is disabled, just bail early. if (!BranchRelaxation) return false; - DEBUG(dbgs() << "***** ARM64BranchRelaxation *****\n"); + DEBUG(dbgs() << "***** AArch64BranchRelaxation *****\n"); - TII = (const ARM64InstrInfo *)MF->getTarget().getInstrInfo(); + TII = (const AArch64InstrInfo *)MF->getTarget().getInstrInfo(); // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. @@ -502,8 +503,8 @@ bool ARM64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { return MadeChange; } -/// createARM64BranchRelaxation - returns an instance of the constpool +/// createAArch64BranchRelaxation - returns an instance of the constpool /// island pass. 
-FunctionPass *llvm::createARM64BranchRelaxation() { - return new ARM64BranchRelaxation(); +FunctionPass *llvm::createAArch64BranchRelaxation() { + return new AArch64BranchRelaxation(); } diff --git a/lib/Target/ARM64/ARM64CallingConv.h b/lib/Target/AArch64/AArch64CallingConv.h similarity index 65% rename from lib/Target/ARM64/ARM64CallingConv.h rename to lib/Target/AArch64/AArch64CallingConv.h index f24ba59dfb9b..1fe426ed686f 100644 --- a/lib/Target/ARM64/ARM64CallingConv.h +++ b/lib/Target/AArch64/AArch64CallingConv.h @@ -1,4 +1,4 @@ -//=== ARM64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===// +//=== AArch64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,38 +7,38 @@ // //===----------------------------------------------------------------------===// // -// This file contains the custom routines for the ARM64 Calling Convention that +// This file contains the custom routines for the AArch64 Calling Convention that // aren't done by tablegen. // //===----------------------------------------------------------------------===// -#ifndef ARM64CALLINGCONV_H -#define ARM64CALLINGCONV_H +#ifndef AArch64CALLINGCONV_H +#define AArch64CALLINGCONV_H -#include "ARM64InstrInfo.h" +#include "AArch64InstrInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/Target/TargetInstrInfo.h" namespace llvm { -/// CC_ARM64_Custom_i1i8i16_Reg - customized handling of passing i1/i8/i16 via +/// CC_AArch64_Custom_i1i8i16_Reg - customized handling of passing i1/i8/i16 via /// register. Here, ValVT can be i1/i8/i16 or i32 depending on whether the /// argument is already promoted and LocVT is i1/i8/i16. We only promote the /// argument to i32 if we are sure this argument will be passed in register. -static bool CC_ARM64_Custom_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, +static bool CC_AArch64_Custom_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsWebKitJS = false) { - static const MCPhysReg RegList1[] = { ARM64::W0, ARM64::W1, ARM64::W2, - ARM64::W3, ARM64::W4, ARM64::W5, - ARM64::W6, ARM64::W7 }; - static const MCPhysReg RegList2[] = { ARM64::X0, ARM64::X1, ARM64::X2, - ARM64::X3, ARM64::X4, ARM64::X5, - ARM64::X6, ARM64::X7 }; - static const MCPhysReg WebKitRegList1[] = { ARM64::W0 }; - static const MCPhysReg WebKitRegList2[] = { ARM64::X0 }; + static const MCPhysReg RegList1[] = { AArch64::W0, AArch64::W1, AArch64::W2, + AArch64::W3, AArch64::W4, AArch64::W5, + AArch64::W6, AArch64::W7 }; + static const MCPhysReg RegList2[] = { AArch64::X0, AArch64::X1, AArch64::X2, + AArch64::X3, AArch64::X4, AArch64::X5, + AArch64::X6, AArch64::X7 }; + static const MCPhysReg WebKitRegList1[] = { AArch64::W0 }; + static const MCPhysReg WebKitRegList2[] = { AArch64::X0 }; const MCPhysReg *List1 = IsWebKitJS ? WebKitRegList1 : RegList1; const MCPhysReg *List2 = IsWebKitJS ? WebKitRegList2 : RegList2; @@ -63,22 +63,22 @@ static bool CC_ARM64_Custom_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, return false; } -/// CC_ARM64_WebKit_JS_i1i8i16_Reg - customized handling of passing i1/i8/i16 -/// via register. This behaves the same as CC_ARM64_Custom_i1i8i16_Reg, but only +/// CC_AArch64_WebKit_JS_i1i8i16_Reg - customized handling of passing i1/i8/i16 +/// via register. This behaves the same as CC_AArch64_Custom_i1i8i16_Reg, but only /// uses the first register. 
-static bool CC_ARM64_WebKit_JS_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, +static bool CC_AArch64_WebKit_JS_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - return CC_ARM64_Custom_i1i8i16_Reg(ValNo, ValVT, LocVT, LocInfo, ArgFlags, + return CC_AArch64_Custom_i1i8i16_Reg(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, true); } -/// CC_ARM64_Custom_i1i8i16_Stack: customized handling of passing i1/i8/i16 on +/// CC_AArch64_Custom_i1i8i16_Stack: customized handling of passing i1/i8/i16 on /// stack. Here, ValVT can be i1/i8/i16 or i32 depending on whether the argument /// is already promoted and LocVT is i1/i8/i16. If ValVT is already promoted, /// it will be truncated back to i1/i8/i16. -static bool CC_ARM64_Custom_i1i8i16_Stack(unsigned ValNo, MVT ValVT, MVT LocVT, +static bool CC_AArch64_Custom_i1i8i16_Stack(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { diff --git a/lib/Target/ARM64/ARM64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td similarity index 92% rename from lib/Target/ARM64/ARM64CallingConvention.td rename to lib/Target/AArch64/AArch64CallingConvention.td index 0ef5601718d2..c263d14dcc37 100644 --- a/lib/Target/ARM64/ARM64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -1,4 +1,4 @@ -//===- ARM64CallingConv.td - Calling Conventions for ARM64 -*- tablegen -*-===// +//=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This describes the calling conventions for ARM64 architecture. +// This describes the calling conventions for AArch64 architecture. // //===----------------------------------------------------------------------===// @@ -22,7 +22,7 @@ class CCIfBigEndian : // ARM AAPCS64 Calling Convention //===----------------------------------------------------------------------===// -def CC_ARM64_AAPCS : CallingConv<[ +def CC_AArch64_AAPCS : CallingConv<[ CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32], CCBitConvertToType>, @@ -42,7 +42,7 @@ def CC_ARM64_AAPCS : CallingConv<[ // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>, + CCIfType<[i1, i8, i16], CCCustom<"CC_AArch64_Custom_i1i8i16_Reg">>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, // i128 is split to two i64s, we can't fit half to register X7. @@ -73,7 +73,7 @@ def CC_ARM64_AAPCS : CallingConv<[ CCAssignToStack<16, 16>> ]>; -def RetCC_ARM64_AAPCS : CallingConv<[ +def RetCC_AArch64_AAPCS : CallingConv<[ CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32], CCBitConvertToType>, @@ -104,7 +104,7 @@ def RetCC_ARM64_AAPCS : CallingConv<[ // from the standard one at this level: // + i128s (i.e. split i64s) don't need even registers. // + Stack slots are sized as needed rather than being at least 64-bit. -def CC_ARM64_DarwinPCS : CallingConv<[ +def CC_AArch64_DarwinPCS : CallingConv<[ CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, @@ -117,7 +117,7 @@ def CC_ARM64_DarwinPCS : CallingConv<[ // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. 
- CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>, + CCIfType<[i1, i8, i16], CCCustom<"CC_AArch64_Custom_i1i8i16_Reg">>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, // i128 is split to two i64s, we can't fit half to register X7. @@ -140,14 +140,14 @@ def CC_ARM64_DarwinPCS : CallingConv<[ CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, // If more than will fit in registers, pass them on the stack instead. - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Stack">>, + CCIfType<[i1, i8, i16], CCCustom<"CC_AArch64_Custom_i1i8i16_Stack">>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8], CCAssignToStack<8, 8>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>> ]>; -def CC_ARM64_DarwinPCS_VarArg : CallingConv<[ +def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, @@ -166,9 +166,9 @@ def CC_ARM64_DarwinPCS_VarArg : CallingConv<[ // in register and the remaining arguments on stack. We allow 32bit stack slots, // so that WebKit can write partial values in the stack and define the other // 32bit quantity as undef. -def CC_ARM64_WebKit_JS : CallingConv<[ +def CC_AArch64_WebKit_JS : CallingConv<[ // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_WebKit_JS_i1i8i16_Reg">>, + CCIfType<[i1, i8, i16], CCCustom<"CC_AArch64_WebKit_JS_i1i8i16_Reg">>, CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>, CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>, @@ -178,7 +178,7 @@ def CC_ARM64_WebKit_JS : CallingConv<[ CCIfType<[i64, f64], CCAssignToStack<8, 8>> ]>; -def RetCC_ARM64_WebKit_JS : CallingConv<[ +def RetCC_AArch64_WebKit_JS : CallingConv<[ CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], @@ -197,7 +197,7 @@ def RetCC_ARM64_WebKit_JS : CallingConv<[ // It would be better to model its preservation semantics properly (create a // vreg on entry, use it in RET & tail call generation; make that vreg def if we // end up saving LR as part of a call frame). Watch this space... -def CSR_ARM64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, +def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, D8, D9, D10, D11, D12, D13, D14, D15)>; @@ -210,24 +210,24 @@ def CSR_ARM64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, // (For generic ARM 64-bit ABI code, clang will not generate constructors or // destructors with 'this' returns, so this RegMask will not be used in that // case) -def CSR_ARM64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_ARM64_AAPCS, X0)>; +def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; // The function used by Darwin to obtain the address of a thread-local variable // guarantees more than a normal AAPCS function. x16 and x17 are used on the // fast path for calculation, but other registers except X0 (argument/return) // and LR (it is a call, after all) are preserved. -def CSR_ARM64_TLS_Darwin +def CSR_AArch64_TLS_Darwin : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17), FP, (sequence "Q%u", 0, 31))>; // The ELF stub used for TLS-descriptor access saves every feasible // register. Only X0 and LR are clobbered. 
-def CSR_ARM64_TLS_ELF +def CSR_AArch64_TLS_ELF : CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP, (sequence "Q%u", 0, 31))>; -def CSR_ARM64_AllRegs +def CSR_AArch64_AllRegs : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP, (sequence "X%u", 0, 28), FP, LR, SP, (sequence "B%u", 0, 31), (sequence "H%u", 0, 31), diff --git a/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp similarity index 81% rename from lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp rename to lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp index dce1301b92e1..4d23dc59d7ac 100644 --- a/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp +++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp @@ -1,4 +1,4 @@ -//===-- ARM64CleanupLocalDynamicTLSPass.cpp -----------------------*- C++ -*-=// +//===-- AArch64CleanupLocalDynamicTLSPass.cpp ---------------------*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -22,10 +22,10 @@ // pass looks through a function and performs such combinations. // //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64TargetMachine.h" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -39,7 +39,7 @@ struct LDTLSCleanup : public MachineFunctionPass { LDTLSCleanup() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override { - ARM64FunctionInfo *AFI = MF.getInfo(); + AArch64FunctionInfo *AFI = MF.getInfo(); if (AFI->getNumLocalDynamicTLSAccesses() < 2) { // No point folding accesses if there isn't at least two. return false; @@ -62,7 +62,7 @@ struct LDTLSCleanup : public MachineFunctionPass { for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { switch (I->getOpcode()) { - case ARM64::TLSDESC_BLR: + case AArch64::TLSDESC_BLR: // Make sure it's a local dynamic access. if (!I->getOperand(1).isSymbol() || strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_")) @@ -92,15 +92,15 @@ struct LDTLSCleanup : public MachineFunctionPass { MachineInstr *replaceTLSBaseAddrCall(MachineInstr *I, unsigned TLSBaseAddrReg) { MachineFunction *MF = I->getParent()->getParent(); - const ARM64TargetMachine *TM = - static_cast(&MF->getTarget()); - const ARM64InstrInfo *TII = TM->getInstrInfo(); + const AArch64TargetMachine *TM = + static_cast(&MF->getTarget()); + const AArch64InstrInfo *TII = TM->getInstrInfo(); // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the // code sequence assumes the address will be. - MachineInstr *Copy = - BuildMI(*I->getParent(), I, I->getDebugLoc(), - TII->get(TargetOpcode::COPY), ARM64::X0).addReg(TLSBaseAddrReg); + MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), + AArch64::X0).addReg(TLSBaseAddrReg); // Erase the TLS_base_addr instruction. I->eraseFromParent(); @@ -112,19 +112,19 @@ struct LDTLSCleanup : public MachineFunctionPass { // inserting a copy instruction after I. Returns the new instruction. 
MachineInstr *setRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) { MachineFunction *MF = I->getParent()->getParent(); - const ARM64TargetMachine *TM = - static_cast(&MF->getTarget()); - const ARM64InstrInfo *TII = TM->getInstrInfo(); + const AArch64TargetMachine *TM = + static_cast(&MF->getTarget()); + const AArch64InstrInfo *TII = TM->getInstrInfo(); // Create a virtual register for the TLS base address. MachineRegisterInfo &RegInfo = MF->getRegInfo(); - *TLSBaseAddrReg = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); + *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass); // Insert a copy from X0 to TLSBaseAddrReg for later. MachineInstr *Next = I->getNextNode(); MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), TII->get(TargetOpcode::COPY), - *TLSBaseAddrReg).addReg(ARM64::X0); + *TLSBaseAddrReg).addReg(AArch64::X0); return Copy; } @@ -142,6 +142,6 @@ struct LDTLSCleanup : public MachineFunctionPass { } char LDTLSCleanup::ID = 0; -FunctionPass *llvm::createARM64CleanupLocalDynamicTLSPass() { +FunctionPass *llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } diff --git a/lib/Target/ARM64/ARM64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp similarity index 91% rename from lib/Target/ARM64/ARM64CollectLOH.cpp rename to lib/Target/AArch64/AArch64CollectLOH.cpp index 8b48f3ae9b2a..6b1f09678e9a 100644 --- a/lib/Target/ARM64/ARM64CollectLOH.cpp +++ b/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -1,4 +1,4 @@ -//===-------------- ARM64CollectLOH.cpp - ARM64 collect LOH pass --*- C++ -*-=// +//===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -85,8 +85,8 @@ // This LOH aims at getting rid of redundant ADRP instructions. // // The overall design for emitting the LOHs is: -// 1. ARM64CollectLOH (this pass) records the LOHs in the ARM64FunctionInfo. -// 2. ARM64AsmPrinter reads the LOHs from ARM64FunctionInfo and it: +// 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo. +// 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it: // 1. Associates them a label. // 2. Emits them in a MCStreamer (EmitLOHDirective). // - The MCMachOStreamer records them into the MCAssembler. @@ -98,10 +98,10 @@ // - Other ObjectWriters ignore them. 
//===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" @@ -122,16 +122,16 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; -#define DEBUG_TYPE "arm64-collect-loh" +#define DEBUG_TYPE "aarch64-collect-loh" static cl::opt -PreCollectRegister("arm64-collect-loh-pre-collect-register", cl::Hidden, +PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden, cl::desc("Restrict analysis to registers invovled" " in LOHs"), cl::init(true)); static cl::opt -BasicBlockScopeOnly("arm64-collect-loh-bb-only", cl::Hidden, +BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden, cl::desc("Restrict analysis at basic block scope"), cl::init(true)); @@ -164,20 +164,20 @@ STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD"); STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD"); namespace llvm { -void initializeARM64CollectLOHPass(PassRegistry &); +void initializeAArch64CollectLOHPass(PassRegistry &); } namespace { -struct ARM64CollectLOH : public MachineFunctionPass { +struct AArch64CollectLOH : public MachineFunctionPass { static char ID; - ARM64CollectLOH() : MachineFunctionPass(ID) { - initializeARM64CollectLOHPass(*PassRegistry::getPassRegistry()); + AArch64CollectLOH() : MachineFunctionPass(ID) { + initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { - return "ARM64 Collect Linker Optimization Hint (LOH)"; + return "AArch64 Collect Linker Optimization Hint (LOH)"; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -214,14 +214,14 @@ typedef DenseMap MapRegToId; typedef SmallVector MapIdToReg; } // end anonymous namespace. -char ARM64CollectLOH::ID = 0; +char AArch64CollectLOH::ID = 0; -INITIALIZE_PASS_BEGIN(ARM64CollectLOH, "arm64-collect-loh", - "ARM64 Collect Linker Optimization Hint (LOH)", false, +INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh", + "AArch64 Collect Linker Optimization Hint (LOH)", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_END(ARM64CollectLOH, "arm64-collect-loh", - "ARM64 Collect Linker Optimization Hint (LOH)", false, +INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh", + "AArch64 Collect Linker Optimization Hint (LOH)", false, false) /// Given a couple (MBB, reg) get the corresponding set of instruction from @@ -295,7 +295,7 @@ static void initReachingDef(MachineFunction &MF, BitVector &BBKillSet = Kill[&MBB]; BBKillSet.resize(NbReg); for (const MachineInstr &MI : MBB) { - bool IsADRP = MI.getOpcode() == ARM64::ADRP; + bool IsADRP = MI.getOpcode() == AArch64::ADRP; // Process uses first. if (IsADRP || !ADRPMode) @@ -509,9 +509,9 @@ static bool canDefBePartOfLOH(const MachineInstr *Def) { switch (Opc) { default: return false; - case ARM64::ADRP: + case AArch64::ADRP: return true; - case ARM64::ADDXri: + case AArch64::ADDXri: // Check immediate to see if the immediate is an address. 
switch (Def->getOperand(2).getType()) { default: @@ -522,7 +522,7 @@ static bool canDefBePartOfLOH(const MachineInstr *Def) { case MachineOperand::MO_BlockAddress: return true; } - case ARM64::LDRXui: + case AArch64::LDRXui: // Check immediate to see if the immediate is an address. switch (Def->getOperand(2).getType()) { default: @@ -541,13 +541,13 @@ static bool isCandidateStore(const MachineInstr *Instr) { switch (Instr->getOpcode()) { default: return false; - case ARM64::STRBui: - case ARM64::STRHui: - case ARM64::STRWui: - case ARM64::STRXui: - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: + case AArch64::STRBui: + case AArch64::STRHui: + case AArch64::STRWui: + case AArch64::STRXui: + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: // In case we have str xA, [xA, #imm], this is two different uses // of xA and we cannot fold, otherwise the xA stored may be wrong, // even if #imm == 0. @@ -582,7 +582,7 @@ static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs, MapRegToId::const_iterator It; // if all the reaching defs are not adrp, this use will not be // simplifiable. - if ((ADRPMode && Def->getOpcode() != ARM64::ADRP) || + if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) || (!ADRPMode && !canDefBePartOfLOH(Def)) || (!ADRPMode && isCandidateStore(MI) && // store are LOH candidate iff the end of the chain is used as @@ -615,7 +615,7 @@ static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs, /// Based on the use to defs information (in ADRPMode), compute the /// opportunities of LOH ADRP-related. static void computeADRP(const InstrToInstrs &UseToDefs, - ARM64FunctionInfo &ARM64FI, + AArch64FunctionInfo &AArch64FI, const MachineDominatorTree *MDT) { DEBUG(dbgs() << "*** Compute LOH for ADRP\n"); for (const auto &Entry : UseToDefs) { @@ -634,7 +634,7 @@ static void computeADRP(const InstrToInstrs &UseToDefs, SmallVector Args; Args.push_back(L2); Args.push_back(L1); - ARM64FI.addLOHDirective(MCLOH_AdrpAdrp, Args); + AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, Args); ++NumADRPSimpleCandidate; } #ifdef DEBUG @@ -656,19 +656,19 @@ static bool isCandidateLoad(const MachineInstr *Instr) { switch (Instr->getOpcode()) { default: return false; - case ARM64::LDRSBWui: - case ARM64::LDRSBXui: - case ARM64::LDRSHWui: - case ARM64::LDRSHXui: - case ARM64::LDRSWui: - case ARM64::LDRBui: - case ARM64::LDRHui: - case ARM64::LDRWui: - case ARM64::LDRXui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - if (Instr->getOperand(2).getTargetFlags() & ARM64II::MO_GOT) + case AArch64::LDRSBWui: + case AArch64::LDRSBXui: + case AArch64::LDRSHWui: + case AArch64::LDRSHXui: + case AArch64::LDRSWui: + case AArch64::LDRBui: + case AArch64::LDRHui: + case AArch64::LDRWui: + case AArch64::LDRXui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: + if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT) return false; return true; } @@ -681,12 +681,12 @@ static bool supportLoadFromLiteral(const MachineInstr *Instr) { switch (Instr->getOpcode()) { default: return false; - case ARM64::LDRSWui: - case ARM64::LDRWui: - case ARM64::LDRXui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: + case AArch64::LDRSWui: + case AArch64::LDRWui: + case AArch64::LDRXui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: return true; } // Unreachable. 
@@ -705,7 +705,7 @@ static bool isCandidate(const MachineInstr *Instr, return false; const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin(); - if (Def->getOpcode() != ARM64::ADRP) { + if (Def->getOpcode() != AArch64::ADRP) { // At this point, Def is ADDXri or LDRXui of the right type of // symbol, because we filtered out the uses that were not defined // by these kind of instructions (+ ADRP). @@ -728,7 +728,7 @@ static bool isCandidate(const MachineInstr *Instr, // - top is ADRP. // - check the simple chain property: each intermediate node must // dominates the next one. - if (Def->getOpcode() == ARM64::ADRP) + if (Def->getOpcode() == AArch64::ADRP) return MDT->dominates(Def, Instr); return false; } @@ -736,22 +736,22 @@ static bool isCandidate(const MachineInstr *Instr, static bool registerADRCandidate(const MachineInstr &Use, const InstrToInstrs &UseToDefs, const InstrToInstrs *DefsPerColorToUses, - ARM64FunctionInfo &ARM64FI, + AArch64FunctionInfo &AArch64FI, SetOfMachineInstr *InvolvedInLOHs, const MapRegToId &RegToId) { // Look for opportunities to turn ADRP -> ADD or // ADRP -> LDR GOTPAGEOFF into ADR. // If ADRP has more than one use. Give up. - if (Use.getOpcode() != ARM64::ADDXri && - (Use.getOpcode() != ARM64::LDRXui || - !(Use.getOperand(2).getTargetFlags() & ARM64II::MO_GOT))) + if (Use.getOpcode() != AArch64::ADDXri && + (Use.getOpcode() != AArch64::LDRXui || + !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT))) return false; InstrToInstrs::const_iterator It = UseToDefs.find(&Use); // The map may contain garbage that we need to ignore. if (It == UseToDefs.end() || It->second.empty()) return false; const MachineInstr &Def = **It->second.begin(); - if (Def.getOpcode() != ARM64::ADRP) + if (Def.getOpcode() != AArch64::ADRP) return false; // Check the number of users of ADRP. const SetOfMachineInstr *Users = @@ -772,7 +772,7 @@ static bool registerADRCandidate(const MachineInstr &Use, Args.push_back(&Def); Args.push_back(&Use); - ARM64FI.addLOHDirective(Use.getOpcode() == ARM64::ADDXri ? MCLOH_AdrpAdd + AArch64FI.addLOHDirective(Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd : MCLOH_AdrpLdrGot, Args); return true; @@ -782,7 +782,7 @@ static bool registerADRCandidate(const MachineInstr &Use, /// opportunities of LOH non-ADRP-related static void computeOthers(const InstrToInstrs &UseToDefs, const InstrToInstrs *DefsPerColorToUses, - ARM64FunctionInfo &ARM64FI, const MapRegToId &RegToId, + AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId, const MachineDominatorTree *MDT) { SetOfMachineInstr *InvolvedInLOHs = nullptr; #ifdef DEBUG @@ -839,7 +839,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs, const MachineInstr *L1 = Def; const MachineInstr *L2 = nullptr; unsigned ImmediateDefOpc = Def->getOpcode(); - if (Def->getOpcode() != ARM64::ADRP) { + if (Def->getOpcode() != AArch64::ADRP) { // Check the number of users of this node. const SetOfMachineInstr *Users = getUses(DefsPerColorToUses, @@ -899,10 +899,10 @@ static void computeOthers(const InstrToInstrs &UseToDefs, continue; } - bool IsL2Add = (ImmediateDefOpc == ARM64::ADDXri); + bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri); // If the chain is three instructions long and ldr is the second element, // then this ldr must load form GOT, otherwise this is not a correct chain. 
- if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != ARM64II::MO_GOT) + if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != AArch64II::MO_GOT) continue; SmallVector Args; MCLOHType Kind; @@ -944,18 +944,18 @@ static void computeOthers(const InstrToInstrs &UseToDefs, #ifdef DEBUG // get the immediate of the load if (Candidate->getOperand(2).getImm() == 0) - if (ImmediateDefOpc == ARM64::ADDXri) + if (ImmediateDefOpc == AArch64::ADDXri) ++NumADDToLDR; else ++NumLDRToLDR; - else if (ImmediateDefOpc == ARM64::ADDXri) + else if (ImmediateDefOpc == AArch64::ADDXri) ++NumADDToLDRWithImm; else ++NumLDRToLDRWithImm; #endif // DEBUG } } else { - if (ImmediateDefOpc == ARM64::ADRP) + if (ImmediateDefOpc == AArch64::ADRP) continue; else { @@ -978,23 +978,23 @@ static void computeOthers(const InstrToInstrs &UseToDefs, #ifdef DEBUG // get the immediate of the store if (Candidate->getOperand(2).getImm() == 0) - if (ImmediateDefOpc == ARM64::ADDXri) + if (ImmediateDefOpc == AArch64::ADDXri) ++NumADDToSTR; else ++NumLDRToSTR; - else if (ImmediateDefOpc == ARM64::ADDXri) + else if (ImmediateDefOpc == AArch64::ADDXri) ++NumADDToSTRWithImm; else ++NumLDRToSTRWithImm; #endif // DEBUG } } - ARM64FI.addLOHDirective(Kind, Args); + AArch64FI.addLOHDirective(Kind, Args); } // Now, we grabbed all the big patterns, check ADR opportunities. for (const MachineInstr *Candidate : PotentialADROpportunities) - registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, ARM64FI, + registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI, InvolvedInLOHs, RegToId); } @@ -1041,15 +1041,15 @@ static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId, } } -bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &MF) { +bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); const MachineDominatorTree *MDT = &getAnalysis(); MapRegToId RegToId; MapIdToReg IdToReg; - ARM64FunctionInfo *ARM64FI = MF.getInfo(); - assert(ARM64FI && "No MachineFunctionInfo for this function!"); + AArch64FunctionInfo *AArch64FI = MF.getInfo(); + assert(AArch64FI && "No MachineFunctionInfo for this function!"); DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n'); @@ -1059,11 +1059,11 @@ bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &MF) { MachineInstr *DummyOp = nullptr; if (BasicBlockScopeOnly) { - const ARM64InstrInfo *TII = - static_cast(TM.getInstrInfo()); + const AArch64InstrInfo *TII = + static_cast(TM.getInstrInfo()); // For local analysis, create a dummy operation to record uses that are not // local. - DummyOp = MF.CreateMachineInstr(TII->get(ARM64::COPY), DebugLoc()); + DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc()); } unsigned NbReg = RegToId.size(); @@ -1084,7 +1084,7 @@ bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &MF) { reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true); // Compute LOH for ADRP. - computeADRP(ADRPToReachingDefs, *ARM64FI, MDT); + computeADRP(ADRPToReachingDefs, *AArch64FI, MDT); delete[] ColorOpToReachedUses; // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern. @@ -1100,7 +1100,7 @@ bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &MF) { reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false); // Compute other than AdrpAdrp LOH. 
- computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *ARM64FI, RegToId, + computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId, MDT); delete[] ColorOpToReachedUses; @@ -1110,8 +1110,8 @@ bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &MF) { return Modified; } -/// createARM64CollectLOHPass - returns an instance of the Statistic for +/// createAArch64CollectLOHPass - returns an instance of the Statistic for /// linker optimization pass. -FunctionPass *llvm::createARM64CollectLOHPass() { - return new ARM64CollectLOH(); +FunctionPass *llvm::createAArch64CollectLOHPass() { + return new AArch64CollectLOH(); } diff --git a/lib/Target/ARM64/ARM64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp similarity index 85% rename from lib/Target/ARM64/ARM64ConditionalCompares.cpp rename to lib/Target/AArch64/AArch64ConditionalCompares.cpp index 2243cce51a1f..452cdecf8a0c 100644 --- a/lib/Target/ARM64/ARM64ConditionalCompares.cpp +++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -1,4 +1,4 @@ -//===-- ARM64ConditionalCompares.cpp --- CCMP formation for ARM64 ---------===// +//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements the ARM64ConditionalCompares pass which reduces +// This file implements the AArch64ConditionalCompares pass which reduces // branching and code size by using the conditional compare instructions CCMP, // CCMN, and FCMP. // @@ -17,7 +17,7 @@ // //===----------------------------------------------------------------------===// -#include "ARM64.h" +#include "AArch64.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetVector.h" @@ -42,16 +42,16 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-ccmp" +#define DEBUG_TYPE "aarch64-ccmp" // Absolute maximum number of instructions allowed per speculated block. // This bypasses all other heuristics, so it should be set fairly high. static cl::opt BlockInstrLimit( - "arm64-ccmp-limit", cl::init(30), cl::Hidden, + "aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block.")); // Stress testing mode - disable heuristics. -static cl::opt Stress("arm64-stress-ccmp", cl::Hidden, +static cl::opt Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11")); STATISTIC(NumConsidered, "Number of ccmps considered"); @@ -98,7 +98,7 @@ STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted"); // // The cmp-conversion turns the compare instruction in CmpBB into a conditional // compare, and merges CmpBB into Head, speculatively executing its -// instructions. The ARM64 conditional compare instructions have an immediate +// instructions. The AArch64 conditional compare instructions have an immediate // operand that specifies the NZCV flag values when the condition is false and // the compare isn't executed. This makes it possible to chain compares with // different condition codes. @@ -162,13 +162,13 @@ class SSACCmpConv { SmallVector HeadCond; /// The condition code that makes Head branch to CmpBB. - ARM64CC::CondCode HeadCmpBBCC; + AArch64CC::CondCode HeadCmpBBCC; /// The branch condition in CmpBB. SmallVector CmpBBCond; /// The condition code that makes CmpBB branch to Tail. 
- ARM64CC::CondCode CmpBBTailCC; + AArch64CC::CondCode CmpBBTailCC; /// Check if the Tail PHIs are trivially convertible. bool trivialTailPHIs(); @@ -253,11 +253,11 @@ void SSACCmpConv::updateTailPHIs() { } } -// This pass runs before the ARM64DeadRegisterDefinitions pass, so compares are -// still writing virtual registers without any uses. +// This pass runs before the AArch64DeadRegisterDefinitions pass, so compares +// are still writing virtual registers without any uses. bool SSACCmpConv::isDeadDef(unsigned DstReg) { // Writes to the zero register are dead. - if (DstReg == ARM64::WZR || DstReg == ARM64::XZR) + if (DstReg == AArch64::WZR || DstReg == AArch64::XZR) return true; if (!TargetRegisterInfo::isVirtualRegister(DstReg)) return false; @@ -269,11 +269,11 @@ bool SSACCmpConv::isDeadDef(unsigned DstReg) { // Parse a condition code returned by AnalyzeBranch, and compute the CondCode // corresponding to TBB. // Return -static bool parseCond(ArrayRef Cond, ARM64CC::CondCode &CC) { +static bool parseCond(ArrayRef Cond, AArch64CC::CondCode &CC) { // A normal br.cond simply has the condition code. if (Cond[0].getImm() != -1) { assert(Cond.size() == 1 && "Unknown Cond array format"); - CC = (ARM64CC::CondCode)(int)Cond[0].getImm(); + CC = (AArch64CC::CondCode)(int)Cond[0].getImm(); return true; } // For tbz and cbz instruction, the opcode is next. @@ -282,15 +282,15 @@ static bool parseCond(ArrayRef Cond, ARM64CC::CondCode &CC) { // This includes tbz / tbnz branches which can't be converted to // ccmp + br.cond. return false; - case ARM64::CBZW: - case ARM64::CBZX: + case AArch64::CBZW: + case AArch64::CBZX: assert(Cond.size() == 3 && "Unknown Cond array format"); - CC = ARM64CC::EQ; + CC = AArch64CC::EQ; return true; - case ARM64::CBNZW: - case ARM64::CBNZX: + case AArch64::CBNZW: + case AArch64::CBNZX: assert(Cond.size() == 3 && "Unknown Cond array format"); - CC = ARM64CC::NE; + CC = AArch64CC::NE; return true; } } @@ -300,12 +300,12 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { if (I == MBB->end()) return nullptr; // The terminator must be controlled by the flags. - if (!I->readsRegister(ARM64::NZCV)) { + if (!I->readsRegister(AArch64::NZCV)) { switch (I->getOpcode()) { - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: + case AArch64::CBZW: + case AArch64::CBZX: + case AArch64::CBNZW: + case AArch64::CBNZX: // These can be converted into a ccmp against #0. return I; } @@ -320,11 +320,11 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { assert(!I->isTerminator() && "Spurious terminator"); switch (I->getOpcode()) { // cmp is an alias for subs with a dead destination register. - case ARM64::SUBSWri: - case ARM64::SUBSXri: + case AArch64::SUBSWri: + case AArch64::SUBSXri: // cmn is an alias for adds with a dead destination register. - case ARM64::ADDSWri: - case ARM64::ADDSXri: + case AArch64::ADDSWri: + case AArch64::ADDSXri: // Check that the immediate operand is within range, ccmp wants a uimm5. // Rd = SUBSri Rn, imm, shift if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) { @@ -333,25 +333,25 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { return nullptr; } // Fall through. 
- case ARM64::SUBSWrr: - case ARM64::SUBSXrr: - case ARM64::ADDSWrr: - case ARM64::ADDSXrr: + case AArch64::SUBSWrr: + case AArch64::SUBSXrr: + case AArch64::ADDSWrr: + case AArch64::ADDSXrr: if (isDeadDef(I->getOperand(0).getReg())) return I; DEBUG(dbgs() << "Can't convert compare with live destination: " << *I); ++NumLiveDstRejs; return nullptr; - case ARM64::FCMPSrr: - case ARM64::FCMPDrr: - case ARM64::FCMPESrr: - case ARM64::FCMPEDrr: + case AArch64::FCMPSrr: + case AArch64::FCMPDrr: + case AArch64::FCMPESrr: + case AArch64::FCMPEDrr: return I; } // Check for flag reads and clobbers. MIOperands::PhysRegInfo PRI = - MIOperands(I).analyzePhysReg(ARM64::NZCV, TRI); + MIOperands(I).analyzePhysReg(AArch64::NZCV, TRI); if (PRI.Reads) { // The ccmp doesn't produce exactly the same flags as the original @@ -422,7 +422,7 @@ bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB, } // Only CmpMI is allowed to clobber the flags. - if (&I != CmpMI && I.modifiesRegister(ARM64::NZCV, TRI)) { + if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) { DEBUG(dbgs() << "Clobbers flags: " << I); return false; } @@ -519,7 +519,7 @@ bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) { // Make sure the branch direction is right. if (TBB != CmpBB) { assert(TBB == Tail && "Unexpected TBB"); - HeadCmpBBCC = ARM64CC::getInvertedCondCode(HeadCmpBBCC); + HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC); } CmpBBCond.clear(); @@ -543,10 +543,10 @@ bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) { } if (TBB != Tail) - CmpBBTailCC = ARM64CC::getInvertedCondCode(CmpBBTailCC); + CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC); - DEBUG(dbgs() << "Head->CmpBB on " << ARM64CC::getCondCodeName(HeadCmpBBCC) - << ", CmpBB->Tail on " << ARM64CC::getCondCodeName(CmpBBTailCC) + DEBUG(dbgs() << "Head->CmpBB on " << AArch64CC::getCondCodeName(HeadCmpBBCC) + << ", CmpBB->Tail on " << AArch64CC::getCondCodeName(CmpBBTailCC) << '\n'); CmpMI = findConvertibleCompare(CmpBB); @@ -579,13 +579,13 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { ++NumCompBranches; unsigned Opc = 0; switch (HeadCond[1].getImm()) { - case ARM64::CBZW: - case ARM64::CBNZW: - Opc = ARM64::SUBSWri; + case AArch64::CBZW: + case AArch64::CBNZW: + Opc = AArch64::SUBSWri; break; - case ARM64::CBZX: - case ARM64::CBNZX: - Opc = ARM64::SUBSXri; + case AArch64::CBZX: + case AArch64::CBNZX: + Opc = AArch64::SUBSXri; break; default: llvm_unreachable("Cannot convert Head branch"); @@ -615,27 +615,27 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { switch (CmpMI->getOpcode()) { default: llvm_unreachable("Unknown compare opcode"); - case ARM64::SUBSWri: Opc = ARM64::CCMPWi; break; - case ARM64::SUBSWrr: Opc = ARM64::CCMPWr; break; - case ARM64::SUBSXri: Opc = ARM64::CCMPXi; break; - case ARM64::SUBSXrr: Opc = ARM64::CCMPXr; break; - case ARM64::ADDSWri: Opc = ARM64::CCMNWi; break; - case ARM64::ADDSWrr: Opc = ARM64::CCMNWr; break; - case ARM64::ADDSXri: Opc = ARM64::CCMNXi; break; - case ARM64::ADDSXrr: Opc = ARM64::CCMNXr; break; - case ARM64::FCMPSrr: Opc = ARM64::FCCMPSrr; FirstOp = 0; break; - case ARM64::FCMPDrr: Opc = ARM64::FCCMPDrr; FirstOp = 0; break; - case ARM64::FCMPESrr: Opc = ARM64::FCCMPESrr; FirstOp = 0; break; - case ARM64::FCMPEDrr: Opc = ARM64::FCCMPEDrr; FirstOp = 0; break; - case ARM64::CBZW: - case ARM64::CBNZW: - Opc = ARM64::CCMPWi; + case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break; + case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break; + case AArch64::SUBSXri: Opc = 
AArch64::CCMPXi; break; + case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break; + case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break; + case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break; + case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break; + case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break; + case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break; + case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break; + case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break; + case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break; + case AArch64::CBZW: + case AArch64::CBNZW: + Opc = AArch64::CCMPWi; FirstOp = 0; isZBranch = true; break; - case ARM64::CBZX: - case ARM64::CBNZX: - Opc = ARM64::CCMPXi; + case AArch64::CBZX: + case AArch64::CBNZX: + Opc = AArch64::CCMPXi; FirstOp = 0; isZBranch = true; break; @@ -646,7 +646,7 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { // The NZCV immediate operand should provide flags for the case where Head // would have branched to Tail. These flags should cause the new Head // terminator to branch to tail. - unsigned NZCV = ARM64CC::getNZCVToSatisfyCondCode(CmpBBTailCC); + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC); const MCInstrDesc &MCID = TII->get(Opc); MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(), TII->getRegClass(MCID, 0, TRI, *MF)); @@ -665,10 +665,10 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { // If CmpMI was a terminator, we need a new conditional branch to replace it. // This now becomes a Head terminator. if (isZBranch) { - bool isNZ = CmpMI->getOpcode() == ARM64::CBNZW || - CmpMI->getOpcode() == ARM64::CBNZX; - BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(ARM64::Bcc)) - .addImm(isNZ ? ARM64CC::NE : ARM64CC::EQ) + bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW || + CmpMI->getOpcode() == AArch64::CBNZX; + BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc)) + .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ) .addOperand(CmpMI->getOperand(1)); // Branch target. } CmpMI->eraseFromParent(); @@ -687,10 +687,10 @@ int SSACCmpConv::expectedCodeSizeDelta() const { // plus a branch instruction. 
if (HeadCond[0].getImm() == -1) { switch (HeadCond[1].getImm()) { - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: + case AArch64::CBZW: + case AArch64::CBNZW: + case AArch64::CBZX: + case AArch64::CBNZX: // Therefore delta += 1 delta = 1; break; @@ -706,21 +706,21 @@ int SSACCmpConv::expectedCodeSizeDelta() const { default: --delta; break; - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: + case AArch64::CBZW: + case AArch64::CBNZW: + case AArch64::CBZX: + case AArch64::CBNZX: break; } return delta; } //===----------------------------------------------------------------------===// -// ARM64ConditionalCompares Pass +// AArch64ConditionalCompares Pass //===----------------------------------------------------------------------===// namespace { -class ARM64ConditionalCompares : public MachineFunctionPass { +class AArch64ConditionalCompares : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const MCSchedModel *SchedModel; @@ -735,11 +735,11 @@ class ARM64ConditionalCompares : public MachineFunctionPass { public: static char ID; - ARM64ConditionalCompares() : MachineFunctionPass(ID) {} + AArch64ConditionalCompares() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { - return "ARM64 Conditional Compares"; + return "AArch64 Conditional Compares"; } private: @@ -751,25 +751,25 @@ class ARM64ConditionalCompares : public MachineFunctionPass { }; } // end anonymous namespace -char ARM64ConditionalCompares::ID = 0; +char AArch64ConditionalCompares::ID = 0; namespace llvm { -void initializeARM64ConditionalComparesPass(PassRegistry &); +void initializeAArch64ConditionalComparesPass(PassRegistry &); } -INITIALIZE_PASS_BEGIN(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass", - false, false) +INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", + "AArch64 CCMP Pass", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) -INITIALIZE_PASS_END(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass", - false, false) +INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp", + "AArch64 CCMP Pass", false, false) -FunctionPass *llvm::createARM64ConditionalCompares() { - return new ARM64ConditionalCompares(); +FunctionPass *llvm::createAArch64ConditionalCompares() { + return new AArch64ConditionalCompares(); } -void ARM64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { +void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -781,8 +781,8 @@ void ARM64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { } /// Update the dominator tree after if-conversion erased some blocks. -void -ARM64ConditionalCompares::updateDomTree(ArrayRef Removed) { +void AArch64ConditionalCompares::updateDomTree( + ArrayRef Removed) { // convert() removes CmpBB which was previously dominated by Head. // CmpBB children should be transferred to Head. MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head); @@ -798,7 +798,7 @@ ARM64ConditionalCompares::updateDomTree(ArrayRef Removed) { /// Update LoopInfo after if-conversion. 
void -ARM64ConditionalCompares::updateLoops(ArrayRef Removed) { +AArch64ConditionalCompares::updateLoops(ArrayRef Removed) { if (!Loops) return; for (unsigned i = 0, e = Removed.size(); i != e; ++i) @@ -806,7 +806,7 @@ ARM64ConditionalCompares::updateLoops(ArrayRef Removed) { } /// Invalidate MachineTraceMetrics before if-conversion. -void ARM64ConditionalCompares::invalidateTraces() { +void AArch64ConditionalCompares::invalidateTraces() { Traces->invalidate(CmpConv.Head); Traces->invalidate(CmpConv.CmpBB); } @@ -814,7 +814,7 @@ void ARM64ConditionalCompares::invalidateTraces() { /// Apply cost model and heuristics to the if-conversion in IfConv. /// Return true if the conversion is a good idea. /// -bool ARM64ConditionalCompares::shouldConvert() { +bool AArch64ConditionalCompares::shouldConvert() { // Stress testing mode disables all cost considerations. if (Stress) return true; @@ -875,7 +875,7 @@ bool ARM64ConditionalCompares::shouldConvert() { return true; } -bool ARM64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) { +bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) { bool Changed = false; while (CmpConv.canConvert(MBB) && shouldConvert()) { invalidateTraces(); @@ -888,8 +888,8 @@ bool ARM64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) { return Changed; } -bool ARM64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********** ARM64 Conditional Compares **********\n" +bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n" << "********** Function: " << MF.getName() << '\n'); TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); diff --git a/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp similarity index 79% rename from lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp rename to lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index e8f03ec833f7..a2d853c85fef 100644 --- a/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp +++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -1,4 +1,4 @@ -//===-- ARM64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --===// +//==-- AArch64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --==// // // The LLVM Compiler Infrastructure // @@ -11,8 +11,8 @@ // hardware's register renamer. //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64RegisterInfo.h" +#include "AArch64.h" +#include "AArch64RegisterInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunction.h" @@ -21,12 +21,12 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define DEBUG_TYPE "arm64-dead-defs" +#define DEBUG_TYPE "aarch64-dead-defs" STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced"); namespace { -class ARM64DeadRegisterDefinitions : public MachineFunctionPass { +class AArch64DeadRegisterDefinitions : public MachineFunctionPass { private: const TargetRegisterInfo *TRI; bool implicitlyDefinesOverlappingReg(unsigned Reg, const MachineInstr &MI); @@ -34,7 +34,7 @@ class ARM64DeadRegisterDefinitions : public MachineFunctionPass { bool usesFrameIndex(const MachineInstr &MI); public: static char ID; // Pass identification, replacement for typeid. 
- explicit ARM64DeadRegisterDefinitions() : MachineFunctionPass(ID) {} + explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &F) override; @@ -45,10 +45,10 @@ class ARM64DeadRegisterDefinitions : public MachineFunctionPass { MachineFunctionPass::getAnalysisUsage(AU); } }; -char ARM64DeadRegisterDefinitions::ID = 0; +char AArch64DeadRegisterDefinitions::ID = 0; } // end anonymous namespace -bool ARM64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg( +bool AArch64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg( unsigned Reg, const MachineInstr &MI) { for (const MachineOperand &MO : MI.implicit_operands()) if (MO.isReg() && MO.isDef()) @@ -57,15 +57,15 @@ bool ARM64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg( return false; } -bool ARM64DeadRegisterDefinitions::usesFrameIndex(const MachineInstr &MI) { +bool AArch64DeadRegisterDefinitions::usesFrameIndex(const MachineInstr &MI) { for (const MachineOperand &Op : MI.uses()) if (Op.isFI()) return true; return false; } -bool -ARM64DeadRegisterDefinitions::processMachineBasicBlock(MachineBasicBlock &MBB) { +bool AArch64DeadRegisterDefinitions::processMachineBasicBlock( + MachineBasicBlock &MBB) { bool Changed = false; for (MachineInstr &MI : MBB) { if (usesFrameIndex(MI)) { @@ -99,11 +99,11 @@ ARM64DeadRegisterDefinitions::processMachineBasicBlock(MachineBasicBlock &MBB) { default: DEBUG(dbgs() << " Ignoring, register is not a GPR.\n"); continue; - case ARM64::GPR32RegClassID: - NewReg = ARM64::WZR; + case AArch64::GPR32RegClassID: + NewReg = AArch64::WZR; break; - case ARM64::GPR64RegClassID: - NewReg = ARM64::XZR; + case AArch64::GPR64RegClassID: + NewReg = AArch64::XZR; break; } DEBUG(dbgs() << " Replacing with zero register. New:\n "); @@ -118,10 +118,10 @@ ARM64DeadRegisterDefinitions::processMachineBasicBlock(MachineBasicBlock &MBB) { // Scan the function for instructions that have a dead definition of a // register. Replace that register with the zero register when possible. 
-bool ARM64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { +bool AArch64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getTarget().getRegisterInfo(); bool Changed = false; - DEBUG(dbgs() << "***** ARM64DeadRegisterDefinitions *****\n"); + DEBUG(dbgs() << "***** AArch64DeadRegisterDefinitions *****\n"); for (auto &MBB : MF) if (processMachineBasicBlock(MBB)) @@ -129,6 +129,6 @@ bool ARM64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { return Changed; } -FunctionPass *llvm::createARM64DeadRegisterDefinitions() { - return new ARM64DeadRegisterDefinitions(); +FunctionPass *llvm::createAArch64DeadRegisterDefinitions() { + return new AArch64DeadRegisterDefinitions(); } diff --git a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp similarity index 76% rename from lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp rename to lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index a4b5d31314ef..a76fd76e5ed4 100644 --- a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -1,4 +1,4 @@ -//===-- ARM64ExpandPseudoInsts.cpp - Expand pseudo instructions ---*- C++ -*-=// +//==-- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions --*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -14,25 +14,25 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "ARM64InstrInfo.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "AArch64InstrInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/MathExtras.h" using namespace llvm; namespace { -class ARM64ExpandPseudo : public MachineFunctionPass { +class AArch64ExpandPseudo : public MachineFunctionPass { public: static char ID; - ARM64ExpandPseudo() : MachineFunctionPass(ID) {} + AArch64ExpandPseudo() : MachineFunctionPass(ID) {} - const ARM64InstrInfo *TII; + const AArch64InstrInfo *TII; bool runOnMachineFunction(MachineFunction &Fn) override; const char *getPassName() const override { - return "ARM64 pseudo instruction expansion pass"; + return "AArch64 pseudo instruction expansion pass"; } private: @@ -41,7 +41,7 @@ class ARM64ExpandPseudo : public MachineFunctionPass { bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned BitSize); }; -char ARM64ExpandPseudo::ID = 0; +char AArch64ExpandPseudo::ID = 0; } /// \brief Transfer implicit operands on the pseudo instruction to the @@ -87,17 +87,17 @@ static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) { static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - const ARM64InstrInfo *TII, unsigned ChunkIdx) { + const AArch64InstrInfo *TII, unsigned ChunkIdx) { assert(ChunkIdx < 4 && "Out of range chunk index specified!"); const unsigned ShiftAmt = ChunkIdx * 16; uint64_t Encoding; - if (ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) { + if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) { // Create the ORR-immediate instruction. MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri)) .addOperand(MI.getOperand(0)) - .addReg(ARM64::XZR) + .addReg(AArch64::XZR) .addImm(Encoding); // Create the MOVK instruction. 
@@ -105,11 +105,11 @@ static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI, const unsigned DstReg = MI.getOperand(0).getReg(); const bool DstIsDead = MI.getOperand(0).isDead(); MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg) .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); transferImpOps(MI, MIB, MIB1); MI.eraseFromParent(); @@ -124,7 +124,7 @@ static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI, static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) { Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk; - return ARM64_AM::processLogicalImmediate(Chunk, 64, Encoding); + return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding); } /// \brief Check for identical 16-bit chunks within the constant and if so @@ -138,7 +138,7 @@ static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) { static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - const ARM64InstrInfo *TII) { + const AArch64InstrInfo *TII) { typedef DenseMap CountMap; CountMap Counts; @@ -162,9 +162,9 @@ static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI, const bool CountThree = Count == 3; // Create the ORR-immediate instruction. MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri)) .addOperand(MI.getOperand(0)) - .addReg(ARM64::XZR) + .addReg(AArch64::XZR) .addImm(Encoding); const unsigned DstReg = MI.getOperand(0).getReg(); @@ -182,12 +182,12 @@ static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI, // Create the first MOVK instruction. MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead && CountThree)) .addReg(DstReg) .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); // In case we have three instances the whole constant is now materialized // and we can exit. @@ -207,11 +207,11 @@ static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI, // Create the second MOVK instruction. MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg) .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); transferImpOps(MI, MIB, MIB2); MI.eraseFromParent(); @@ -272,7 +272,7 @@ static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) { static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - const ARM64InstrInfo *TII) { + const AArch64InstrInfo *TII) { const int NotSet = -1; const uint64_t Mask = 0xFFFF; @@ -343,11 +343,11 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, // Create the ORR-immediate instruction. 
uint64_t Encoding = 0; - ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding); + AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding); MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri)) .addOperand(MI.getOperand(0)) - .addReg(ARM64::XZR) + .addReg(AArch64::XZR) .addImm(Encoding); const unsigned DstReg = MI.getOperand(0).getReg(); @@ -356,12 +356,13 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, const bool SingleMovk = SecondMovkIdx == NotSet; // Create the first MOVK instruction. MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead && SingleMovk)) .addReg(DstReg) .addImm(getChunk(UImm, FirstMovkIdx)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, FirstMovkIdx * 16)); + .addImm( + AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16)); // Early exit in case we only need to emit a single MOVK instruction. if (SingleMovk) { @@ -372,11 +373,12 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, // Create the second MOVK instruction. MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg) .addImm(getChunk(UImm, SecondMovkIdx)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, SecondMovkIdx * 16)); + .addImm( + AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16)); transferImpOps(MI, MIB, MIB2); MI.eraseFromParent(); @@ -385,9 +387,9 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, /// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more /// real move-immediate instructions to synthesize the immediate. -bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned BitSize) { +bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned BitSize) { MachineInstr &MI = *MBBI; uint64_t Imm = MI.getOperand(1).getImm(); const unsigned Mask = 0xFFFF; @@ -395,12 +397,12 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, // Try a MOVI instruction (aka ORR-immediate with the zero register). uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); uint64_t Encoding; - if (ARM64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { - unsigned Opc = (BitSize == 32 ? ARM64::ORRWri : ARM64::ORRXri); + if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { + unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) .addOperand(MI.getOperand(0)) - .addReg(BitSize == 32 ? ARM64::WZR : ARM64::XZR) + .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR) .addImm(Encoding); transferImpOps(MI, MIB, MIB); MI.eraseFromParent(); @@ -504,9 +506,9 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, unsigned FirstOpc; if (BitSize == 32) { Imm &= (1LL << 32) - 1; - FirstOpc = (isNeg ? ARM64::MOVNWi : ARM64::MOVZWi); + FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi); } else { - FirstOpc = (isNeg ? ARM64::MOVNXi : ARM64::MOVZXi); + FirstOpc = (isNeg ? 
AArch64::MOVNXi : AArch64::MOVZXi); } unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN unsigned LastShift = 0; // LSL amount for last MOVK @@ -524,7 +526,7 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead && Shift == LastShift)) .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift)); // If a MOVN was used for the high bits of a negative value, flip the rest // of the bits back for use with MOVK. @@ -538,7 +540,7 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, } MachineInstrBuilder MIB2; - unsigned Opc = (BitSize == 32 ? ARM64::MOVKWi : ARM64::MOVKXi); + unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi); while (Shift != LastShift) { Shift -= 16; Imm16 = (Imm >> Shift) & Mask; @@ -550,7 +552,7 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, getDeadRegState(DstIsDead && Shift == LastShift)) .addReg(DstReg) .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift)); } transferImpOps(MI, MIB1, MIB2); @@ -560,7 +562,7 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, /// \brief If MBBI references a pseudo instruction that should be expanded here, /// do the expansion and return true. Otherwise return false. -bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, +bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); @@ -568,75 +570,76 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, default: break; - case ARM64::ADDWrr: - case ARM64::SUBWrr: - case ARM64::ADDXrr: - case ARM64::SUBXrr: - case ARM64::ADDSWrr: - case ARM64::SUBSWrr: - case ARM64::ADDSXrr: - case ARM64::SUBSXrr: - case ARM64::ANDWrr: - case ARM64::ANDXrr: - case ARM64::BICWrr: - case ARM64::BICXrr: - case ARM64::ANDSWrr: - case ARM64::ANDSXrr: - case ARM64::BICSWrr: - case ARM64::BICSXrr: - case ARM64::EONWrr: - case ARM64::EONXrr: - case ARM64::EORWrr: - case ARM64::EORXrr: - case ARM64::ORNWrr: - case ARM64::ORNXrr: - case ARM64::ORRWrr: - case ARM64::ORRXrr: { + case AArch64::ADDWrr: + case AArch64::SUBWrr: + case AArch64::ADDXrr: + case AArch64::SUBXrr: + case AArch64::ADDSWrr: + case AArch64::SUBSWrr: + case AArch64::ADDSXrr: + case AArch64::SUBSXrr: + case AArch64::ANDWrr: + case AArch64::ANDXrr: + case AArch64::BICWrr: + case AArch64::BICXrr: + case AArch64::ANDSWrr: + case AArch64::ANDSXrr: + case AArch64::BICSWrr: + case AArch64::BICSXrr: + case AArch64::EONWrr: + case AArch64::EONXrr: + case AArch64::EORWrr: + case AArch64::EORXrr: + case AArch64::ORNWrr: + case AArch64::ORNXrr: + case AArch64::ORRWrr: + case AArch64::ORRXrr: { unsigned Opcode; switch (MI.getOpcode()) { default: return false; - case ARM64::ADDWrr: Opcode = ARM64::ADDWrs; break; - case ARM64::SUBWrr: Opcode = ARM64::SUBWrs; break; - case ARM64::ADDXrr: Opcode = ARM64::ADDXrs; break; - case ARM64::SUBXrr: Opcode = ARM64::SUBXrs; break; - case ARM64::ADDSWrr: Opcode = ARM64::ADDSWrs; break; - case ARM64::SUBSWrr: Opcode = ARM64::SUBSWrs; break; - case ARM64::ADDSXrr: Opcode = ARM64::ADDSXrs; break; - case ARM64::SUBSXrr: Opcode = ARM64::SUBSXrs; break; - case ARM64::ANDWrr: Opcode = ARM64::ANDWrs; break; - case ARM64::ANDXrr: Opcode = ARM64::ANDXrs; break; - case ARM64::BICWrr: Opcode = ARM64::BICWrs; break; - case ARM64::BICXrr: Opcode = 
ARM64::BICXrs; break; - case ARM64::ANDSWrr: Opcode = ARM64::ANDSWrs; break; - case ARM64::ANDSXrr: Opcode = ARM64::ANDSXrs; break; - case ARM64::BICSWrr: Opcode = ARM64::BICSWrs; break; - case ARM64::BICSXrr: Opcode = ARM64::BICSXrs; break; - case ARM64::EONWrr: Opcode = ARM64::EONWrs; break; - case ARM64::EONXrr: Opcode = ARM64::EONXrs; break; - case ARM64::EORWrr: Opcode = ARM64::EORWrs; break; - case ARM64::EORXrr: Opcode = ARM64::EORXrs; break; - case ARM64::ORNWrr: Opcode = ARM64::ORNWrs; break; - case ARM64::ORNXrr: Opcode = ARM64::ORNXrs; break; - case ARM64::ORRWrr: Opcode = ARM64::ORRWrs; break; - case ARM64::ORRXrr: Opcode = ARM64::ORRXrs; break; + case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break; + case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break; + case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break; + case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break; + case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break; + case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break; + case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break; + case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break; + case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break; + case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break; + case AArch64::BICWrr: Opcode = AArch64::BICWrs; break; + case AArch64::BICXrr: Opcode = AArch64::BICXrs; break; + case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break; + case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break; + case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break; + case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break; + case AArch64::EONWrr: Opcode = AArch64::EONWrs; break; + case AArch64::EONXrr: Opcode = AArch64::EONXrs; break; + case AArch64::EORWrr: Opcode = AArch64::EORWrs; break; + case AArch64::EORXrr: Opcode = AArch64::EORXrs; break; + case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break; + case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break; + case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break; + case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break; } MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode), MI.getOperand(0).getReg()) .addOperand(MI.getOperand(1)) .addOperand(MI.getOperand(2)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); transferImpOps(MI, MIB1, MIB1); MI.eraseFromParent(); return true; } - case ARM64::FCVTSHpseudo: { + case AArch64::FCVTSHpseudo: { MachineOperand Src = MI.getOperand(1); Src.setImplicit(); - unsigned SrcH = TII->getRegisterInfo().getSubReg(Src.getReg(), ARM64::hsub); - auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::FCVTSHr)) + unsigned SrcH = + TII->getRegisterInfo().getSubReg(Src.getReg(), AArch64::hsub); + auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::FCVTSHr)) .addOperand(MI.getOperand(0)) .addReg(SrcH, RegState::Undef) .addOperand(Src); @@ -644,33 +647,34 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } - case ARM64::LOADgot: { + case AArch64::LOADgot: { // Expand into ADRP + LDR. 
unsigned DstReg = MI.getOperand(0).getReg(); const MachineOperand &MO1 = MI.getOperand(1); unsigned Flags = MO1.getTargetFlags(); MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg); MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::LDRXui)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui)) .addOperand(MI.getOperand(0)) .addReg(DstReg); if (MO1.isGlobal()) { - MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | ARM64II::MO_PAGE); + MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE); MIB2.addGlobalAddress(MO1.getGlobal(), 0, - Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); + Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); } else if (MO1.isSymbol()) { - MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | ARM64II::MO_PAGE); + MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE); MIB2.addExternalSymbol(MO1.getSymbolName(), - Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); + Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); } else { assert(MO1.isCPI() && "Only expect globals, externalsymbols, or constant pools"); MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), - Flags | ARM64II::MO_PAGE); + Flags | AArch64II::MO_PAGE); MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), - Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); + Flags | AArch64II::MO_PAGEOFF | + AArch64II::MO_NC); } transferImpOps(MI, MIB1, MIB2); @@ -678,20 +682,20 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, return true; } - case ARM64::MOVaddr: - case ARM64::MOVaddrJT: - case ARM64::MOVaddrCP: - case ARM64::MOVaddrBA: - case ARM64::MOVaddrTLS: - case ARM64::MOVaddrEXT: { + case AArch64::MOVaddr: + case AArch64::MOVaddrJT: + case AArch64::MOVaddrCP: + case AArch64::MOVaddrBA: + case AArch64::MOVaddrTLS: + case AArch64::MOVaddrEXT: { // Expand into ADRP + ADD. unsigned DstReg = MI.getOperand(0).getReg(); MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg) .addOperand(MI.getOperand(1)); MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADDXri)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri)) .addOperand(MI.getOperand(0)) .addReg(DstReg) .addOperand(MI.getOperand(2)) @@ -702,13 +706,13 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, return true; } - case ARM64::MOVi32imm: + case AArch64::MOVi32imm: return expandMOVImm(MBB, MBBI, 32); - case ARM64::MOVi64imm: + case AArch64::MOVi64imm: return expandMOVImm(MBB, MBBI, 64); - case ARM64::RET_ReallyLR: - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::RET)) - .addReg(ARM64::LR); + case AArch64::RET_ReallyLR: + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET)) + .addReg(AArch64::LR); MI.eraseFromParent(); return true; } @@ -717,7 +721,7 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, /// \brief Iterate over the instructions in basic block MBB and expand any /// pseudo instructions. Return true if anything was modified. 
-bool ARM64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { +bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { bool Modified = false; MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); @@ -730,8 +734,8 @@ bool ARM64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { return Modified; } -bool ARM64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - TII = static_cast(MF.getTarget().getInstrInfo()); +bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast(MF.getTarget().getInstrInfo()); bool Modified = false; for (auto &MBB : MF) @@ -740,6 +744,6 @@ bool ARM64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { } /// \brief Returns an instance of the pseudo instruction expansion pass. -FunctionPass *llvm::createARM64ExpandPseudoPass() { - return new ARM64ExpandPseudo(); +FunctionPass *llvm::createAArch64ExpandPseudoPass() { + return new AArch64ExpandPseudo(); } diff --git a/lib/Target/ARM64/ARM64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp similarity index 78% rename from lib/Target/ARM64/ARM64FastISel.cpp rename to lib/Target/AArch64/AArch64FastISel.cpp index f4bf616559a8..58178b1a48bb 100644 --- a/lib/Target/ARM64/ARM64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -1,4 +1,4 @@ -//===-- ARM6464FastISel.cpp - ARM64 FastISel implementation ---------------===// +//===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===// // // The LLVM Compiler Infrastructure // @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// // -// This file defines the ARM64-specific support for the FastISel class. Some +// This file defines the AArch64-specific support for the FastISel class. Some // of the target-specific code is generated by tablegen in the file -// ARM64GenFastISel.inc, which is #included here. +// AArch64GenFastISel.inc, which is #included here. // //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64TargetMachine.h" -#include "ARM64Subtarget.h" -#include "ARM64CallingConv.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "AArch64Subtarget.h" +#include "AArch64CallingConv.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -40,7 +40,7 @@ using namespace llvm; namespace { -class ARM64FastISel : public FastISel { +class AArch64FastISel : public FastISel { class Address { public: @@ -85,9 +85,9 @@ class ARM64FastISel : public FastISel { bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); } }; - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; + const AArch64Subtarget *Subtarget; LLVMContext *Context; private: @@ -130,8 +130,8 @@ class ARM64FastISel : public FastISel { unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); - unsigned ARM64MaterializeFP(const ConstantFP *CFP, MVT VT); - unsigned ARM64MaterializeGV(const GlobalValue *GV); + unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT); + unsigned AArch64MaterializeGV(const GlobalValue *GV); // Call handling routines. 
private: @@ -150,29 +150,29 @@ class ARM64FastISel : public FastISel { unsigned TargetMaterializeAlloca(const AllocaInst *AI) override; unsigned TargetMaterializeConstant(const Constant *C) override; - explicit ARM64FastISel(FunctionLoweringInfo &funcInfo, + explicit AArch64FastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FastISel(funcInfo, libInfo) { - Subtarget = &TM.getSubtarget(); + Subtarget = &TM.getSubtarget(); Context = &funcInfo.Fn->getContext(); } bool TargetSelectInstruction(const Instruction *I) override; -#include "ARM64GenFastISel.inc" +#include "AArch64GenFastISel.inc" }; } // end anonymous namespace -#include "ARM64GenCallingConv.inc" +#include "AArch64GenCallingConv.inc" -CCAssignFn *ARM64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { +CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { if (CC == CallingConv::WebKit_JS) - return CC_ARM64_WebKit_JS; - return Subtarget->isTargetDarwin() ? CC_ARM64_DarwinPCS : CC_ARM64_AAPCS; + return CC_AArch64_WebKit_JS; + return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; } -unsigned ARM64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) { +unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) { assert(TLI.getValueType(AI->getType(), true) == MVT::i64 && "Alloca should always return a pointer."); @@ -184,8 +184,8 @@ unsigned ARM64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) { FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { - unsigned ResultReg = createResultReg(&ARM64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri), + unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), ResultReg) .addFrameIndex(SI->second) .addImm(0) @@ -196,7 +196,7 @@ unsigned ARM64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) { return 0; } -unsigned ARM64FastISel::ARM64MaterializeFP(const ConstantFP *CFP, MVT VT) { +unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) { if (VT != MVT::f32 && VT != MVT::f64) return 0; @@ -209,11 +209,11 @@ unsigned ARM64FastISel::ARM64MaterializeFP(const ConstantFP *CFP, MVT VT) { int Imm; unsigned Opc; if (is64bit) { - Imm = ARM64_AM::getFP64Imm(Val); - Opc = ARM64::FMOVDi; + Imm = AArch64_AM::getFP64Imm(Val); + Opc = AArch64::FMOVDi; } else { - Imm = ARM64_AM::getFP32Imm(Val); - Opc = ARM64::FMOVSi; + Imm = AArch64_AM::getFP32Imm(Val); + Opc = AArch64::FMOVSi; } unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) @@ -228,19 +228,19 @@ unsigned ARM64FastISel::ARM64MaterializeFP(const ConstantFP *CFP, MVT VT) { Align = DL.getTypeAllocSize(CFP->getType()); unsigned Idx = MCP.getConstantPoolIndex(cast(CFP), Align); - unsigned ADRPReg = createResultReg(&ARM64::GPR64commonRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP), - ADRPReg).addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGE); + unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), + ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE); - unsigned Opc = is64bit ? ARM64::LDRDui : ARM64::LDRSui; + unsigned Opc = is64bit ? 
AArch64::LDRDui : AArch64::LDRSui; unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(ADRPReg) - .addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGEOFF | ARM64II::MO_NC); + .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); return ResultReg; } -unsigned ARM64FastISel::ARM64MaterializeGV(const GlobalValue *GV) { +unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) { // We can't handle thread-local variables quickly yet. Unfortunately we have // to peer through any aliases to find out if that rule applies. const GlobalValue *TLSGV = GV; @@ -257,37 +257,37 @@ unsigned ARM64FastISel::ARM64MaterializeGV(const GlobalValue *GV) { if (!DestEVT.isSimple()) return 0; - unsigned ADRPReg = createResultReg(&ARM64::GPR64commonRegClass); + unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); unsigned ResultReg; - if (OpFlags & ARM64II::MO_GOT) { + if (OpFlags & AArch64II::MO_GOT) { // ADRP + LDRX - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), ADRPReg) - .addGlobalAddress(GV, 0, ARM64II::MO_GOT | ARM64II::MO_PAGE); + .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE); - ResultReg = createResultReg(&ARM64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::LDRXui), + ResultReg = createResultReg(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), ResultReg) .addReg(ADRPReg) - .addGlobalAddress(GV, 0, ARM64II::MO_GOT | ARM64II::MO_PAGEOFF | - ARM64II::MO_NC); + .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | + AArch64II::MO_NC); } else { // ADRP + ADDX - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP), - ADRPReg).addGlobalAddress(GV, 0, ARM64II::MO_PAGE); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), + ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE); - ResultReg = createResultReg(&ARM64::GPR64spRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri), + ResultReg = createResultReg(&AArch64::GPR64spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), ResultReg) .addReg(ADRPReg) - .addGlobalAddress(GV, 0, ARM64II::MO_PAGEOFF | ARM64II::MO_NC) + .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC) .addImm(0); } return ResultReg; } -unsigned ARM64FastISel::TargetMaterializeConstant(const Constant *C) { +unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) { EVT CEVT = TLI.getValueType(C->getType(), true); // Only handle simple types. @@ -297,15 +297,15 @@ unsigned ARM64FastISel::TargetMaterializeConstant(const Constant *C) { // FIXME: Handle ConstantInt. if (const ConstantFP *CFP = dyn_cast(C)) - return ARM64MaterializeFP(CFP, VT); + return AArch64MaterializeFP(CFP, VT); else if (const GlobalValue *GV = dyn_cast(C)) - return ARM64MaterializeGV(GV); + return AArch64MaterializeGV(GV); return 0; } // Computes the address to get to an object. 
-bool ARM64FastISel::ComputeAddress(const Value *Obj, Address &Addr) { +bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) { const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast(Obj)) { @@ -413,7 +413,7 @@ bool ARM64FastISel::ComputeAddress(const Value *Obj, Address &Addr) { return Addr.isValid(); } -bool ARM64FastISel::isTypeLegal(Type *Ty, MVT &VT) { +bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { EVT evt = TLI.getValueType(Ty, true); // Only handle simple types. @@ -430,7 +430,7 @@ bool ARM64FastISel::isTypeLegal(Type *Ty, MVT &VT) { return TLI.isTypeLegal(VT); } -bool ARM64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) { +bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) { if (isTypeLegal(Ty, VT)) return true; @@ -442,8 +442,8 @@ bool ARM64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) { return false; } -bool ARM64FastISel::SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor, - bool UseUnscaled) { +bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT, + int64_t ScaleFactor, bool UseUnscaled) { bool needsLowering = false; int64_t Offset = Addr.getOffset(); switch (VT.SimpleTy) { @@ -486,9 +486,9 @@ bool ARM64FastISel::SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor, return true; } -void ARM64FastISel::AddLoadStoreOperands(Address &Addr, - const MachineInstrBuilder &MIB, - unsigned Flags, bool UseUnscaled) { +void AArch64FastISel::AddLoadStoreOperands(Address &Addr, + const MachineInstrBuilder &MIB, + unsigned Flags, bool UseUnscaled) { int64_t Offset = Addr.getOffset(); // Frame base works a bit differently. Handle it separately. if (Addr.getKind() == Address::FrameIndexBase) { @@ -507,8 +507,8 @@ void ARM64FastISel::AddLoadStoreOperands(Address &Addr, } } -bool ARM64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, - bool UseUnscaled) { +bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, + bool UseUnscaled) { // Negative offsets require unscaled, 9-bit, signed immediate offsets. // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. if (!UseUnscaled && Addr.getOffset() < 0) @@ -525,32 +525,32 @@ bool ARM64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, VTIsi1 = true; // Intentional fall-through. case MVT::i8: - Opc = UseUnscaled ? ARM64::LDURBBi : ARM64::LDRBBui; - RC = &ARM64::GPR32RegClass; + Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui; + RC = &AArch64::GPR32RegClass; ScaleFactor = 1; break; case MVT::i16: - Opc = UseUnscaled ? ARM64::LDURHHi : ARM64::LDRHHui; - RC = &ARM64::GPR32RegClass; + Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui; + RC = &AArch64::GPR32RegClass; ScaleFactor = 2; break; case MVT::i32: - Opc = UseUnscaled ? ARM64::LDURWi : ARM64::LDRWui; - RC = &ARM64::GPR32RegClass; + Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui; + RC = &AArch64::GPR32RegClass; ScaleFactor = 4; break; case MVT::i64: - Opc = UseUnscaled ? ARM64::LDURXi : ARM64::LDRXui; - RC = &ARM64::GPR64RegClass; + Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui; + RC = &AArch64::GPR64RegClass; ScaleFactor = 8; break; case MVT::f32: - Opc = UseUnscaled ? ARM64::LDURSi : ARM64::LDRSui; + Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui; RC = TLI.getRegClassFor(VT); ScaleFactor = 4; break; case MVT::f64: - Opc = UseUnscaled ? ARM64::LDURDi : ARM64::LDRDui; + Opc = UseUnscaled ? 
AArch64::LDURDi : AArch64::LDRDui; RC = TLI.getRegClassFor(VT); ScaleFactor = 8; break; @@ -577,18 +577,18 @@ bool ARM64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, // Loading an i1 requires special handling. if (VTIsi1) { - MRI.constrainRegClass(ResultReg, &ARM64::GPR32RegClass); - unsigned ANDReg = createResultReg(&ARM64::GPR32spRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), + MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass); + unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri), ANDReg) .addReg(ResultReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); ResultReg = ANDReg; } return true; } -bool ARM64FastISel::SelectLoad(const Instruction *I) { +bool AArch64FastISel::SelectLoad(const Instruction *I) { MVT VT; // Verify we have a legal type before going any further. Currently, we handle // simple types that will directly fit in a register (i32/f32/i64/f64) or @@ -609,8 +609,8 @@ bool ARM64FastISel::SelectLoad(const Instruction *I) { return true; } -bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, - bool UseUnscaled) { +bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, + bool UseUnscaled) { // Negative offsets require unscaled, 9-bit, signed immediate offsets. // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. if (!UseUnscaled && Addr.getOffset() < 0) @@ -626,27 +626,27 @@ bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, case MVT::i1: VTIsi1 = true; case MVT::i8: - StrOpc = UseUnscaled ? ARM64::STURBBi : ARM64::STRBBui; + StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui; ScaleFactor = 1; break; case MVT::i16: - StrOpc = UseUnscaled ? ARM64::STURHHi : ARM64::STRHHui; + StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui; ScaleFactor = 2; break; case MVT::i32: - StrOpc = UseUnscaled ? ARM64::STURWi : ARM64::STRWui; + StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui; ScaleFactor = 4; break; case MVT::i64: - StrOpc = UseUnscaled ? ARM64::STURXi : ARM64::STRXui; + StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui; ScaleFactor = 8; break; case MVT::f32: - StrOpc = UseUnscaled ? ARM64::STURSi : ARM64::STRSui; + StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui; ScaleFactor = 4; break; case MVT::f64: - StrOpc = UseUnscaled ? ARM64::STURDi : ARM64::STRDui; + StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui; ScaleFactor = 8; break; } @@ -666,12 +666,12 @@ bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, // Storing an i1 requires special handling. if (VTIsi1) { - MRI.constrainRegClass(SrcReg, &ARM64::GPR32RegClass); - unsigned ANDReg = createResultReg(&ARM64::GPR32spRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), + MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass); + unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri), ANDReg) .addReg(SrcReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); SrcReg = ANDReg; } // Create the base instruction, then add the operands. 
@@ -681,7 +681,7 @@ bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, return true; } -bool ARM64FastISel::SelectStore(const Instruction *I) { +bool AArch64FastISel::SelectStore(const Instruction *I) { MVT VT; Value *Op0 = I->getOperand(0); // Verify we have a legal type before going any further. Currently, we handle @@ -706,53 +706,53 @@ bool ARM64FastISel::SelectStore(const Instruction *I) { return true; } -static ARM64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { +static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { switch (Pred) { case CmpInst::FCMP_ONE: case CmpInst::FCMP_UEQ: default: // AL is our "false" for now. The other two need more compares. - return ARM64CC::AL; + return AArch64CC::AL; case CmpInst::ICMP_EQ: case CmpInst::FCMP_OEQ: - return ARM64CC::EQ; + return AArch64CC::EQ; case CmpInst::ICMP_SGT: case CmpInst::FCMP_OGT: - return ARM64CC::GT; + return AArch64CC::GT; case CmpInst::ICMP_SGE: case CmpInst::FCMP_OGE: - return ARM64CC::GE; + return AArch64CC::GE; case CmpInst::ICMP_UGT: case CmpInst::FCMP_UGT: - return ARM64CC::HI; + return AArch64CC::HI; case CmpInst::FCMP_OLT: - return ARM64CC::MI; + return AArch64CC::MI; case CmpInst::ICMP_ULE: case CmpInst::FCMP_OLE: - return ARM64CC::LS; + return AArch64CC::LS; case CmpInst::FCMP_ORD: - return ARM64CC::VC; + return AArch64CC::VC; case CmpInst::FCMP_UNO: - return ARM64CC::VS; + return AArch64CC::VS; case CmpInst::FCMP_UGE: - return ARM64CC::PL; + return AArch64CC::PL; case CmpInst::ICMP_SLT: case CmpInst::FCMP_ULT: - return ARM64CC::LT; + return AArch64CC::LT; case CmpInst::ICMP_SLE: case CmpInst::FCMP_ULE: - return ARM64CC::LE; + return AArch64CC::LE; case CmpInst::FCMP_UNE: case CmpInst::ICMP_NE: - return ARM64CC::NE; + return AArch64CC::NE; case CmpInst::ICMP_UGE: - return ARM64CC::HS; + return AArch64CC::HS; case CmpInst::ICMP_ULT: - return ARM64CC::LO; + return AArch64CC::LO; } } -bool ARM64FastISel::SelectBranch(const Instruction *I) { +bool AArch64FastISel::SelectBranch(const Instruction *I) { const BranchInst *BI = cast(I); MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; @@ -760,8 +760,8 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) { if (const CmpInst *CI = dyn_cast(BI->getCondition())) { if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { // We may not handle every CC for now. - ARM64CC::CondCode CC = getCompareCC(CI->getPredicate()); - if (CC == ARM64CC::AL) + AArch64CC::CondCode CC = getCompareCC(CI->getPredicate()); + if (CC == AArch64CC::AL) return false; // Emit the cmp. @@ -769,7 +769,7 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) { return false; // Emit the branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) .addImm(CC) .addMBB(TBB); FuncInfo.MBB->addSuccessor(TBB); @@ -788,26 +788,27 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) { // Issue an extract_subreg to get the lower 32-bits. 
if (SrcVT == MVT::i64) CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true, - ARM64::sub_32); + AArch64::sub_32); - MRI.constrainRegClass(CondReg, &ARM64::GPR32RegClass); - unsigned ANDReg = createResultReg(&ARM64::GPR32spRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) + MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass); + unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(AArch64::ANDWri), ANDReg) .addReg(CondReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri)) + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(AArch64::SUBSWri)) .addReg(ANDReg) .addReg(ANDReg) .addImm(0) .addImm(0); - unsigned CC = ARM64CC::NE; + unsigned CC = AArch64CC::NE; if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); - CC = ARM64CC::EQ; + CC = AArch64CC::EQ; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) .addImm(CC) .addMBB(TBB); FuncInfo.MBB->addSuccessor(TBB); @@ -818,7 +819,7 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) { dyn_cast(BI->getCondition())) { uint64_t Imm = CI->getZExtValue(); MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::B)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) .addMBB(Target); FuncInfo.MBB->addSuccessor(Target); return true; @@ -835,19 +836,19 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) { // Regardless, the compare has been done in the predecessor block, // and it left a value for us in a virtual register. Ergo, we test // the one-bit value left in the virtual register. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri), - ARM64::WZR) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri), + AArch64::WZR) .addReg(CondReg) .addImm(0) .addImm(0); - unsigned CC = ARM64CC::NE; + unsigned CC = AArch64CC::NE; if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); - CC = ARM64CC::EQ; + CC = AArch64CC::EQ; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) .addImm(CC) .addMBB(TBB); FuncInfo.MBB->addSuccessor(TBB); @@ -855,14 +856,14 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) { return true; } -bool ARM64FastISel::SelectIndirectBr(const Instruction *I) { +bool AArch64FastISel::SelectIndirectBr(const Instruction *I) { const IndirectBrInst *BI = cast(I); unsigned AddrReg = getRegForValue(BI->getOperand(0)); if (AddrReg == 0) return false; // Emit the indirect branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BR)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR)) .addReg(AddrReg); // Make sure the CFG is up-to-date. 
@@ -872,7 +873,7 @@ bool ARM64FastISel::SelectIndirectBr(const Instruction *I) { return true; } -bool ARM64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) { +bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) { Type *Ty = Src1Value->getType(); EVT SrcEVT = TLI.getValueType(Ty, true); if (!SrcEVT.isSimple()) @@ -916,26 +917,26 @@ bool ARM64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) { needsExt = true; // Intentional fall-through. case MVT::i32: - ZReg = ARM64::WZR; + ZReg = AArch64::WZR; if (UseImm) - CmpOpc = isNegativeImm ? ARM64::ADDSWri : ARM64::SUBSWri; + CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri; else - CmpOpc = ARM64::SUBSWrr; + CmpOpc = AArch64::SUBSWrr; break; case MVT::i64: - ZReg = ARM64::XZR; + ZReg = AArch64::XZR; if (UseImm) - CmpOpc = isNegativeImm ? ARM64::ADDSXri : ARM64::SUBSXri; + CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri; else - CmpOpc = ARM64::SUBSXrr; + CmpOpc = AArch64::SUBSXrr; break; case MVT::f32: isICmp = false; - CmpOpc = UseImm ? ARM64::FCMPSri : ARM64::FCMPSrr; + CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr; break; case MVT::f64: isICmp = false; - CmpOpc = UseImm ? ARM64::FCMPDri : ARM64::FCMPDrr; + CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr; break; } @@ -986,12 +987,12 @@ bool ARM64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) { return true; } -bool ARM64FastISel::SelectCmp(const Instruction *I) { +bool AArch64FastISel::SelectCmp(const Instruction *I) { const CmpInst *CI = cast(I); // We may not handle every CC for now. - ARM64CC::CondCode CC = getCompareCC(CI->getPredicate()); - if (CC == ARM64CC::AL) + AArch64CC::CondCode CC = getCompareCC(CI->getPredicate()); + if (CC == AArch64CC::AL) return false; // Emit the cmp. @@ -999,19 +1000,19 @@ bool ARM64FastISel::SelectCmp(const Instruction *I) { return false; // Now set a register based on the comparison. 
- ARM64CC::CondCode invertedCC = getInvertedCondCode(CC); - unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::CSINCWr), + AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); + unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), ResultReg) - .addReg(ARM64::WZR) - .addReg(ARM64::WZR) + .addReg(AArch64::WZR) + .addReg(AArch64::WZR) .addImm(invertedCC); UpdateValueMap(I, ResultReg); return true; } -bool ARM64FastISel::SelectSelect(const Instruction *I) { +bool AArch64FastISel::SelectSelect(const Instruction *I) { const SelectInst *SI = cast(I); EVT DestEVT = TLI.getValueType(SI->getType(), true); @@ -1034,14 +1035,14 @@ bool ARM64FastISel::SelectSelect(const Instruction *I) { return false; - MRI.constrainRegClass(CondReg, &ARM64::GPR32RegClass); - unsigned ANDReg = createResultReg(&ARM64::GPR32spRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), + MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass); + unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri), ANDReg) .addReg(CondReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri)) .addReg(ANDReg) .addReg(ANDReg) .addImm(0) @@ -1052,16 +1053,16 @@ bool ARM64FastISel::SelectSelect(const Instruction *I) { default: return false; case MVT::i32: - SelectOpc = ARM64::CSELWr; + SelectOpc = AArch64::CSELWr; break; case MVT::i64: - SelectOpc = ARM64::CSELXr; + SelectOpc = AArch64::CSELXr; break; case MVT::f32: - SelectOpc = ARM64::FCSELSrrr; + SelectOpc = AArch64::FCSELSrrr; break; case MVT::f64: - SelectOpc = ARM64::FCSELDrrr; + SelectOpc = AArch64::FCSELDrrr; break; } @@ -1070,13 +1071,13 @@ bool ARM64FastISel::SelectSelect(const Instruction *I) { ResultReg) .addReg(TrueReg) .addReg(FalseReg) - .addImm(ARM64CC::NE); + .addImm(AArch64CC::NE); UpdateValueMap(I, ResultReg); return true; } -bool ARM64FastISel::SelectFPExt(const Instruction *I) { +bool AArch64FastISel::SelectFPExt(const Instruction *I) { Value *V = I->getOperand(0); if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) return false; @@ -1085,14 +1086,14 @@ bool ARM64FastISel::SelectFPExt(const Instruction *I) { if (Op == 0) return false; - unsigned ResultReg = createResultReg(&ARM64::FPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTDSr), + unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), ResultReg).addReg(Op); UpdateValueMap(I, ResultReg); return true; } -bool ARM64FastISel::SelectFPTrunc(const Instruction *I) { +bool AArch64FastISel::SelectFPTrunc(const Instruction *I) { Value *V = I->getOperand(0); if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) return false; @@ -1101,15 +1102,15 @@ bool ARM64FastISel::SelectFPTrunc(const Instruction *I) { if (Op == 0) return false; - unsigned ResultReg = createResultReg(&ARM64::FPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTSDr), + unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 
TII.get(AArch64::FCVTSDr), ResultReg).addReg(Op); UpdateValueMap(I, ResultReg); return true; } // FPToUI and FPToSI -bool ARM64FastISel::SelectFPToInt(const Instruction *I, bool Signed) { +bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) { MVT DestVT; if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) return false; @@ -1125,24 +1126,24 @@ bool ARM64FastISel::SelectFPToInt(const Instruction *I, bool Signed) { unsigned Opc; if (SrcVT == MVT::f64) { if (Signed) - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWDr : ARM64::FCVTZSUXDr; + Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; else - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZUUWDr : ARM64::FCVTZUUXDr; + Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; } else { if (Signed) - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWSr : ARM64::FCVTZSUXSr; + Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; else - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZUUWSr : ARM64::FCVTZUUXSr; + Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; } unsigned ResultReg = createResultReg( - DestVT == MVT::i32 ? &ARM64::GPR32RegClass : &ARM64::GPR64RegClass); + DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(SrcReg); UpdateValueMap(I, ResultReg); return true; } -bool ARM64FastISel::SelectIntToFP(const Instruction *I, bool Signed) { +bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) { MVT DestVT; if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) return false; @@ -1163,20 +1164,20 @@ bool ARM64FastISel::SelectIntToFP(const Instruction *I, bool Signed) { return false; } - MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &ARM64::GPR64RegClass - : &ARM64::GPR32RegClass); + MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass + : &AArch64::GPR32RegClass); unsigned Opc; if (SrcVT == MVT::i64) { if (Signed) - Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUXSri : ARM64::SCVTFUXDri; + Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; else - Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUXSri : ARM64::UCVTFUXDri; + Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; } else { if (Signed) - Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUWSri : ARM64::SCVTFUWDri; + Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; else - Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUWSri : ARM64::UCVTFUWDri; + Opc = (DestVT == MVT::f32) ? 
AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; } unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); @@ -1186,12 +1187,11 @@ bool ARM64FastISel::SelectIntToFP(const Instruction *I, bool Signed) { return true; } -bool ARM64FastISel::ProcessCallArgs(SmallVectorImpl &Args, - SmallVectorImpl &ArgRegs, - SmallVectorImpl &ArgVTs, - SmallVectorImpl &ArgFlags, - SmallVectorImpl &RegArgs, - CallingConv::ID CC, unsigned &NumBytes) { +bool AArch64FastISel::ProcessCallArgs( + SmallVectorImpl &Args, SmallVectorImpl &ArgRegs, + SmallVectorImpl &ArgVTs, SmallVectorImpl &ArgFlags, + SmallVectorImpl &RegArgs, CallingConv::ID CC, + unsigned &NumBytes) { SmallVector ArgLocs; CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context); CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); @@ -1258,7 +1258,7 @@ bool ARM64FastISel::ProcessCallArgs(SmallVectorImpl &Args, Address Addr; Addr.setKind(Address::RegBase); - Addr.setReg(ARM64::SP); + Addr.setReg(AArch64::SP); Addr.setOffset(VA.getLocMemOffset() + BEAlign); if (!EmitStore(ArgVT, Arg, Addr)) @@ -1268,9 +1268,9 @@ bool ARM64FastISel::ProcessCallArgs(SmallVectorImpl &Args, return true; } -bool ARM64FastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, - const Instruction *I, CallingConv::ID CC, - unsigned &NumBytes) { +bool AArch64FastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, + const Instruction *I, CallingConv::ID CC, + unsigned &NumBytes) { // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) @@ -1302,8 +1302,8 @@ bool ARM64FastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, return true; } -bool ARM64FastISel::SelectCall(const Instruction *I, - const char *IntrMemName = nullptr) { +bool AArch64FastISel::SelectCall(const Instruction *I, + const char *IntrMemName = nullptr) { const CallInst *CI = cast(I); const Value *Callee = CI->getCalledValue(); @@ -1396,7 +1396,7 @@ bool ARM64FastISel::SelectCall(const Instruction *I, // Issue the call. MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BL)); + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BL)); if (!IntrMemName) MIB.addGlobalAddress(GV, 0, 0); else @@ -1421,15 +1421,15 @@ bool ARM64FastISel::SelectCall(const Instruction *I, return true; } -bool ARM64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) { +bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) { if (Alignment) return Len / Alignment <= 4; else return Len < 32; } -bool ARM64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, - unsigned Alignment) { +bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, + uint64_t Len, unsigned Alignment) { // Make sure we don't bloat code by inlining very large memcpy's. if (!IsMemCpySmall(Len, Alignment)) return false; @@ -1481,7 +1481,7 @@ bool ARM64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, return true; } -bool ARM64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) { +bool AArch64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) { // FIXME: Handle more intrinsics. 
switch (I.getIntrinsicID()) { default: @@ -1539,7 +1539,7 @@ bool ARM64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) { return SelectCall(&I, "memset"); } case Intrinsic::trap: { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BRK)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) .addImm(1); return true; } @@ -1547,7 +1547,7 @@ bool ARM64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) { return false; } -bool ARM64FastISel::SelectRet(const Instruction *I) { +bool AArch64FastISel::SelectRet(const Instruction *I) { const ReturnInst *Ret = cast(I); const Function &F = *I->getParent()->getParent(); @@ -1569,8 +1569,8 @@ bool ARM64FastISel::SelectRet(const Instruction *I) { SmallVector ValLocs; CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, I->getContext()); - CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; + CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS + : RetCC_AArch64_AAPCS; CCInfo.AnalyzeReturn(Outs, RetCC); // Only handle a single return value for now. @@ -1631,13 +1631,13 @@ bool ARM64FastISel::SelectRet(const Instruction *I) { } MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM64::RET_ReallyLR)); + TII.get(AArch64::RET_ReallyLR)); for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) MIB.addReg(RetRegs[i], RegState::Implicit); return true; } -bool ARM64FastISel::SelectTrunc(const Instruction *I) { +bool AArch64FastISel::SelectTrunc(const Instruction *I) { Type *DestTy = I->getType(); Value *Op = I->getOperand(0); Type *SrcTy = Op->getType(); @@ -1684,14 +1684,14 @@ bool ARM64FastISel::SelectTrunc(const Instruction *I) { } // Issue an extract_subreg to get the lower 32-bits. unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true, - ARM64::sub_32); - MRI.constrainRegClass(Reg32, &ARM64::GPR32RegClass); + AArch64::sub_32); + MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass); // Create the AND instruction which performs the actual truncation. 
- unsigned ANDReg = createResultReg(&ARM64::GPR32spRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), + unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri), ANDReg) .addReg(Reg32) - .addImm(ARM64_AM::encodeLogicalImmediate(Mask, 32)); + .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32)); SrcReg = ANDReg; } @@ -1699,7 +1699,7 @@ bool ARM64FastISel::SelectTrunc(const Instruction *I) { return true; } -unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { +unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || DestVT == MVT::i64) && "Unexpected value type."); @@ -1708,22 +1708,22 @@ unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { DestVT = MVT::i32; if (isZExt) { - MRI.constrainRegClass(SrcReg, &ARM64::GPR32RegClass); - unsigned ResultReg = createResultReg(&ARM64::GPR32spRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), + MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass); + unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri), ResultReg) .addReg(SrcReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); if (DestVT == MVT::i64) { // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. - unsigned Reg64 = MRI.createVirtualRegister(&ARM64::GPR64RegClass); + unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM64::SUBREG_TO_REG), Reg64) + TII.get(AArch64::SUBREG_TO_REG), Reg64) .addImm(0) .addReg(ResultReg) - .addImm(ARM64::sub_32); + .addImm(AArch64::sub_32); ResultReg = Reg64; } return ResultReg; @@ -1732,8 +1732,8 @@ unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { // FIXME: We're SExt i1 to i64. return 0; } - unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SBFMWri), + unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri), ResultReg) .addReg(SrcReg) .addImm(0) @@ -1742,8 +1742,8 @@ unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { } } -unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, - bool isZExt) { +unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, + bool isZExt) { assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); unsigned Opc; unsigned Imm = 0; @@ -1755,21 +1755,21 @@ unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, return Emiti1Ext(SrcReg, DestVT, isZExt); case MVT::i8: if (DestVT == MVT::i64) - Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri; + Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri; else - Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri; + Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri; Imm = 7; break; case MVT::i16: if (DestVT == MVT::i64) - Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri; + Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri; else - Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri; + Opc = isZExt ? 
AArch64::UBFMWri : AArch64::SBFMWri; Imm = 15; break; case MVT::i32: assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); - Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri; + Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri; Imm = 31; break; } @@ -1778,12 +1778,12 @@ unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, if (DestVT == MVT::i8 || DestVT == MVT::i16) DestVT = MVT::i32; else if (DestVT == MVT::i64) { - unsigned Src64 = MRI.createVirtualRegister(&ARM64::GPR64RegClass); + unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM64::SUBREG_TO_REG), Src64) + TII.get(AArch64::SUBREG_TO_REG), Src64) .addImm(0) .addReg(SrcReg) - .addImm(ARM64::sub_32); + .addImm(AArch64::sub_32); SrcReg = Src64; } @@ -1796,7 +1796,7 @@ unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, return ResultReg; } -bool ARM64FastISel::SelectIntExt(const Instruction *I) { +bool AArch64FastISel::SelectIntExt(const Instruction *I) { // On ARM, in general, integer casts don't involve legal types; this code // handles promotable integers. The high bits for a type smaller than // the register size are assumed to be undefined. @@ -1825,7 +1825,7 @@ bool ARM64FastISel::SelectIntExt(const Instruction *I) { return true; } -bool ARM64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) { +bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) { EVT DestEVT = TLI.getValueType(I->getType(), true); if (!DestEVT.isSimple()) return false; @@ -1840,13 +1840,13 @@ bool ARM64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) { default: return false; case ISD::SREM: - DivOpc = is64bit ? ARM64::SDIVXr : ARM64::SDIVWr; + DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr; break; case ISD::UREM: - DivOpc = is64bit ? ARM64::UDIVXr : ARM64::UDIVWr; + DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; break; } - unsigned MSubOpc = is64bit ? ARM64::MSUBXrrr : ARM64::MSUBWrrr; + unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; unsigned Src0Reg = getRegForValue(I->getOperand(0)); if (!Src0Reg) return false; @@ -1870,7 +1870,7 @@ bool ARM64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) { return true; } -bool ARM64FastISel::SelectMul(const Instruction *I) { +bool AArch64FastISel::SelectMul(const Instruction *I) { EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true); if (!SrcEVT.isSimple()) return false; @@ -1889,12 +1889,12 @@ bool ARM64FastISel::SelectMul(const Instruction *I) { case MVT::i8: case MVT::i16: case MVT::i32: - ZReg = ARM64::WZR; - Opc = ARM64::MADDWrrr; + ZReg = AArch64::WZR; + Opc = AArch64::MADDWrrr; break; case MVT::i64: - ZReg = ARM64::XZR; - Opc = ARM64::MADDXrrr; + ZReg = AArch64::XZR; + Opc = AArch64::MADDXrrr; break; } @@ -1916,7 +1916,7 @@ bool ARM64FastISel::SelectMul(const Instruction *I) { return true; } -bool ARM64FastISel::TargetSelectInstruction(const Instruction *I) { +bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { default: break; @@ -1966,12 +1966,12 @@ bool ARM64FastISel::TargetSelectInstruction(const Instruction *I) { } return false; // Silence warnings. 
- (void)&CC_ARM64_DarwinPCS_VarArg; + (void)&CC_AArch64_DarwinPCS_VarArg; } namespace llvm { -llvm::FastISel *ARM64::createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) { - return new ARM64FastISel(funcInfo, libInfo); +llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) { + return new AArch64FastISel(funcInfo, libInfo); } } diff --git a/lib/Target/ARM64/ARM64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp similarity index 82% rename from lib/Target/ARM64/ARM64FrameLowering.cpp rename to lib/Target/AArch64/AArch64FrameLowering.cpp index 9c17488ec588..deb306a506dd 100644 --- a/lib/Target/ARM64/ARM64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1,4 +1,4 @@ -//===- ARM64FrameLowering.cpp - ARM64 Frame Lowering -----------*- C++ -*-====// +//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====// // // The LLVM Compiler Infrastructure // @@ -7,15 +7,15 @@ // //===----------------------------------------------------------------------===// // -// This file contains the ARM64 implementation of TargetFrameLowering class. +// This file contains the AArch64 implementation of TargetFrameLowering class. // //===----------------------------------------------------------------------===// -#include "ARM64FrameLowering.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64Subtarget.h" -#include "ARM64TargetMachine.h" +#include "AArch64FrameLowering.h" +#include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64Subtarget.h" +#include "AArch64TargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -33,8 +33,8 @@ using namespace llvm; #define DEBUG_TYPE "frame-info" -static cl::opt EnableRedZone("arm64-redzone", - cl::desc("enable use of redzone on ARM64"), +static cl::opt EnableRedZone("aarch64-redzone", + cl::desc("enable use of redzone on AArch64"), cl::init(false), cl::Hidden); STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); @@ -59,7 +59,7 @@ static unsigned estimateStackSize(MachineFunction &MF) { return (unsigned)Offset; } -bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const { +bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; // Don't use the red zone if the function explicitly asks us not to. @@ -69,7 +69,7 @@ bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const { return false; const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64FunctionInfo *AFI = MF.getInfo(); + const AArch64FunctionInfo *AFI = MF.getInfo(); unsigned NumBytes = AFI->getLocalStackSize(); // Note: currently hasFP() is always true for hasCalls(), but that's an @@ -82,13 +82,13 @@ bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const { /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. 
-bool ARM64FrameLowering::hasFP(const MachineFunction &MF) const { +bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); #ifndef NDEBUG const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); assert(!RegInfo->needsStackRealignment(MF) && - "No stack realignment on ARM64!"); + "No stack realignment on AArch64!"); #endif return (MFI->hasCalls() || MFI->hasVarSizedObjects() || @@ -100,15 +100,16 @@ bool ARM64FrameLowering::hasFP(const MachineFunction &MF) const { /// immediately on entry to the current function. This eliminates the need for /// add/sub sp brackets around call sites. Returns true if the call frame is /// included as part of the stack frame. -bool ARM64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { +bool +AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } -void ARM64FrameLowering::eliminateCallFramePseudoInstr( +void AArch64FrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - const ARM64InstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); + const AArch64InstrInfo *TII = + static_cast(MF.getTarget().getInstrInfo()); DebugLoc DL = I->getDebugLoc(); int Opc = I->getOpcode(); bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); @@ -138,26 +139,26 @@ void ARM64FrameLowering::eliminateCallFramePseudoInstr( // Mostly call frames will be allocated at the start of a function so // this is OK, but it is a limitation that needs dealing with. assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large"); - emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, Amount, TII); + emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII); } } else if (CalleePopAmount != 0) { // If the calling convention demands that the callee pops arguments from the // stack, we want to add it back if we have a reserved call frame. assert(CalleePopAmount < 0xffffff && "call frame too large"); - emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, -CalleePopAmount, TII); + emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount, + TII); } MBB.erase(I); } -void -ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned FramePtr) const { +void AArch64FrameLowering::emitCalleeSavedFrameMoves( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + unsigned FramePtr) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const ARM64InstrInfo *TII = TM.getInstrInfo(); + const AArch64InstrInfo *TII = TM.getInstrInfo(); DebugLoc DL = MBB.findDebugLoc(MBBI); // Add callee saved registers to move list. @@ -185,7 +186,7 @@ ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, // method automatically generates the directives when frame pointers are // used. If we generate CFI directives for the extra "STP"s, the linker will // lose track of the correct values for the frame pointer and link register. 
- if (HasFP && (FramePtr == Reg || Reg == ARM64::LR)) { + if (HasFP && (FramePtr == Reg || Reg == AArch64::LR)) { TotalSkipped += stackGrowth; continue; } @@ -198,15 +199,15 @@ ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, } } -void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { +void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. MachineBasicBlock::iterator MBBI = MBB.begin(); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); - const ARM64RegisterInfo *RegInfo = TM.getRegisterInfo(); - const ARM64InstrInfo *TII = TM.getInstrInfo(); + const AArch64RegisterInfo *RegInfo = TM.getRegisterInfo(); + const AArch64InstrInfo *TII = TM.getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); - ARM64FunctionInfo *AFI = MF.getInfo(); + AArch64FunctionInfo *AFI = MF.getInfo(); bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); bool HasFP = hasFP(MF); DebugLoc DL = MBB.findDebugLoc(MBBI); @@ -224,7 +225,7 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { // REDZONE: If the stack size is less than 128 bytes, we don't need // to actually allocate. if (NumBytes && !canUseRedZone(MF)) { - emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII, + emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); // Encode the stack size of the leaf function. @@ -244,9 +245,9 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { if (HasFP) { // First instruction must a) allocate the stack and b) have an immediate // that is a multiple of -2. - assert((MBBI->getOpcode() == ARM64::STPXpre || - MBBI->getOpcode() == ARM64::STPDpre) && - MBBI->getOperand(3).getReg() == ARM64::SP && + assert((MBBI->getOpcode() == AArch64::STPXpre || + MBBI->getOpcode() == AArch64::STPDpre) && + MBBI->getOperand(3).getReg() == AArch64::SP && MBBI->getOperand(4).getImm() < 0 && (MBBI->getOperand(4).getImm() & 1) == 0); @@ -258,10 +259,10 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { } // Move past the saves of the callee-saved registers. - while (MBBI->getOpcode() == ARM64::STPXi || - MBBI->getOpcode() == ARM64::STPDi || - MBBI->getOpcode() == ARM64::STPXpre || - MBBI->getOpcode() == ARM64::STPDpre) { + while (MBBI->getOpcode() == AArch64::STPXi || + MBBI->getOpcode() == AArch64::STPDi || + MBBI->getOpcode() == AArch64::STPXpre || + MBBI->getOpcode() == AArch64::STPDpre) { ++MBBI; NumBytes -= 16; } @@ -271,7 +272,7 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { // mov fp,sp when FPOffset is zero. // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. - emitFrameOffset(MBB, MBBI, DL, ARM64::FP, ARM64::SP, FPOffset, TII, + emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII, MachineInstr::FrameSetup); } @@ -282,7 +283,7 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { if (NumBytes) { // If we're a leaf function, try using the red zone. if (!canUseRedZone(MF)) - emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII, + emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); } @@ -295,7 +296,7 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { // needed. 
// if (RegInfo->hasBasePointer(MF)) - TII->copyPhysReg(MBB, MBBI, DL, ARM64::X19, ARM64::SP, false); + TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false); if (needsFrameMoves) { const DataLayout *TD = MF.getTarget().getDataLayout(); @@ -377,7 +378,7 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { .addCFIIndex(CFIIndex); // Record the location of the stored LR - unsigned LR = RegInfo->getDwarfRegNum(ARM64::LR, true); + unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, LR, StackGrowth)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) @@ -410,15 +411,16 @@ static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) { static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { unsigned RtIdx = 0; - if (MI->getOpcode() == ARM64::LDPXpost || MI->getOpcode() == ARM64::LDPDpost) + if (MI->getOpcode() == AArch64::LDPXpost || + MI->getOpcode() == AArch64::LDPDpost) RtIdx = 1; - if (MI->getOpcode() == ARM64::LDPXpost || - MI->getOpcode() == ARM64::LDPDpost || MI->getOpcode() == ARM64::LDPXi || - MI->getOpcode() == ARM64::LDPDi) { + if (MI->getOpcode() == AArch64::LDPXpost || + MI->getOpcode() == AArch64::LDPDpost || + MI->getOpcode() == AArch64::LDPXi || MI->getOpcode() == AArch64::LDPDi) { if (!isCalleeSavedRegister(MI->getOperand(RtIdx).getReg(), CSRegs) || !isCalleeSavedRegister(MI->getOperand(RtIdx + 1).getReg(), CSRegs) || - MI->getOperand(RtIdx + 2).getReg() != ARM64::SP) + MI->getOperand(RtIdx + 2).getReg() != AArch64::SP) return false; return true; } @@ -426,25 +428,25 @@ static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { return false; } -void ARM64FrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { +void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64InstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); - const ARM64RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); + const AArch64InstrInfo *TII = + static_cast(MF.getTarget().getInstrInfo()); + const AArch64RegisterInfo *RegInfo = static_cast( + MF.getTarget().getRegisterInfo()); DebugLoc DL = MBBI->getDebugLoc(); unsigned RetOpcode = MBBI->getOpcode(); int NumBytes = MFI->getStackSize(); - const ARM64FunctionInfo *AFI = MF.getInfo(); + const AArch64FunctionInfo *AFI = MF.getInfo(); // Initial and residual are named for consitency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; - if (RetOpcode == ARM64::TCRETURNdi || RetOpcode == ARM64::TCRETURNri) { + if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) { MachineOperand &StackAdjust = MBBI->getOperand(1); // For a tail-call in a callee-pops-arguments environment, some or all of @@ -483,8 +485,8 @@ void ARM64FrameLowering::emitEpilogue(MachineFunction &MF, // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize // = StackSize + ArgumentPopSize // - // ARM64TargetLowering::LowerCall figures out ArgumentPopSize and keeps - // it as the 2nd argument of ARM64ISD::TC_RETURN. + // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps + // it as the 2nd argument of AArch64ISD::TC_RETURN. 
NumBytes += ArgumentPopSize; unsigned NumRestores = 0; @@ -508,7 +510,8 @@ void ARM64FrameLowering::emitEpilogue(MachineFunction &MF, // If this was a redzone leaf function, we don't need to restore the // stack pointer. if (!canUseRedZone(MF)) - emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::SP, NumBytes, TII); + emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, + TII); return; } @@ -517,14 +520,14 @@ void ARM64FrameLowering::emitEpilogue(MachineFunction &MF, // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. if (NumBytes || MFI->hasVarSizedObjects()) - emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::FP, + emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags); } /// getFrameIndexOffset - Returns the displacement from the frame register to /// the stack frame of the specified index. -int ARM64FrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { +int AArch64FrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { unsigned FrameReg; return getFrameIndexReference(MF, FI, FrameReg); } @@ -533,19 +536,19 @@ int ARM64FrameLowering::getFrameIndexOffset(const MachineFunction &MF, /// debug info. It's the same as what we use for resolving the code-gen /// references for now. FIXME: This can go wrong when references are /// SP-relative and simple call frames aren't used. -int ARM64FrameLowering::getFrameIndexReference(const MachineFunction &MF, - int FI, - unsigned &FrameReg) const { +int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { return resolveFrameIndexReference(MF, FI, FrameReg); } -int ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, - int FI, unsigned &FrameReg, - bool PreferFP) const { +int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg, + bool PreferFP) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - const ARM64FunctionInfo *AFI = MF.getInfo(); + const AArch64RegisterInfo *RegInfo = static_cast( + MF.getTarget().getRegisterInfo()); + const AArch64FunctionInfo *AFI = MF.getInfo(); int FPOffset = MFI->getObjectOffset(FI) + 16; int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); bool isFixed = MFI->isFixedObjectIndex(FI); @@ -587,7 +590,7 @@ int ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, if (RegInfo->hasBasePointer(MF)) FrameReg = RegInfo->getBaseRegister(); else { - FrameReg = ARM64::SP; + FrameReg = AArch64::SP; // If we're using the red zone for this function, the SP won't actually // be adjusted, so the offsets will be negative. They're also all // within range of the signed 9-bit immediate instructions. @@ -599,16 +602,16 @@ int ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, } static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { - if (Reg != ARM64::LR) + if (Reg != AArch64::LR) return getKillRegState(true); // LR maybe referred to later by an @llvm.returnaddress intrinsic. 
- bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM64::LR); + bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR); bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken()); return getKillRegState(LRKill); } -bool ARM64FrameLowering::spillCalleeSavedRegisters( +bool AArch64FrameLowering::spillCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const { @@ -645,22 +648,22 @@ bool ARM64FrameLowering::spillCalleeSavedRegisters( // Rationale: This sequence saves uop updates compared to a sequence of // pre-increment spills like stp xi,xj,[sp,#-16]! // Note: Similar rational and sequence for restores in epilog. - if (ARM64::GPR64RegClass.contains(Reg1)) { - assert(ARM64::GPR64RegClass.contains(Reg2) && + if (AArch64::GPR64RegClass.contains(Reg1)) { + assert(AArch64::GPR64RegClass.contains(Reg2) && "Expected GPR64 callee-saved register pair!"); // For first spill use pre-increment store. if (i == 0) - StrOpc = ARM64::STPXpre; + StrOpc = AArch64::STPXpre; else - StrOpc = ARM64::STPXi; - } else if (ARM64::FPR64RegClass.contains(Reg1)) { - assert(ARM64::FPR64RegClass.contains(Reg2) && + StrOpc = AArch64::STPXi; + } else if (AArch64::FPR64RegClass.contains(Reg1)) { + assert(AArch64::FPR64RegClass.contains(Reg2) && "Expected FPR64 callee-saved register pair!"); // For first spill use pre-increment store. if (i == 0) - StrOpc = ARM64::STPDpre; + StrOpc = AArch64::STPDpre; else - StrOpc = ARM64::STPDi; + StrOpc = AArch64::STPDi; } else llvm_unreachable("Unexpected callee saved register!"); DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", " @@ -672,19 +675,19 @@ bool ARM64FrameLowering::spillCalleeSavedRegisters( assert((Offset >= -64 && Offset <= 63) && "Offset out of bounds for STP immediate"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); - if (StrOpc == ARM64::STPDpre || StrOpc == ARM64::STPXpre) - MIB.addReg(ARM64::SP, RegState::Define); + if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre) + MIB.addReg(AArch64::SP, RegState::Define); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) .addReg(Reg1, getPrologueDeath(MF, Reg1)) - .addReg(ARM64::SP) + .addReg(AArch64::SP) .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit .setMIFlag(MachineInstr::FrameSetup); } return true; } -bool ARM64FrameLowering::restoreCalleeSavedRegisters( +bool AArch64FrameLowering::restoreCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const { @@ -716,20 +719,20 @@ bool ARM64FrameLowering::restoreCalleeSavedRegisters( assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); - if (ARM64::GPR64RegClass.contains(Reg1)) { - assert(ARM64::GPR64RegClass.contains(Reg2) && + if (AArch64::GPR64RegClass.contains(Reg1)) { + assert(AArch64::GPR64RegClass.contains(Reg2) && "Expected GPR64 callee-saved register pair!"); if (i == Count - 2) - LdrOpc = ARM64::LDPXpost; + LdrOpc = AArch64::LDPXpost; else - LdrOpc = ARM64::LDPXi; - } else if (ARM64::FPR64RegClass.contains(Reg1)) { - assert(ARM64::FPR64RegClass.contains(Reg2) && + LdrOpc = AArch64::LDPXi; + } else if (AArch64::FPR64RegClass.contains(Reg1)) { + assert(AArch64::FPR64RegClass.contains(Reg2) && "Expected FPR64 callee-saved register pair!"); if (i == Count - 2) - LdrOpc = ARM64::LDPDpost; + LdrOpc = AArch64::LDPDpost; else - LdrOpc = ARM64::LDPDi; + LdrOpc = AArch64::LDPDi; } else 
llvm_unreachable("Unexpected callee saved register!"); DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", " @@ -742,31 +745,31 @@ bool ARM64FrameLowering::restoreCalleeSavedRegisters( assert((Offset >= -64 && Offset <= 63) && "Offset out of bounds for LDP immediate"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); - if (LdrOpc == ARM64::LDPXpost || LdrOpc == ARM64::LDPDpost) - MIB.addReg(ARM64::SP, RegState::Define); + if (LdrOpc == AArch64::LDPXpost || LdrOpc == AArch64::LDPDpost) + MIB.addReg(AArch64::SP, RegState::Define); MIB.addReg(Reg2, getDefRegState(true)) .addReg(Reg1, getDefRegState(true)) - .addReg(ARM64::SP) + .addReg(AArch64::SP) .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8] // where the factor * 8 is implicit } return true; } -void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( +void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( MachineFunction &MF, RegScavenger *RS) const { - const ARM64RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - ARM64FunctionInfo *AFI = MF.getInfo(); + const AArch64RegisterInfo *RegInfo = static_cast( + MF.getTarget().getRegisterInfo()); + AArch64FunctionInfo *AFI = MF.getInfo(); MachineRegisterInfo *MRI = &MF.getRegInfo(); SmallVector UnspilledCSGPRs; SmallVector UnspilledCSFPRs; // The frame record needs to be created by saving the appropriate registers if (hasFP(MF)) { - MRI->setPhysRegUsed(ARM64::FP); - MRI->setPhysRegUsed(ARM64::LR); + MRI->setPhysRegUsed(AArch64::FP); + MRI->setPhysRegUsed(AArch64::LR); } // Spill the BasePtr if it's used. Do this first thing so that the @@ -788,10 +791,10 @@ void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( const unsigned OddReg = CSRegs[i]; const unsigned EvenReg = CSRegs[i + 1]; - assert((ARM64::GPR64RegClass.contains(OddReg) && - ARM64::GPR64RegClass.contains(EvenReg)) ^ - (ARM64::FPR64RegClass.contains(OddReg) && - ARM64::FPR64RegClass.contains(EvenReg)) && + assert((AArch64::GPR64RegClass.contains(OddReg) && + AArch64::GPR64RegClass.contains(EvenReg)) ^ + (AArch64::FPR64RegClass.contains(OddReg) && + AArch64::FPR64RegClass.contains(EvenReg)) && "Register class mismatch!"); const bool OddRegUsed = MRI->isPhysRegUsed(OddReg); @@ -800,7 +803,7 @@ void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( // Early exit if none of the registers in the register pair is actually // used. if (!OddRegUsed && !EvenRegUsed) { - if (ARM64::GPR64RegClass.contains(OddReg)) { + if (AArch64::GPR64RegClass.contains(OddReg)) { UnspilledCSGPRs.push_back(OddReg); UnspilledCSGPRs.push_back(EvenReg); } else { @@ -810,7 +813,7 @@ void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( continue; } - unsigned Reg = ARM64::NoRegister; + unsigned Reg = AArch64::NoRegister; // If only one of the registers of the register pair is used, make sure to // mark the other one as used as well. 
if (OddRegUsed ^ EvenRegUsed) { @@ -822,17 +825,17 @@ void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo)); DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo)); - assert(((OddReg == ARM64::LR && EvenReg == ARM64::FP) || + assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) || (RegInfo->getEncodingValue(OddReg) + 1 == RegInfo->getEncodingValue(EvenReg))) && "Register pair of non-adjacent registers!"); - if (ARM64::GPR64RegClass.contains(OddReg)) { + if (AArch64::GPR64RegClass.contains(OddReg)) { NumGPRSpilled += 2; // If it's not a reserved register, we can use it in lieu of an // emergency spill slot for the register scavenger. // FIXME: It would be better to instead keep looking and choose another // unspilled register that isn't reserved, if there is one. - if (Reg != ARM64::NoRegister && !RegInfo->isReservedReg(MF, Reg)) + if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg)) ExtraCSSpill = true; } else NumFPRSpilled += 2; @@ -878,7 +881,7 @@ void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( // If we didn't find an extra callee-saved register to spill, create // an emergency spill slot. if (!ExtraCSSpill) { - const TargetRegisterClass *RC = &ARM64::GPR64RegClass; + const TargetRegisterClass *RC = &AArch64::GPR64RegClass; int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false); RS->addScavengingFrameIndex(FI); DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI diff --git a/lib/Target/ARM64/ARM64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h similarity index 86% rename from lib/Target/ARM64/ARM64FrameLowering.h rename to lib/Target/AArch64/AArch64FrameLowering.h index 1991a0a18dd6..0e00d168003f 100644 --- a/lib/Target/ARM64/ARM64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -1,4 +1,4 @@ -//===-- ARM64FrameLowering.h - TargetFrameLowering for ARM64 ----*- C++ -*-===// +//==-- AArch64FrameLowering.h - TargetFrameLowering for AArch64 --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -11,22 +11,22 @@ // //===----------------------------------------------------------------------===// -#ifndef ARM64_FRAMELOWERING_H -#define ARM64_FRAMELOWERING_H +#ifndef AArch64_FRAMELOWERING_H +#define AArch64_FRAMELOWERING_H #include "llvm/Target/TargetFrameLowering.h" namespace llvm { -class ARM64Subtarget; -class ARM64TargetMachine; +class AArch64Subtarget; +class AArch64TargetMachine; -class ARM64FrameLowering : public TargetFrameLowering { - const ARM64TargetMachine &TM; +class AArch64FrameLowering : public TargetFrameLowering { + const AArch64TargetMachine &TM; public: - explicit ARM64FrameLowering(const ARM64TargetMachine &TM, - const ARM64Subtarget &STI) + explicit AArch64FrameLowering(const AArch64TargetMachine &TM, + const AArch64Subtarget &STI) : TargetFrameLowering(StackGrowsDown, 16, 0, 16, false /*StackRealignable*/), TM(TM) {} diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp similarity index 70% rename from lib/Target/ARM64/ARM64ISelDAGToDAG.cpp rename to lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 23c45d414e2d..7007ffcce29b 100644 --- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1,4 +1,4 @@ -//===-- ARM64ISelDAGToDAG.cpp - A dag to dag inst selector for ARM64 ------===// +//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,12 @@ // 
//===----------------------------------------------------------------------===// // -// This file defines an instruction selector for the ARM64 target. +// This file defines an instruction selector for the AArch64 target. // //===----------------------------------------------------------------------===// -#include "ARM64TargetMachine.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/APSInt.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Function.h" // To access function attributes. @@ -25,30 +25,31 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-isel" +#define DEBUG_TYPE "aarch64-isel" //===--------------------------------------------------------------------===// -/// ARM64DAGToDAGISel - ARM64 specific code to select ARM64 machine +/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine /// instructions for SelectionDAG operations. /// namespace { -class ARM64DAGToDAGISel : public SelectionDAGISel { - ARM64TargetMachine &TM; +class AArch64DAGToDAGISel : public SelectionDAGISel { + AArch64TargetMachine &TM; - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; + const AArch64Subtarget *Subtarget; bool ForCodeSize; public: - explicit ARM64DAGToDAGISel(ARM64TargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), TM(tm), - Subtarget(nullptr), ForCodeSize(false) {} + explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, + CodeGenOpt::Level OptLevel) + : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr), + ForCodeSize(false) {} const char *getPassName() const override { - return "ARM64 Instruction Selection"; + return "AArch64 Instruction Selection"; } bool runOnMachineFunction(MachineFunction &MF) override { @@ -57,7 +58,7 @@ class ARM64DAGToDAGISel : public SelectionDAGISel { FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) || FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); - Subtarget = &TM.getSubtarget(); + Subtarget = &TM.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); } @@ -161,7 +162,7 @@ class ARM64DAGToDAGISel : public SelectionDAGISel { SDNode *SelectLIBM(SDNode *N); // Include the pieces autogenerated from the target description. -#include "ARM64GenDAGISel.inc" +#include "AArch64GenDAGISel.inc" private: bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, @@ -214,10 +215,10 @@ static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, isIntImmediate(N->getOperand(1).getNode(), Imm); } -bool ARM64DAGToDAGISel::SelectInlineAsmMemoryOperand( +bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( const SDValue &Op, char ConstraintCode, std::vector &OutOps) { assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); - // Require the address to be in a register. That is safe for all ARM64 + // Require the address to be in a register. That is safe for all AArch64 // variants and it is hard to do anything much smarter without knowing // how the operand is used. OutOps.push_back(Op); @@ -227,8 +228,8 @@ bool ARM64DAGToDAGISel::SelectInlineAsmMemoryOperand( /// SelectArithImmed - Select an immediate value that can be represented as /// a 12-bit value shifted left by either 0 or 12. 
If so, return true with /// Val set to the 12-bit value and Shift set to the shifter operand. -bool ARM64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, - SDValue &Shift) { +bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, + SDValue &Shift) { // This function is called from the addsub_shifted_imm ComplexPattern, // which lists [imm] as the list of opcode it's interested in, however // we still need to check whether the operand is actually an immediate @@ -248,7 +249,7 @@ bool ARM64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, } else return false; - unsigned ShVal = ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt); + unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); Val = CurDAG->getTargetConstant(Immed, MVT::i32); Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); return true; @@ -256,8 +257,8 @@ bool ARM64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, /// SelectNegArithImmed - As above, but negates the value before trying to /// select it. -bool ARM64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, - SDValue &Shift) { +bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, + SDValue &Shift) { // This function is called from the addsub_shifted_imm ComplexPattern, // which lists [imm] as the list of opcode it's interested in, however // we still need to check whether the operand is actually an immediate @@ -288,23 +289,23 @@ bool ARM64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, /// getShiftTypeForNode - Translate a shift node to the corresponding /// ShiftType value. -static ARM64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { +static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { switch (N.getOpcode()) { default: - return ARM64_AM::InvalidShiftExtend; + return AArch64_AM::InvalidShiftExtend; case ISD::SHL: - return ARM64_AM::LSL; + return AArch64_AM::LSL; case ISD::SRL: - return ARM64_AM::LSR; + return AArch64_AM::LSR; case ISD::SRA: - return ARM64_AM::ASR; + return AArch64_AM::ASR; case ISD::ROTR: - return ARM64_AM::ROR; + return AArch64_AM::ROR; } } /// \brief Determine wether it is worth to fold V into an extended register. -bool ARM64DAGToDAGISel::isWorthFolding(SDValue V) const { +bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { // it hurts if the a value is used at least twice, unless we are optimizing // for code size. if (ForCodeSize || V.hasOneUse()) @@ -317,18 +318,18 @@ bool ARM64DAGToDAGISel::isWorthFolding(SDValue V) const { /// instructions allow the shifted register to be rotated, but the arithmetic /// instructions do not. The AllowROR parameter specifies whether ROR is /// supported. 
-bool ARM64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, - SDValue &Reg, SDValue &Shift) { - ARM64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); - if (ShType == ARM64_AM::InvalidShiftExtend) +bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, + SDValue &Reg, SDValue &Shift) { + AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); + if (ShType == AArch64_AM::InvalidShiftExtend) return false; - if (!AllowROR && ShType == ARM64_AM::ROR) + if (!AllowROR && ShType == AArch64_AM::ROR) return false; if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { unsigned BitSize = N.getValueType().getSizeInBits(); unsigned Val = RHS->getZExtValue() & (BitSize - 1); - unsigned ShVal = ARM64_AM::getShifterImm(ShType, Val); + unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); Reg = N.getOperand(0); Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); @@ -340,7 +341,7 @@ bool ARM64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, /// getExtendTypeForNode - Translate an extend node to the corresponding /// ExtendType value. -static ARM64_AM::ShiftExtendType +static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { if (N.getOpcode() == ISD::SIGN_EXTEND || N.getOpcode() == ISD::SIGN_EXTEND_INREG) { @@ -351,51 +352,51 @@ getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { SrcVT = N.getOperand(0).getValueType(); if (!IsLoadStore && SrcVT == MVT::i8) - return ARM64_AM::SXTB; + return AArch64_AM::SXTB; else if (!IsLoadStore && SrcVT == MVT::i16) - return ARM64_AM::SXTH; + return AArch64_AM::SXTH; else if (SrcVT == MVT::i32) - return ARM64_AM::SXTW; + return AArch64_AM::SXTW; assert(SrcVT != MVT::i64 && "extend from 64-bits?"); - return ARM64_AM::InvalidShiftExtend; + return AArch64_AM::InvalidShiftExtend; } else if (N.getOpcode() == ISD::ZERO_EXTEND || N.getOpcode() == ISD::ANY_EXTEND) { EVT SrcVT = N.getOperand(0).getValueType(); if (!IsLoadStore && SrcVT == MVT::i8) - return ARM64_AM::UXTB; + return AArch64_AM::UXTB; else if (!IsLoadStore && SrcVT == MVT::i16) - return ARM64_AM::UXTH; + return AArch64_AM::UXTH; else if (SrcVT == MVT::i32) - return ARM64_AM::UXTW; + return AArch64_AM::UXTW; assert(SrcVT != MVT::i64 && "extend from 64-bits?"); - return ARM64_AM::InvalidShiftExtend; + return AArch64_AM::InvalidShiftExtend; } else if (N.getOpcode() == ISD::AND) { ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); if (!CSD) - return ARM64_AM::InvalidShiftExtend; + return AArch64_AM::InvalidShiftExtend; uint64_t AndMask = CSD->getZExtValue(); switch (AndMask) { default: - return ARM64_AM::InvalidShiftExtend; + return AArch64_AM::InvalidShiftExtend; case 0xFF: - return !IsLoadStore ? ARM64_AM::UXTB : ARM64_AM::InvalidShiftExtend; + return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; case 0xFFFF: - return !IsLoadStore ? ARM64_AM::UXTH : ARM64_AM::InvalidShiftExtend; + return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; case 0xFFFFFFFF: - return ARM64_AM::UXTW; + return AArch64_AM::UXTW; } } - return ARM64_AM::InvalidShiftExtend; + return AArch64_AM::InvalidShiftExtend; } // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. 
static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { - if (DL->getOpcode() != ARM64ISD::DUPLANE16 && - DL->getOpcode() != ARM64ISD::DUPLANE32) + if (DL->getOpcode() != AArch64ISD::DUPLANE16 && + DL->getOpcode() != AArch64ISD::DUPLANE32) return false; SDValue SV = DL->getOperand(0); @@ -428,10 +429,10 @@ static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, return true; } -/// SelectMLAV64LaneV128 - ARM64 supports vector MLAs where one multiplicand is -/// a lane in the upper half of a 128-bit vector. Recognize and select this so -/// that we don't emit unnecessary lane extracts. -SDNode *ARM64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { +/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand +/// is a lane in the upper half of a 128-bit vector. Recognize and select this +/// so that we don't emit unnecessary lane extracts. +SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. @@ -458,23 +459,23 @@ SDNode *ARM64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { default: llvm_unreachable("Unrecognized MLA."); case MVT::v4i16: - MLAOpc = ARM64::MLAv4i16_indexed; + MLAOpc = AArch64::MLAv4i16_indexed; break; case MVT::v8i16: - MLAOpc = ARM64::MLAv8i16_indexed; + MLAOpc = AArch64::MLAv8i16_indexed; break; case MVT::v2i32: - MLAOpc = ARM64::MLAv2i32_indexed; + MLAOpc = AArch64::MLAv2i32_indexed; break; case MVT::v4i32: - MLAOpc = ARM64::MLAv4i32_indexed; + MLAOpc = AArch64::MLAv4i32_indexed; break; } return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops); } -SDNode *ARM64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { +SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { SDValue SMULLOp0; SDValue SMULLOp1; int LaneIdx; @@ -489,26 +490,26 @@ SDNode *ARM64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { unsigned SMULLOpc = ~0U; - if (IntNo == Intrinsic::arm64_neon_smull) { + if (IntNo == Intrinsic::aarch64_neon_smull) { switch (N->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unrecognized SMULL."); case MVT::v4i32: - SMULLOpc = ARM64::SMULLv4i16_indexed; + SMULLOpc = AArch64::SMULLv4i16_indexed; break; case MVT::v2i64: - SMULLOpc = ARM64::SMULLv2i32_indexed; + SMULLOpc = AArch64::SMULLv2i32_indexed; break; } - } else if (IntNo == Intrinsic::arm64_neon_umull) { + } else if (IntNo == Intrinsic::aarch64_neon_umull) { switch (N->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unrecognized SMULL."); case MVT::v4i32: - SMULLOpc = ARM64::UMULLv4i16_indexed; + SMULLOpc = AArch64::UMULLv4i16_indexed; break; case MVT::v2i64: - SMULLOpc = ARM64::UMULLv2i32_indexed; + SMULLOpc = AArch64::UMULLv2i32_indexed; break; } } else @@ -525,7 +526,7 @@ static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { if (N.getValueType() == MVT::i32) return N; - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, N, SubReg); return SDValue(Node, 0); @@ -534,10 +535,10 @@ static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { /// SelectArithExtendedRegister - Select a "extended register" operand. This /// operand folds in an extend followed by an optional left shift. 
-bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, - SDValue &Shift) { +bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, + SDValue &Shift) { unsigned ShiftVal = 0; - ARM64_AM::ShiftExtendType Ext; + AArch64_AM::ShiftExtendType Ext; if (N.getOpcode() == ISD::SHL) { ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); @@ -548,24 +549,24 @@ bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, return false; Ext = getExtendTypeForNode(N.getOperand(0)); - if (Ext == ARM64_AM::InvalidShiftExtend) + if (Ext == AArch64_AM::InvalidShiftExtend) return false; Reg = N.getOperand(0).getOperand(0); } else { Ext = getExtendTypeForNode(N); - if (Ext == ARM64_AM::InvalidShiftExtend) + if (Ext == AArch64_AM::InvalidShiftExtend) return false; Reg = N.getOperand(0); } - // ARM64 mandates that the RHS of the operation must use the smallest + // AArch64 mandates that the RHS of the operation must use the smallest // register classs that could contain the size being extended from. Thus, // if we're folding a (sext i8), we need the RHS to be a GPR32, even though // there might not be an actual 32-bit value in the program. We can // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. - assert(Ext != ARM64_AM::UXTX && Ext != ARM64_AM::SXTX); + assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); Reg = narrowIfNeeded(CurDAG, Reg); Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32); return isWorthFolding(N); @@ -574,7 +575,7 @@ bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit /// immediate" address. The "Size" argument is the size in bytes of the memory /// reference, which determines the scale. -bool ARM64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, +bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm) { const TargetLowering *TLI = getTargetLowering(); if (N.getOpcode() == ISD::FrameIndex) { @@ -584,7 +585,7 @@ bool ARM64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, return true; } - if (N.getOpcode() == ARM64ISD::ADDlow) { + if (N.getOpcode() == AArch64ISD::ADDlow) { GlobalAddressSDNode *GAN = dyn_cast(N.getOperand(1).getNode()); Base = N.getOperand(0); @@ -637,8 +638,9 @@ bool ARM64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, /// is not valid for a scaled immediate addressing mode. The "Size" argument /// is the size in bytes of the memory reference, which is needed here to know /// what is valid for a scaled immediate. 
-bool ARM64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, - SDValue &Base, SDValue &OffImm) { +bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, + SDValue &Base, + SDValue &OffImm) { if (!CurDAG->isBaseWithConstantOffset(N)) return false; if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { @@ -662,7 +664,7 @@ bool ARM64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, } static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); SDValue ImpDef = SDValue( CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64), 0); @@ -673,21 +675,22 @@ static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { /// \brief Check if the given SHL node (\p N), can be used to form an /// extended register for an addressing mode. -bool ARM64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, - bool WantExtend, SDValue &Offset, - SDValue &SignExtend) { +bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, + bool WantExtend, SDValue &Offset, + SDValue &SignExtend) { assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) return false; if (WantExtend) { - ARM64_AM::ShiftExtendType Ext = getExtendTypeForNode(N.getOperand(0), true); - if (Ext == ARM64_AM::InvalidShiftExtend) + AArch64_AM::ShiftExtendType Ext = + getExtendTypeForNode(N.getOperand(0), true); + if (Ext == AArch64_AM::InvalidShiftExtend) return false; Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); - SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32); + SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); } else { Offset = N.getOperand(0); SignExtend = CurDAG->getTargetConstant(0, MVT::i32); @@ -705,10 +708,10 @@ bool ARM64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, return false; } -bool ARM64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, - SDValue &Base, SDValue &Offset, - SDValue &SignExtend, - SDValue &DoShift) { +bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, + SDValue &Base, SDValue &Offset, + SDValue &SignExtend, + SDValue &DoShift) { if (N.getOpcode() != ISD::ADD) return false; SDValue LHS = N.getOperand(0); @@ -750,23 +753,25 @@ bool ARM64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, // There was no shift, whatever else we find. DoShift = CurDAG->getTargetConstant(false, MVT::i32); - ARM64_AM::ShiftExtendType Ext = ARM64_AM::InvalidShiftExtend; + AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; // Try to match an unshifted extend on the LHS. if (IsExtendedRegisterWorthFolding && - (Ext = getExtendTypeForNode(LHS, true)) != ARM64_AM::InvalidShiftExtend) { + (Ext = getExtendTypeForNode(LHS, true)) != + AArch64_AM::InvalidShiftExtend) { Base = RHS; Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); - SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32); + SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); if (isWorthFolding(LHS)) return true; } // Try to match an unshifted extend on the RHS. 
if (IsExtendedRegisterWorthFolding && - (Ext = getExtendTypeForNode(RHS, true)) != ARM64_AM::InvalidShiftExtend) { + (Ext = getExtendTypeForNode(RHS, true)) != + AArch64_AM::InvalidShiftExtend) { Base = LHS; Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); - SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32); + SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); if (isWorthFolding(RHS)) return true; } @@ -774,10 +779,10 @@ bool ARM64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, return false; } -bool ARM64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, - SDValue &Base, SDValue &Offset, - SDValue &SignExtend, - SDValue &DoShift) { +bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, + SDValue &Base, SDValue &Offset, + SDValue &SignExtend, + SDValue &DoShift) { if (N.getOpcode() != ISD::ADD) return false; SDValue LHS = N.getOperand(0); @@ -825,27 +830,27 @@ bool ARM64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, return true; } -SDValue ARM64DAGToDAGISel::createDTuple(ArrayRef Regs) { - static unsigned RegClassIDs[] = { ARM64::DDRegClassID, ARM64::DDDRegClassID, - ARM64::DDDDRegClassID }; - static unsigned SubRegs[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2, ARM64::dsub3 }; +SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef Regs) { + static unsigned RegClassIDs[] = { + AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; + static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1, + AArch64::dsub2, AArch64::dsub3 }; return createTuple(Regs, RegClassIDs, SubRegs); } -SDValue ARM64DAGToDAGISel::createQTuple(ArrayRef Regs) { - static unsigned RegClassIDs[] = { ARM64::QQRegClassID, ARM64::QQQRegClassID, - ARM64::QQQQRegClassID }; - static unsigned SubRegs[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2, ARM64::qsub3 }; +SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef Regs) { + static unsigned RegClassIDs[] = { + AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; + static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2, AArch64::qsub3 }; return createTuple(Regs, RegClassIDs, SubRegs); } -SDValue ARM64DAGToDAGISel::createTuple(ArrayRef Regs, - unsigned RegClassIDs[], - unsigned SubRegs[]) { +SDValue AArch64DAGToDAGISel::createTuple(ArrayRef Regs, + unsigned RegClassIDs[], + unsigned SubRegs[]) { // There's no special register-class for a vector-list of 1 element: it's just // a vector. if (Regs.size() == 1) @@ -872,8 +877,8 @@ SDValue ARM64DAGToDAGISel::createTuple(ArrayRef Regs, return SDValue(N, 0); } -SDNode *ARM64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, - unsigned Opc, bool isExt) { +SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, + unsigned Opc, bool isExt) { SDLoc dl(N); EVT VT = N->getValueType(0); @@ -893,7 +898,7 @@ SDNode *ARM64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, return CurDAG->getMachineNode(Opc, dl, VT, Ops); } -SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { +SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { LoadSDNode *LD = cast(N); if (LD->isUnindexed()) return nullptr; @@ -910,14 +915,14 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { ISD::LoadExtType ExtType = LD->getExtensionType(); bool InsertTo64 = false; if (VT == MVT::i64) - Opcode = IsPre ? ARM64::LDRXpre : ARM64::LDRXpost; + Opcode = IsPre ? 
AArch64::LDRXpre : AArch64::LDRXpost; else if (VT == MVT::i32) { if (ExtType == ISD::NON_EXTLOAD) - Opcode = IsPre ? ARM64::LDRWpre : ARM64::LDRWpost; + Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; else if (ExtType == ISD::SEXTLOAD) - Opcode = IsPre ? ARM64::LDRSWpre : ARM64::LDRSWpost; + Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; else { - Opcode = IsPre ? ARM64::LDRWpre : ARM64::LDRWpost; + Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; InsertTo64 = true; // The result of the load is only i32. It's the subreg_to_reg that makes // it into an i64. @@ -926,11 +931,11 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { } else if (VT == MVT::i16) { if (ExtType == ISD::SEXTLOAD) { if (DstVT == MVT::i64) - Opcode = IsPre ? ARM64::LDRSHXpre : ARM64::LDRSHXpost; + Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; else - Opcode = IsPre ? ARM64::LDRSHWpre : ARM64::LDRSHWpost; + Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; } else { - Opcode = IsPre ? ARM64::LDRHHpre : ARM64::LDRHHpost; + Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; InsertTo64 = DstVT == MVT::i64; // The result of the load is only i32. It's the subreg_to_reg that makes // it into an i64. @@ -939,22 +944,22 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { } else if (VT == MVT::i8) { if (ExtType == ISD::SEXTLOAD) { if (DstVT == MVT::i64) - Opcode = IsPre ? ARM64::LDRSBXpre : ARM64::LDRSBXpost; + Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; else - Opcode = IsPre ? ARM64::LDRSBWpre : ARM64::LDRSBWpost; + Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; } else { - Opcode = IsPre ? ARM64::LDRBBpre : ARM64::LDRBBpost; + Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; InsertTo64 = DstVT == MVT::i64; // The result of the load is only i32. It's the subreg_to_reg that makes // it into an i64. DstVT = MVT::i32; } } else if (VT == MVT::f32) { - Opcode = IsPre ? ARM64::LDRSpre : ARM64::LDRSpost; + Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; } else if (VT == MVT::f64 || VT.is64BitVector()) { - Opcode = IsPre ? ARM64::LDRDpre : ARM64::LDRDpost; + Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; } else if (VT.is128BitVector()) { - Opcode = IsPre ? ARM64::LDRQpre : ARM64::LDRQpost; + Opcode = IsPre ? 
AArch64::LDRQpre : AArch64::LDRQpost; } else return nullptr; SDValue Chain = LD->getChain(); @@ -969,11 +974,11 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { Done = true; SDValue LoadedVal = SDValue(Res, 1); if (InsertTo64) { - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); LoadedVal = - SDValue(CurDAG->getMachineNode(ARM64::SUBREG_TO_REG, SDLoc(N), MVT::i64, - CurDAG->getTargetConstant(0, MVT::i64), - LoadedVal, SubReg), + SDValue(CurDAG->getMachineNode( + AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64, + CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg), 0); } @@ -984,8 +989,8 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { return nullptr; } -SDNode *ARM64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, - unsigned SubRegIdx) { +SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, + unsigned Opc, unsigned SubRegIdx) { SDLoc dl(N); EVT VT = N->getValueType(0); SDValue Chain = N->getOperand(0); @@ -1008,8 +1013,8 @@ SDNode *ARM64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, return nullptr; } -SDNode *ARM64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, - unsigned Opc, unsigned SubRegIdx) { +SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, + unsigned Opc, unsigned SubRegIdx) { SDLoc dl(N); EVT VT = N->getValueType(0); SDValue Chain = N->getOperand(0); @@ -1043,8 +1048,8 @@ SDNode *ARM64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, return nullptr; } -SDNode *ARM64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, - unsigned Opc) { +SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, + unsigned Opc) { SDLoc dl(N); EVT VT = N->getOperand(2)->getValueType(0); @@ -1062,8 +1067,8 @@ SDNode *ARM64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, return St; } -SDNode *ARM64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, - unsigned Opc) { +SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, + unsigned Opc) { SDLoc dl(N); EVT VT = N->getOperand(2)->getValueType(0); SmallVector ResTys; @@ -1102,7 +1107,7 @@ class WidenVector { SDValue Undef = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); - return DAG.getTargetInsertSubreg(ARM64::dsub, DL, WideTy, Undef, V64Reg); + return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); } }; @@ -1114,12 +1119,12 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { MVT EltTy = VT.getVectorElementType().getSimpleVT(); MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); - return DAG.getTargetExtractSubreg(ARM64::dsub, SDLoc(V128Reg), NarrowTy, + return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, V128Reg); } -SDNode *ARM64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, - unsigned Opc) { +SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, + unsigned Opc) { SDLoc dl(N); EVT VT = N->getValueType(0); bool Narrow = VT.getSizeInBits() == 64; @@ -1149,8 +1154,8 @@ SDNode *ARM64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, SDValue SuperReg = SDValue(Ld, 0); EVT WideVT = RegSeq.getOperand(1)->getValueType(0); - static unsigned QSubs[] = { ARM64::qsub0, ARM64::qsub1, ARM64::qsub2, - ARM64::qsub3 }; + static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, + AArch64::qsub3 }; for (unsigned i = 0; i < NumVecs; ++i) { SDValue NV 
= CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); if (Narrow) @@ -1163,8 +1168,8 @@ SDNode *ARM64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, return Ld; } -SDNode *ARM64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, - unsigned Opc) { +SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, + unsigned Opc) { SDLoc dl(N); EVT VT = N->getValueType(0); bool Narrow = VT.getSizeInBits() == 64; @@ -1204,8 +1209,8 @@ SDNode *ARM64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); } else { EVT WideVT = RegSeq.getOperand(1)->getValueType(0); - static unsigned QSubs[] = { ARM64::qsub0, ARM64::qsub1, ARM64::qsub2, - ARM64::qsub3 }; + static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, + AArch64::qsub3 }; for (unsigned i = 0; i < NumVecs; ++i) { SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); @@ -1221,8 +1226,8 @@ SDNode *ARM64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, return Ld; } -SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, - unsigned Opc) { +SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, + unsigned Opc) { SDLoc dl(N); EVT VT = N->getOperand(2)->getValueType(0); bool Narrow = VT.getSizeInBits() == 64; @@ -1254,8 +1259,8 @@ SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, return St; } -SDNode *ARM64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, - unsigned Opc) { +SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, + unsigned Opc) { SDLoc dl(N); EVT VT = N->getOperand(2)->getValueType(0); bool Narrow = VT.getSizeInBits() == 64; @@ -1374,7 +1379,7 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, // operation. MSB = MSB > 31 ? 31 : MSB; - Opc = VT == MVT::i32 ? ARM64::UBFMWri : ARM64::UBFMXri; + Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; return true; } @@ -1410,9 +1415,9 @@ static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, // Check whether we really have a one bit extract here. if (And_mask >> Srl_imm == 0x1) { if (N->getValueType(0) == MVT::i32) - Opc = ARM64::UBFMWri; + Opc = AArch64::UBFMWri; else - Opc = ARM64::UBFMXri; + Opc = AArch64::UBFMXri; LSB = MSB = Srl_imm; @@ -1479,9 +1484,9 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, MSB = LSB + Width; // SRA requires a signed extraction if (VT == MVT::i32) - Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMWri : ARM64::UBFMWri; + Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; else - Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMXri : ARM64::UBFMXri; + Opc = N->getOpcode() == ISD::SRA ? 
AArch64::SBFMXri : AArch64::UBFMXri; return true; } @@ -1509,10 +1514,10 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, switch (NOpc) { default: return false; - case ARM64::SBFMWri: - case ARM64::UBFMWri: - case ARM64::SBFMXri: - case ARM64::UBFMXri: + case AArch64::SBFMWri: + case AArch64::UBFMWri: + case AArch64::SBFMXri: + case AArch64::UBFMXri: Opc = NOpc; Opd0 = N->getOperand(0); LSB = cast(N->getOperand(1).getNode())->getZExtValue(); @@ -1523,7 +1528,7 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, return false; } -SDNode *ARM64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { +SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { unsigned Opc, LSB, MSB; SDValue Opd0; if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB)) @@ -1533,12 +1538,12 @@ SDNode *ARM64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { // If the bit extract operation is 64bit but the original type is 32bit, we // need to add one EXTRACT_SUBREG. - if ((Opc == ARM64::SBFMXri || Opc == ARM64::UBFMXri) && VT == MVT::i32) { + if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64), CurDAG->getTargetConstant(MSB, MVT::i64)}; SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64); - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, SDValue(BFM, 0), SubReg); @@ -1588,7 +1593,7 @@ static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth) { uint64_t Imm = cast(Op.getOperand(1).getNode())->getZExtValue(); - Imm = ARM64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); + Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); getUsefulBits(Op, UsefulBits, Depth + 1); } @@ -1638,17 +1643,17 @@ static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, Mask.clearAllBits(); Mask.flipAllBits(); - if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSL) { + if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { // Shift Left - uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue); + uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); Mask = Mask.shl(ShiftAmt); getUsefulBits(Op, Mask, Depth + 1); Mask = Mask.lshr(ShiftAmt); - } else if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSR) { + } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { // Shift Right - // We do not handle ARM64_AM::ASR, because the sign will change the + // We do not handle AArch64_AM::ASR, because the sign will change the // number of useful bits - uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue); + uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); Mask = Mask.lshr(ShiftAmt); getUsefulBits(Op, Mask, Depth + 1); Mask = Mask.shl(ShiftAmt); @@ -1695,25 +1700,25 @@ static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, switch (UserNode->getMachineOpcode()) { default: return; - case ARM64::ANDSWri: - case ARM64::ANDSXri: - case ARM64::ANDWri: - case ARM64::ANDXri: + case AArch64::ANDSWri: + case AArch64::ANDSXri: + case AArch64::ANDWri: + case AArch64::ANDXri: // We increment Depth only when we call the getUsefulBits return 
getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, Depth); - case ARM64::UBFMWri: - case ARM64::UBFMXri: + case AArch64::UBFMWri: + case AArch64::UBFMXri: return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); - case ARM64::ORRWrs: - case ARM64::ORRXrs: + case AArch64::ORRWrs: + case AArch64::ORRXrs: if (UserNode->getOperand(1) != Orig) return; return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, Depth); - case ARM64::BFMWri: - case ARM64::BFMXri: + case AArch64::BFMWri: + case AArch64::BFMXri: return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); } } @@ -1751,7 +1756,7 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); - unsigned UBFMOpc = BitWidth == 32 ? ARM64::UBFMWri : ARM64::UBFMXri; + unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; SDNode *ShiftNode; if (ShlAmount > 0) { @@ -1833,9 +1838,9 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, // Set Opc EVT VT = N->getValueType(0); if (VT == MVT::i32) - Opc = ARM64::BFMWri; + Opc = AArch64::BFMWri; else if (VT == MVT::i64) - Opc = ARM64::BFMXri; + Opc = AArch64::BFMXri; else return false; @@ -1860,8 +1865,8 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, NumberOfIgnoredLowBits, true)) { // Check that the returned opcode is compatible with the pattern, // i.e., same type and zero extended (U and not S) - if ((BFXOpc != ARM64::UBFMXri && VT == MVT::i64) || - (BFXOpc != ARM64::UBFMWri && VT == MVT::i32)) + if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || + (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) continue; // Compute the width of the bitfield insertion @@ -1919,7 +1924,7 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, return false; } -SDNode *ARM64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { +SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { if (N->getOpcode() != ISD::OR) return nullptr; @@ -1938,11 +1943,11 @@ SDNode *ARM64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops); } -SDNode *ARM64DAGToDAGISel::SelectLIBM(SDNode *N) { +SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) { EVT VT = N->getValueType(0); unsigned Variant; unsigned Opc; - unsigned FRINTXOpcs[] = { ARM64::FRINTXSr, ARM64::FRINTXDr }; + unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr }; if (VT == MVT::f32) { Variant = 0; @@ -1958,22 +1963,22 @@ SDNode *ARM64DAGToDAGISel::SelectLIBM(SDNode *N) { default: return nullptr; // Unrecognized libm ISD node. Fall back on default codegen. 
case ISD::FCEIL: { - unsigned FRINTPOpcs[] = { ARM64::FRINTPSr, ARM64::FRINTPDr }; + unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr }; Opc = FRINTPOpcs[Variant]; break; } case ISD::FFLOOR: { - unsigned FRINTMOpcs[] = { ARM64::FRINTMSr, ARM64::FRINTMDr }; + unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr }; Opc = FRINTMOpcs[Variant]; break; } case ISD::FTRUNC: { - unsigned FRINTZOpcs[] = { ARM64::FRINTZSr, ARM64::FRINTZDr }; + unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr }; Opc = FRINTZOpcs[Variant]; break; } case ISD::FROUND: { - unsigned FRINTAOpcs[] = { ARM64::FRINTASr, ARM64::FRINTADr }; + unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr }; Opc = FRINTAOpcs[Variant]; break; } @@ -1993,14 +1998,14 @@ SDNode *ARM64DAGToDAGISel::SelectLIBM(SDNode *N) { } bool -ARM64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, +AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth) { APFloat FVal(0.0); if (ConstantFPSDNode *CN = dyn_cast(N)) FVal = CN->getValueAPF(); else if (LoadSDNode *LN = dyn_cast(N)) { // Some otherwise illegal constants are allowed in this case. - if (LN->getOperand(1).getOpcode() != ARM64ISD::ADDlow || + if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || !isa(LN->getOperand(1)->getOperand(1))) return false; @@ -2036,7 +2041,7 @@ ARM64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, return true; } -SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { +SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(errs() << "Selecting: "); DEBUG(Node->dump(CurDAG)); @@ -2108,10 +2113,10 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { default: assert(0 && "Unexpected vector element type!"); case 64: - SubReg = ARM64::dsub; + SubReg = AArch64::dsub; break; case 32: - SubReg = ARM64::ssub; + SubReg = AArch64::ssub; break; case 16: // FALLTHROUGH case 8: @@ -2131,10 +2136,10 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { if (ConstNode->isNullValue()) { if (VT == MVT::i32) return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), - ARM64::WZR, MVT::i32).getNode(); + AArch64::WZR, MVT::i32).getNode(); else if (VT == MVT::i64) return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), - ARM64::XZR, MVT::i64).getNode(); + AArch64::XZR, MVT::i64).getNode(); } break; } @@ -2142,22 +2147,22 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { case ISD::FrameIndex: { // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. int FI = cast(Node)->getIndex(); - unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0); + unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); const TargetLowering *TLI = getTargetLowering(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), CurDAG->getTargetConstant(Shifter, MVT::i32) }; - return CurDAG->SelectNodeTo(Node, ARM64::ADDXri, MVT::i64, Ops); + return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); } case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); switch (IntNo) { default: break; - case Intrinsic::arm64_ldaxp: - case Intrinsic::arm64_ldxp: { + case Intrinsic::aarch64_ldaxp: + case Intrinsic::aarch64_ldxp: { unsigned Op = - IntNo == Intrinsic::arm64_ldaxp ? ARM64::LDAXPX : ARM64::LDXPX; + IntNo == Intrinsic::aarch64_ldaxp ? 
AArch64::LDAXPX : AArch64::LDXPX; SDValue MemAddr = Node->getOperand(2); SDLoc DL(Node); SDValue Chain = Node->getOperand(0); @@ -2171,10 +2176,10 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { cast(Ld)->setMemRefs(MemOp, MemOp + 1); return Ld; } - case Intrinsic::arm64_stlxp: - case Intrinsic::arm64_stxp: { + case Intrinsic::aarch64_stlxp: + case Intrinsic::aarch64_stxp: { unsigned Op = - IntNo == Intrinsic::arm64_stlxp ? ARM64::STLXPX : ARM64::STXPX; + IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; SDLoc DL(Node); SDValue Chain = Node->getOperand(0); SDValue ValLo = Node->getOperand(2); @@ -2196,203 +2201,203 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { return St; } - case Intrinsic::arm64_neon_ld1x2: + case Intrinsic::aarch64_neon_ld1x2: if (VT == MVT::v8i8) - return SelectLoad(Node, 2, ARM64::LD1Twov8b, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 2, ARM64::LD1Twov16b, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD1Twov4h, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD1Twov8h, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD1Twov2s, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD1Twov4s, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD1Twov2d, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld1x3: + case Intrinsic::aarch64_neon_ld1x3: if (VT == MVT::v8i8) - return SelectLoad(Node, 3, ARM64::LD1Threev8b, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD1Threev16b, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 3, ARM64::LD1Threev4h, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD1Threev8h, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 3, ARM64::LD1Threev2s, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD1Threev4s, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD1Threev2d, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); break; - case 
Intrinsic::arm64_neon_ld1x4: + case Intrinsic::aarch64_neon_ld1x4: if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD1Fourv8b, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD1Fourv16b, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD1Fourv4h, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD1Fourv8h, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD1Fourv2s, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD1Fourv4s, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv2d, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld2: + case Intrinsic::aarch64_neon_ld2: if (VT == MVT::v8i8) - return SelectLoad(Node, 2, ARM64::LD2Twov8b, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 2, ARM64::LD2Twov16b, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD2Twov4h, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD2Twov8h, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD2Twov2s, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD2Twov4s, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD2Twov2d, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld3: + case Intrinsic::aarch64_neon_ld3: if (VT == MVT::v8i8) - return SelectLoad(Node, 3, ARM64::LD3Threev8b, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD3Threev16b, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 3, ARM64::LD3Threev4h, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD3Threev8h, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - 
return SelectLoad(Node, 3, ARM64::LD3Threev2s, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD3Threev4s, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD3Threev2d, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld4: + case Intrinsic::aarch64_neon_ld4: if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD4Fourv8b, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD4Fourv16b, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD4Fourv4h, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD4Fourv8h, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD4Fourv2s, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD4Fourv4s, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD4Fourv2d, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld2r: + case Intrinsic::aarch64_neon_ld2r: if (VT == MVT::v8i8) - return SelectLoad(Node, 2, ARM64::LD2Rv8b, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 2, ARM64::LD2Rv16b, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD2Rv4h, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD2Rv8h, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD2Rv2s, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD2Rv4s, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD2Rv1d, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD2Rv2d, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld3r: + case Intrinsic::aarch64_neon_ld3r: if (VT == MVT::v8i8) - return 
SelectLoad(Node, 3, ARM64::LD3Rv8b, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD3Rv16b, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 3, ARM64::LD3Rv4h, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD3Rv8h, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 3, ARM64::LD3Rv2s, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD3Rv4s, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD3Rv1d, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD3Rv2d, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld4r: + case Intrinsic::aarch64_neon_ld4r: if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD4Rv8b, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD4Rv16b, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD4Rv4h, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD4Rv8h, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD4Rv2s, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD4Rv4s, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD4Rv1d, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD4Rv2d, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld2lane: + case Intrinsic::aarch64_neon_ld2lane: if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 2, ARM64::LD2i8); + return SelectLoadLane(Node, 2, AArch64::LD2i8); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 2, ARM64::LD2i16); + return SelectLoadLane(Node, 2, AArch64::LD2i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectLoadLane(Node, 2, ARM64::LD2i32); + return SelectLoadLane(Node, 2, AArch64::LD2i32); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectLoadLane(Node, 2, ARM64::LD2i64); + return SelectLoadLane(Node, 2, AArch64::LD2i64); break; - case Intrinsic::arm64_neon_ld3lane: + case Intrinsic::aarch64_neon_ld3lane: if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 3, ARM64::LD3i8); + return 
SelectLoadLane(Node, 3, AArch64::LD3i8); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 3, ARM64::LD3i16); + return SelectLoadLane(Node, 3, AArch64::LD3i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectLoadLane(Node, 3, ARM64::LD3i32); + return SelectLoadLane(Node, 3, AArch64::LD3i32); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectLoadLane(Node, 3, ARM64::LD3i64); + return SelectLoadLane(Node, 3, AArch64::LD3i64); break; - case Intrinsic::arm64_neon_ld4lane: + case Intrinsic::aarch64_neon_ld4lane: if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 4, ARM64::LD4i8); + return SelectLoadLane(Node, 4, AArch64::LD4i8); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 4, ARM64::LD4i16); + return SelectLoadLane(Node, 4, AArch64::LD4i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectLoadLane(Node, 4, ARM64::LD4i32); + return SelectLoadLane(Node, 4, AArch64::LD4i32); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectLoadLane(Node, 4, ARM64::LD4i64); + return SelectLoadLane(Node, 4, AArch64::LD4i64); break; } } break; @@ -2401,32 +2406,32 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { switch (IntNo) { default: break; - case Intrinsic::arm64_neon_tbl2: - return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBLv8i8Two - : ARM64::TBLv16i8Two, + case Intrinsic::aarch64_neon_tbl2: + return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two + : AArch64::TBLv16i8Two, false); - case Intrinsic::arm64_neon_tbl3: - return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBLv8i8Three - : ARM64::TBLv16i8Three, + case Intrinsic::aarch64_neon_tbl3: + return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three + : AArch64::TBLv16i8Three, false); - case Intrinsic::arm64_neon_tbl4: - return SelectTable(Node, 4, VT == MVT::v8i8 ? ARM64::TBLv8i8Four - : ARM64::TBLv16i8Four, + case Intrinsic::aarch64_neon_tbl4: + return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four + : AArch64::TBLv16i8Four, false); - case Intrinsic::arm64_neon_tbx2: - return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBXv8i8Two - : ARM64::TBXv16i8Two, + case Intrinsic::aarch64_neon_tbx2: + return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two + : AArch64::TBXv16i8Two, true); - case Intrinsic::arm64_neon_tbx3: - return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBXv8i8Three - : ARM64::TBXv16i8Three, + case Intrinsic::aarch64_neon_tbx3: + return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three + : AArch64::TBXv16i8Three, true); - case Intrinsic::arm64_neon_tbx4: - return SelectTable(Node, 4, VT == MVT::v8i8 ? ARM64::TBXv8i8Four - : ARM64::TBXv16i8Four, + case Intrinsic::aarch64_neon_tbx4: + return SelectTable(Node, 4, VT == MVT::v8i8 ? 
AArch64::TBXv8i8Four + : AArch64::TBXv16i8Four, true); - case Intrinsic::arm64_neon_smull: - case Intrinsic::arm64_neon_umull: + case Intrinsic::aarch64_neon_smull: + case Intrinsic::aarch64_neon_umull: if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node)) return N; break; @@ -2440,563 +2445,563 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { switch (IntNo) { default: break; - case Intrinsic::arm64_neon_st1x2: { + case Intrinsic::aarch64_neon_st1x2: { if (VT == MVT::v8i8) - return SelectStore(Node, 2, ARM64::ST1Twov8b); + return SelectStore(Node, 2, AArch64::ST1Twov8b); else if (VT == MVT::v16i8) - return SelectStore(Node, 2, ARM64::ST1Twov16b); + return SelectStore(Node, 2, AArch64::ST1Twov16b); else if (VT == MVT::v4i16) - return SelectStore(Node, 2, ARM64::ST1Twov4h); + return SelectStore(Node, 2, AArch64::ST1Twov4h); else if (VT == MVT::v8i16) - return SelectStore(Node, 2, ARM64::ST1Twov8h); + return SelectStore(Node, 2, AArch64::ST1Twov8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 2, ARM64::ST1Twov2s); + return SelectStore(Node, 2, AArch64::ST1Twov2s); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 2, ARM64::ST1Twov4s); + return SelectStore(Node, 2, AArch64::ST1Twov4s); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 2, ARM64::ST1Twov2d); + return SelectStore(Node, 2, AArch64::ST1Twov2d); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 2, ARM64::ST1Twov1d); + return SelectStore(Node, 2, AArch64::ST1Twov1d); break; } - case Intrinsic::arm64_neon_st1x3: { + case Intrinsic::aarch64_neon_st1x3: { if (VT == MVT::v8i8) - return SelectStore(Node, 3, ARM64::ST1Threev8b); + return SelectStore(Node, 3, AArch64::ST1Threev8b); else if (VT == MVT::v16i8) - return SelectStore(Node, 3, ARM64::ST1Threev16b); + return SelectStore(Node, 3, AArch64::ST1Threev16b); else if (VT == MVT::v4i16) - return SelectStore(Node, 3, ARM64::ST1Threev4h); + return SelectStore(Node, 3, AArch64::ST1Threev4h); else if (VT == MVT::v8i16) - return SelectStore(Node, 3, ARM64::ST1Threev8h); + return SelectStore(Node, 3, AArch64::ST1Threev8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 3, ARM64::ST1Threev2s); + return SelectStore(Node, 3, AArch64::ST1Threev2s); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 3, ARM64::ST1Threev4s); + return SelectStore(Node, 3, AArch64::ST1Threev4s); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 3, ARM64::ST1Threev2d); + return SelectStore(Node, 3, AArch64::ST1Threev2d); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 3, ARM64::ST1Threev1d); + return SelectStore(Node, 3, AArch64::ST1Threev1d); break; } - case Intrinsic::arm64_neon_st1x4: { + case Intrinsic::aarch64_neon_st1x4: { if (VT == MVT::v8i8) - return SelectStore(Node, 4, ARM64::ST1Fourv8b); + return SelectStore(Node, 4, AArch64::ST1Fourv8b); else if (VT == MVT::v16i8) - return SelectStore(Node, 4, ARM64::ST1Fourv16b); + return SelectStore(Node, 4, AArch64::ST1Fourv16b); else if (VT == MVT::v4i16) - return SelectStore(Node, 4, ARM64::ST1Fourv4h); + return SelectStore(Node, 4, AArch64::ST1Fourv4h); else if (VT == MVT::v8i16) - return SelectStore(Node, 4, ARM64::ST1Fourv8h); + return SelectStore(Node, 4, AArch64::ST1Fourv8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 4, ARM64::ST1Fourv2s); + return SelectStore(Node, 4, AArch64::ST1Fourv2s); else if (VT == MVT::v4i32 || VT == 
MVT::v4f32) - return SelectStore(Node, 4, ARM64::ST1Fourv4s); + return SelectStore(Node, 4, AArch64::ST1Fourv4s); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 4, ARM64::ST1Fourv2d); + return SelectStore(Node, 4, AArch64::ST1Fourv2d); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 4, ARM64::ST1Fourv1d); + return SelectStore(Node, 4, AArch64::ST1Fourv1d); break; } - case Intrinsic::arm64_neon_st2: { + case Intrinsic::aarch64_neon_st2: { if (VT == MVT::v8i8) - return SelectStore(Node, 2, ARM64::ST2Twov8b); + return SelectStore(Node, 2, AArch64::ST2Twov8b); else if (VT == MVT::v16i8) - return SelectStore(Node, 2, ARM64::ST2Twov16b); + return SelectStore(Node, 2, AArch64::ST2Twov16b); else if (VT == MVT::v4i16) - return SelectStore(Node, 2, ARM64::ST2Twov4h); + return SelectStore(Node, 2, AArch64::ST2Twov4h); else if (VT == MVT::v8i16) - return SelectStore(Node, 2, ARM64::ST2Twov8h); + return SelectStore(Node, 2, AArch64::ST2Twov8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 2, ARM64::ST2Twov2s); + return SelectStore(Node, 2, AArch64::ST2Twov2s); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 2, ARM64::ST2Twov4s); + return SelectStore(Node, 2, AArch64::ST2Twov4s); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 2, ARM64::ST2Twov2d); + return SelectStore(Node, 2, AArch64::ST2Twov2d); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 2, ARM64::ST1Twov1d); + return SelectStore(Node, 2, AArch64::ST1Twov1d); break; } - case Intrinsic::arm64_neon_st3: { + case Intrinsic::aarch64_neon_st3: { if (VT == MVT::v8i8) - return SelectStore(Node, 3, ARM64::ST3Threev8b); + return SelectStore(Node, 3, AArch64::ST3Threev8b); else if (VT == MVT::v16i8) - return SelectStore(Node, 3, ARM64::ST3Threev16b); + return SelectStore(Node, 3, AArch64::ST3Threev16b); else if (VT == MVT::v4i16) - return SelectStore(Node, 3, ARM64::ST3Threev4h); + return SelectStore(Node, 3, AArch64::ST3Threev4h); else if (VT == MVT::v8i16) - return SelectStore(Node, 3, ARM64::ST3Threev8h); + return SelectStore(Node, 3, AArch64::ST3Threev8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 3, ARM64::ST3Threev2s); + return SelectStore(Node, 3, AArch64::ST3Threev2s); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 3, ARM64::ST3Threev4s); + return SelectStore(Node, 3, AArch64::ST3Threev4s); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 3, ARM64::ST3Threev2d); + return SelectStore(Node, 3, AArch64::ST3Threev2d); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 3, ARM64::ST1Threev1d); + return SelectStore(Node, 3, AArch64::ST1Threev1d); break; } - case Intrinsic::arm64_neon_st4: { + case Intrinsic::aarch64_neon_st4: { if (VT == MVT::v8i8) - return SelectStore(Node, 4, ARM64::ST4Fourv8b); + return SelectStore(Node, 4, AArch64::ST4Fourv8b); else if (VT == MVT::v16i8) - return SelectStore(Node, 4, ARM64::ST4Fourv16b); + return SelectStore(Node, 4, AArch64::ST4Fourv16b); else if (VT == MVT::v4i16) - return SelectStore(Node, 4, ARM64::ST4Fourv4h); + return SelectStore(Node, 4, AArch64::ST4Fourv4h); else if (VT == MVT::v8i16) - return SelectStore(Node, 4, ARM64::ST4Fourv8h); + return SelectStore(Node, 4, AArch64::ST4Fourv8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 4, ARM64::ST4Fourv2s); + return SelectStore(Node, 4, AArch64::ST4Fourv2s); else if (VT == 
MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 4, ARM64::ST4Fourv4s); + return SelectStore(Node, 4, AArch64::ST4Fourv4s); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 4, ARM64::ST4Fourv2d); + return SelectStore(Node, 4, AArch64::ST4Fourv2d); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 4, ARM64::ST1Fourv1d); + return SelectStore(Node, 4, AArch64::ST1Fourv1d); break; } - case Intrinsic::arm64_neon_st2lane: { + case Intrinsic::aarch64_neon_st2lane: { if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 2, ARM64::ST2i8); + return SelectStoreLane(Node, 2, AArch64::ST2i8); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 2, ARM64::ST2i16); + return SelectStoreLane(Node, 2, AArch64::ST2i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectStoreLane(Node, 2, ARM64::ST2i32); + return SelectStoreLane(Node, 2, AArch64::ST2i32); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectStoreLane(Node, 2, ARM64::ST2i64); + return SelectStoreLane(Node, 2, AArch64::ST2i64); break; } - case Intrinsic::arm64_neon_st3lane: { + case Intrinsic::aarch64_neon_st3lane: { if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 3, ARM64::ST3i8); + return SelectStoreLane(Node, 3, AArch64::ST3i8); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 3, ARM64::ST3i16); + return SelectStoreLane(Node, 3, AArch64::ST3i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectStoreLane(Node, 3, ARM64::ST3i32); + return SelectStoreLane(Node, 3, AArch64::ST3i32); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectStoreLane(Node, 3, ARM64::ST3i64); + return SelectStoreLane(Node, 3, AArch64::ST3i64); break; } - case Intrinsic::arm64_neon_st4lane: { + case Intrinsic::aarch64_neon_st4lane: { if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 4, ARM64::ST4i8); + return SelectStoreLane(Node, 4, AArch64::ST4i8); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 4, ARM64::ST4i16); + return SelectStoreLane(Node, 4, AArch64::ST4i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectStoreLane(Node, 4, ARM64::ST4i32); + return SelectStoreLane(Node, 4, AArch64::ST4i32); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectStoreLane(Node, 4, ARM64::ST4i64); + return SelectStoreLane(Node, 4, AArch64::ST4i64); break; } } } - case ARM64ISD::LD2post: { + case AArch64ISD::LD2post: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 2, ARM64::LD2Twov8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 2, ARM64::LD2Twov16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 2, ARM64::LD2Twov4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 2, ARM64::LD2Twov8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 2, 
ARM64::LD2Twov2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 2, ARM64::LD2Twov4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 2, ARM64::LD1Twov1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 2, ARM64::LD2Twov2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD3post: { + case AArch64ISD::LD3post: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 3, ARM64::LD3Threev8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 3, ARM64::LD3Threev16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 3, ARM64::LD3Threev4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 3, ARM64::LD3Threev8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 3, ARM64::LD3Threev2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 3, ARM64::LD3Threev4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 3, ARM64::LD1Threev1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 3, ARM64::LD3Threev2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD4post: { + case AArch64ISD::LD4post: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 4, ARM64::LD4Fourv8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 4, ARM64::LD4Fourv16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 4, ARM64::LD4Fourv4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 4, ARM64::LD4Fourv8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 4, ARM64::LD4Fourv2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 4, ARM64::LD4Fourv4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 
4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 4, ARM64::LD4Fourv2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD1x2post: { + case AArch64ISD::LD1x2post: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 2, ARM64::LD1Twov8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 2, ARM64::LD1Twov16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 2, ARM64::LD1Twov4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 2, ARM64::LD1Twov8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 2, ARM64::LD1Twov2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 2, ARM64::LD1Twov4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 2, ARM64::LD1Twov1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 2, ARM64::LD1Twov2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD1x3post: { + case AArch64ISD::LD1x3post: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 3, ARM64::LD1Threev8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 3, ARM64::LD1Threev16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 3, ARM64::LD1Threev4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 3, ARM64::LD1Threev8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 3, ARM64::LD1Threev2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 3, ARM64::LD1Threev4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 3, ARM64::LD1Threev1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 3, ARM64::LD1Threev2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD1x4post: { + case AArch64ISD::LD1x4post: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, 
AArch64::LD1Fourv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD1DUPpost: { + case AArch64ISD::LD1DUPpost: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 1, ARM64::LD1Rv8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 1, ARM64::LD1Rv16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 1, ARM64::LD1Rv4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 1, ARM64::LD1Rv8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 1, ARM64::LD1Rv2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 1, ARM64::LD1Rv4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 1, ARM64::LD1Rv1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 1, ARM64::LD1Rv2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD2DUPpost: { + case AArch64ISD::LD2DUPpost: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 2, ARM64::LD2Rv8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 2, ARM64::LD2Rv16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 2, ARM64::LD2Rv4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 2, ARM64::LD2Rv8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 
2, AArch64::LD2Rv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 2, ARM64::LD2Rv2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 2, ARM64::LD2Rv4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 2, ARM64::LD2Rv1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 2, ARM64::LD2Rv2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD3DUPpost: { + case AArch64ISD::LD3DUPpost: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 3, ARM64::LD3Rv8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 3, ARM64::LD3Rv16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 3, ARM64::LD3Rv4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 3, ARM64::LD3Rv8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 3, ARM64::LD3Rv2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 3, ARM64::LD3Rv4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 3, ARM64::LD3Rv1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 3, ARM64::LD3Rv2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD4DUPpost: { + case AArch64ISD::LD4DUPpost: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 4, ARM64::LD4Rv8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 4, ARM64::LD4Rv16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 4, ARM64::LD4Rv4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 4, ARM64::LD4Rv8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 4, ARM64::LD4Rv2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 4, ARM64::LD4Rv4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 4, ARM64::LD4Rv1d_POST, ARM64::dsub0); + return 
SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 4, ARM64::LD4Rv2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD1LANEpost: { + case AArch64ISD::LD1LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectPostLoadLane(Node, 1, ARM64::LD1i8_POST); + return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectPostLoadLane(Node, 1, ARM64::LD1i16_POST); + return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectPostLoadLane(Node, 1, ARM64::LD1i32_POST); + return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectPostLoadLane(Node, 1, ARM64::LD1i64_POST); + return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); break; } - case ARM64ISD::LD2LANEpost: { + case AArch64ISD::LD2LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectPostLoadLane(Node, 2, ARM64::LD2i8_POST); + return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectPostLoadLane(Node, 2, ARM64::LD2i16_POST); + return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectPostLoadLane(Node, 2, ARM64::LD2i32_POST); + return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectPostLoadLane(Node, 2, ARM64::LD2i64_POST); + return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); break; } - case ARM64ISD::LD3LANEpost: { + case AArch64ISD::LD3LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectPostLoadLane(Node, 3, ARM64::LD3i8_POST); + return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectPostLoadLane(Node, 3, ARM64::LD3i16_POST); + return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectPostLoadLane(Node, 3, ARM64::LD3i32_POST); + return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectPostLoadLane(Node, 3, ARM64::LD3i64_POST); + return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); break; } - case ARM64ISD::LD4LANEpost: { + case AArch64ISD::LD4LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectPostLoadLane(Node, 4, ARM64::LD4i8_POST); + return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectPostLoadLane(Node, 4, ARM64::LD4i16_POST); + return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectPostLoadLane(Node, 4, ARM64::LD4i32_POST); + return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectPostLoadLane(Node, 4, ARM64::LD4i64_POST); + return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); break; } - case ARM64ISD::ST2post: { + case AArch64ISD::ST2post: { VT = 
Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) - return SelectPostStore(Node, 2, ARM64::ST2Twov8b_POST); + return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); else if (VT == MVT::v16i8) - return SelectPostStore(Node, 2, ARM64::ST2Twov16b_POST); + return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); else if (VT == MVT::v4i16) - return SelectPostStore(Node, 2, ARM64::ST2Twov4h_POST); + return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); else if (VT == MVT::v8i16) - return SelectPostStore(Node, 2, ARM64::ST2Twov8h_POST); + return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostStore(Node, 2, ARM64::ST2Twov2s_POST); + return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostStore(Node, 2, ARM64::ST2Twov4s_POST); + return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostStore(Node, 2, ARM64::ST2Twov2d_POST); + return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostStore(Node, 2, ARM64::ST1Twov1d_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); break; } - case ARM64ISD::ST3post: { + case AArch64ISD::ST3post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) - return SelectPostStore(Node, 3, ARM64::ST3Threev8b_POST); + return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); else if (VT == MVT::v16i8) - return SelectPostStore(Node, 3, ARM64::ST3Threev16b_POST); + return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); else if (VT == MVT::v4i16) - return SelectPostStore(Node, 3, ARM64::ST3Threev4h_POST); + return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); else if (VT == MVT::v8i16) - return SelectPostStore(Node, 3, ARM64::ST3Threev8h_POST); + return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostStore(Node, 3, ARM64::ST3Threev2s_POST); + return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostStore(Node, 3, ARM64::ST3Threev4s_POST); + return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostStore(Node, 3, ARM64::ST3Threev2d_POST); + return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostStore(Node, 3, ARM64::ST1Threev1d_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); break; } - case ARM64ISD::ST4post: { + case AArch64ISD::ST4post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) - return SelectPostStore(Node, 4, ARM64::ST4Fourv8b_POST); + return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); else if (VT == MVT::v16i8) - return SelectPostStore(Node, 4, ARM64::ST4Fourv16b_POST); + return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); else if (VT == MVT::v4i16) - return SelectPostStore(Node, 4, ARM64::ST4Fourv4h_POST); + return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); else if (VT == MVT::v8i16) - return SelectPostStore(Node, 4, ARM64::ST4Fourv8h_POST); + return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostStore(Node, 4, ARM64::ST4Fourv2s_POST); + return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); else if (VT == MVT::v4i32 || VT == MVT::v4f32) 
- return SelectPostStore(Node, 4, ARM64::ST4Fourv4s_POST); + return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostStore(Node, 4, ARM64::ST4Fourv2d_POST); + return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostStore(Node, 4, ARM64::ST1Fourv1d_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); break; } - case ARM64ISD::ST1x2post: { + case AArch64ISD::ST1x2post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) - return SelectPostStore(Node, 2, ARM64::ST1Twov8b_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); else if (VT == MVT::v16i8) - return SelectPostStore(Node, 2, ARM64::ST1Twov16b_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); else if (VT == MVT::v4i16) - return SelectPostStore(Node, 2, ARM64::ST1Twov4h_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); else if (VT == MVT::v8i16) - return SelectPostStore(Node, 2, ARM64::ST1Twov8h_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostStore(Node, 2, ARM64::ST1Twov2s_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostStore(Node, 2, ARM64::ST1Twov4s_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostStore(Node, 2, ARM64::ST1Twov1d_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostStore(Node, 2, ARM64::ST1Twov2d_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); break; } - case ARM64ISD::ST1x3post: { + case AArch64ISD::ST1x3post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) - return SelectPostStore(Node, 3, ARM64::ST1Threev8b_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); else if (VT == MVT::v16i8) - return SelectPostStore(Node, 3, ARM64::ST1Threev16b_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); else if (VT == MVT::v4i16) - return SelectPostStore(Node, 3, ARM64::ST1Threev4h_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); else if (VT == MVT::v8i16) - return SelectPostStore(Node, 3, ARM64::ST1Threev8h_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostStore(Node, 3, ARM64::ST1Threev2s_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostStore(Node, 3, ARM64::ST1Threev4s_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostStore(Node, 3, ARM64::ST1Threev1d_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostStore(Node, 3, ARM64::ST1Threev2d_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); break; } - case ARM64ISD::ST1x4post: { + case AArch64ISD::ST1x4post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) - return SelectPostStore(Node, 4, ARM64::ST1Fourv8b_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); else if (VT == MVT::v16i8) - return SelectPostStore(Node, 4, ARM64::ST1Fourv16b_POST); + return SelectPostStore(Node, 
4, AArch64::ST1Fourv16b_POST); else if (VT == MVT::v4i16) - return SelectPostStore(Node, 4, ARM64::ST1Fourv4h_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); else if (VT == MVT::v8i16) - return SelectPostStore(Node, 4, ARM64::ST1Fourv8h_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostStore(Node, 4, ARM64::ST1Fourv2s_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostStore(Node, 4, ARM64::ST1Fourv4s_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostStore(Node, 4, ARM64::ST1Fourv1d_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostStore(Node, 4, ARM64::ST1Fourv2d_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); break; } - case ARM64ISD::ST2LANEpost: { + case AArch64ISD::ST2LANEpost: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectPostStoreLane(Node, 2, ARM64::ST2i8_POST); + return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectPostStoreLane(Node, 2, ARM64::ST2i16_POST); + return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectPostStoreLane(Node, 2, ARM64::ST2i32_POST); + return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectPostStoreLane(Node, 2, ARM64::ST2i64_POST); + return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); break; } - case ARM64ISD::ST3LANEpost: { + case AArch64ISD::ST3LANEpost: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectPostStoreLane(Node, 3, ARM64::ST3i8_POST); + return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectPostStoreLane(Node, 3, ARM64::ST3i16_POST); + return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectPostStoreLane(Node, 3, ARM64::ST3i32_POST); + return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectPostStoreLane(Node, 3, ARM64::ST3i64_POST); + return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); break; } - case ARM64ISD::ST4LANEpost: { + case AArch64ISD::ST4LANEpost: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectPostStoreLane(Node, 4, ARM64::ST4i8_POST); + return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectPostStoreLane(Node, 4, ARM64::ST4i16_POST); + return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectPostStoreLane(Node, 4, ARM64::ST4i32_POST); + return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectPostStoreLane(Node, 4, ARM64::ST4i64_POST); + return SelectPostStoreLane(Node, 4, 
                                      AArch64::ST4i64_POST);
     break;
   }
@@ -3022,9 +3027,9 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) {
   return ResNode;
 }
 
-/// createARM64ISelDag - This pass converts a legalized DAG into a
-/// ARM64-specific DAG, ready for instruction scheduling.
-FunctionPass *llvm::createARM64ISelDag(ARM64TargetMachine &TM,
-                                       CodeGenOpt::Level OptLevel) {
-  return new ARM64DAGToDAGISel(TM, OptLevel);
+/// createAArch64ISelDag - This pass converts a legalized DAG into a
+/// AArch64-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
+                                         CodeGenOpt::Level OptLevel) {
+  return new AArch64DAGToDAGISel(TM, OptLevel);
 }
diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
similarity index 80%
rename from lib/Target/ARM64/ARM64ISelLowering.cpp
rename to lib/Target/AArch64/AArch64ISelLowering.cpp
index c24b7deea94a..4ddba0073398 100644
--- a/lib/Target/ARM64/ARM64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- ARM64ISelLowering.cpp - ARM64 DAG Lowering Implementation --------===//
+//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,18 +7,18 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the ARM64TargetLowering class.
+// This file implements the AArch64TargetLowering class.
 //
 //===----------------------------------------------------------------------===//
 
-#include "ARM64ISelLowering.h"
-#include "ARM64PerfectShuffle.h"
-#include "ARM64Subtarget.h"
-#include "ARM64CallingConv.h"
-#include "ARM64MachineFunctionInfo.h"
-#include "ARM64TargetMachine.h"
-#include "ARM64TargetObjectFile.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
+#include "AArch64ISelLowering.h"
+#include "AArch64PerfectShuffle.h"
+#include "AArch64Subtarget.h"
+#include "AArch64CallingConv.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "AArch64TargetObjectFile.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -34,7 +34,7 @@
 #include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
-#define DEBUG_TYPE "arm64-lower"
+#define DEBUG_TYPE "aarch64-lower"
 
 STATISTIC(NumTailCalls, "Number of tail calls");
 STATISTIC(NumShiftInserts, "Number of vector shift inserts");
@@ -48,38 +48,38 @@ static cl::opt
 Align(cl::desc("Load/store alignment support"),
       cl::Hidden, cl::init(NoStrictAlign), cl::values(
-          clEnumValN(StrictAlign, "arm64-strict-align",
+          clEnumValN(StrictAlign, "aarch64-strict-align",
                      "Disallow all unaligned memory accesses"),
-          clEnumValN(NoStrictAlign, "arm64-no-strict-align",
+          clEnumValN(NoStrictAlign, "aarch64-no-strict-align",
                      "Allow unaligned memory accesses"),
           clEnumValEnd));
 
 // Place holder until extr generation is tested fully.
 static cl::opt<bool>
-EnableARM64ExtrGeneration("arm64-extr-generation", cl::Hidden,
-                          cl::desc("Allow ARM64 (or (shift)(shift))->extract"),
+EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden,
+                            cl::desc("Allow AArch64 (or (shift)(shift))->extract"),
                           cl::init(true));
 
 static cl::opt<bool>
-EnableARM64SlrGeneration("arm64-shift-insert-generation", cl::Hidden,
-                         cl::desc("Allow ARM64 SLI/SRI formation"),
+EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
+                           cl::desc("Allow AArch64 SLI/SRI formation"),
                          cl::init(false));
 
 //===----------------------------------------------------------------------===//
-// ARM64 Lowering public interface.
+// AArch64 Lowering public interface.
 //===----------------------------------------------------------------------===//
 
 static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
-  if (TM.getSubtarget<ARM64Subtarget>().isTargetDarwin())
-    return new ARM64_MachoTargetObjectFile();
+  if (TM.getSubtarget<AArch64Subtarget>().isTargetDarwin())
+    return new AArch64_MachoTargetObjectFile();
 
-  return new ARM64_ELFTargetObjectFile();
+  return new AArch64_ELFTargetObjectFile();
 }
 
-ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
+AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
     : TargetLowering(TM, createTLOF(TM)) {
-  Subtarget = &TM.getSubtarget<ARM64Subtarget>();
+  Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 
-  // ARM64 doesn't have comparisons which set GPRs or setcc instructions, so
+  // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
   // we have to make something up. Arbitrarily, choose ZeroOrOne.
   setBooleanContents(ZeroOrOneBooleanContent);
   // When comparing vectors the result sets the different elements in the
@@ -87,19 +87,19 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
   // Set up the register classes.
-  addRegisterClass(MVT::i32, &ARM64::GPR32allRegClass);
-  addRegisterClass(MVT::i64, &ARM64::GPR64allRegClass);
+  addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
+  addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
 
   if (Subtarget->hasFPARMv8()) {
-    addRegisterClass(MVT::f16, &ARM64::FPR16RegClass);
-    addRegisterClass(MVT::f32, &ARM64::FPR32RegClass);
-    addRegisterClass(MVT::f64, &ARM64::FPR64RegClass);
-    addRegisterClass(MVT::f128, &ARM64::FPR128RegClass);
+    addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
+    addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
+    addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
+    addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
   }
 
   if (Subtarget->hasNEON()) {
-    addRegisterClass(MVT::v16i8, &ARM64::FPR8RegClass);
-    addRegisterClass(MVT::v8i16, &ARM64::FPR16RegClass);
+    addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
+    addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
     // Someone set us up the NEON.
     addDRTypeForNEON(MVT::v2f32);
     addDRTypeForNEON(MVT::v8i8);
@@ -209,8 +209,8 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
   // Exception handling.
   // FIXME: These are guesses. Has this been defined yet?
- setExceptionPointerRegister(ARM64::X0); - setExceptionSelectorRegister(ARM64::X1); + setExceptionPointerRegister(AArch64::X0); + setExceptionSelectorRegister(AArch64::X1); // Constant pool entries setOperationAction(ISD::ConstantPool, MVT::i64, Custom); @@ -228,17 +228,17 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setOperationAction(ISD::SUBC, MVT::i64, Custom); setOperationAction(ISD::SUBE, MVT::i64, Custom); - // ARM64 lacks both left-rotate and popcount instructions. + // AArch64 lacks both left-rotate and popcount instructions. setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); - // ARM64 doesn't have {U|S}MUL_LOHI. + // AArch64 doesn't have {U|S}MUL_LOHI. setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); // Expand the undefined-at-zero variants to cttz/ctlz to their defined-at-zero - // counterparts, which ARM64 supports directly. + // counterparts, which AArch64 supports directly. setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); @@ -279,7 +279,7 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); - // ARM64 has implementations of a lot of rounding-like FP operations. + // AArch64 has implementations of a lot of rounding-like FP operations. static MVT RoundingTypes[] = { MVT::f32, MVT::f64}; for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) { MVT Ty = RoundingTypes[I]; @@ -304,8 +304,8 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setOperationAction(ISD::FSINCOS, MVT::f32, Expand); } - // ARM64 does not have floating-point extending loads, i1 sign-extending load, - // floating-point truncating stores, or v2i32->v2i16 truncating store. + // AArch64 does not have floating-point extending loads, i1 sign-extending + // load, floating-point truncating stores, or v2i32->v2i16 truncating store. setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand); @@ -371,7 +371,7 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4; MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4; - setStackPointerRegisterToSaveRestore(ARM64::SP); + setStackPointerRegisterToSaveRestore(AArch64::SP); setSchedulingPreference(Sched::Hybrid); @@ -421,7 +421,7 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setOperationAction(ISD::MUL, MVT::v1i64, Expand); - // ARM64 doesn't have a direct vector ->f32 conversion instructions for + // AArch64 doesn't have a direct vector ->f32 conversion instructions for // elements smaller than i32, so promote the input to i32 first. 
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote); @@ -433,7 +433,7 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); - // ARM64 doesn't have MUL.2d: + // AArch64 doesn't have MUL.2d: setOperationAction(ISD::MUL, MVT::v2i64, Expand); setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal); setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); @@ -461,7 +461,7 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); } - // ARM64 has implementations of a lot of rounding-like FP operations. + // AArch64 has implementations of a lot of rounding-like FP operations. static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64 }; for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) { MVT Ty = RoundingVecTypes[I]; @@ -475,7 +475,7 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) } } -void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { +void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { if (VT == MVT::v2f32) { setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i32); @@ -543,17 +543,17 @@ void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { } } -void ARM64TargetLowering::addDRTypeForNEON(MVT VT) { - addRegisterClass(VT, &ARM64::FPR64RegClass); +void AArch64TargetLowering::addDRTypeForNEON(MVT VT) { + addRegisterClass(VT, &AArch64::FPR64RegClass); addTypeForNEON(VT, MVT::v2i32); } -void ARM64TargetLowering::addQRTypeForNEON(MVT VT) { - addRegisterClass(VT, &ARM64::FPR128RegClass); +void AArch64TargetLowering::addQRTypeForNEON(MVT VT) { + addRegisterClass(VT, &AArch64::FPR128RegClass); addTypeForNEON(VT, MVT::v4i32); } -EVT ARM64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); @@ -562,13 +562,13 @@ EVT ARM64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { /// computeKnownBitsForTargetNode - Determine which of the bits specified in /// Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. 
-void ARM64TargetLowering::computeKnownBitsForTargetNode(
+void AArch64TargetLowering::computeKnownBitsForTargetNode(
     const SDValue Op, APInt &KnownZero, APInt &KnownOne,
     const SelectionDAG &DAG, unsigned Depth) const {
   switch (Op.getOpcode()) {
   default: break;
-  case ARM64ISD::CSEL: {
+  case AArch64ISD::CSEL: {
     APInt KnownZero2, KnownOne2;
     DAG.computeKnownBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1);
     DAG.computeKnownBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1);
@@ -581,8 +581,8 @@ void ARM64TargetLowering::computeKnownBitsForTargetNode(
     Intrinsic::ID IntID = static_cast(CN->getZExtValue());
     switch (IntID) {
     default: return;
-    case Intrinsic::arm64_ldaxr:
-    case Intrinsic::arm64_ldxr: {
+    case Intrinsic::aarch64_ldaxr:
+    case Intrinsic::aarch64_ldxr: {
       unsigned BitWidth = KnownOne.getBitWidth();
       EVT VT = cast(Op)->getMemoryVT();
       unsigned MemBits = VT.getScalarType().getSizeInBits();
@@ -598,8 +598,8 @@ void ARM64TargetLowering::computeKnownBitsForTargetNode(
     switch (IntNo) {
     default:
      break;
-    case Intrinsic::arm64_neon_umaxv:
-    case Intrinsic::arm64_neon_uminv: {
+    case Intrinsic::aarch64_neon_umaxv:
+    case Intrinsic::aarch64_neon_uminv: {
      // Figure out the datatype of the vector operand. The UMINV instruction
      // will zero extend the result, so we can mark as known zero all the
      // bits larger than the element datatype. 32-bit or larget doesn't need
@@ -622,142 +622,142 @@ void ARM64TargetLowering::computeKnownBitsForTargetNode(
   }
 }
 
-MVT ARM64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
+MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
   return MVT::i64;
 }
 
-unsigned ARM64TargetLowering::getMaximalGlobalOffset() const {
-  // FIXME: On ARM64, this depends on the type.
+unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
+  // FIXME: On AArch64, this depends on the type.
   // Basically, the addressable offsets are o to 4095 * Ty.getSizeInBytes().
   // and the offset has to be a multiple of the related size in bytes.
return 4095; } FastISel * -ARM64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) const { - return ARM64::createFastISel(funcInfo, libInfo); +AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const { + return AArch64::createFastISel(funcInfo, libInfo); } -const char *ARM64TargetLowering::getTargetNodeName(unsigned Opcode) const { +const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return nullptr; - case ARM64ISD::CALL: return "ARM64ISD::CALL"; - case ARM64ISD::ADRP: return "ARM64ISD::ADRP"; - case ARM64ISD::ADDlow: return "ARM64ISD::ADDlow"; - case ARM64ISD::LOADgot: return "ARM64ISD::LOADgot"; - case ARM64ISD::RET_FLAG: return "ARM64ISD::RET_FLAG"; - case ARM64ISD::BRCOND: return "ARM64ISD::BRCOND"; - case ARM64ISD::CSEL: return "ARM64ISD::CSEL"; - case ARM64ISD::FCSEL: return "ARM64ISD::FCSEL"; - case ARM64ISD::CSINV: return "ARM64ISD::CSINV"; - case ARM64ISD::CSNEG: return "ARM64ISD::CSNEG"; - case ARM64ISD::CSINC: return "ARM64ISD::CSINC"; - case ARM64ISD::THREAD_POINTER: return "ARM64ISD::THREAD_POINTER"; - case ARM64ISD::TLSDESC_CALL: return "ARM64ISD::TLSDESC_CALL"; - case ARM64ISD::ADC: return "ARM64ISD::ADC"; - case ARM64ISD::SBC: return "ARM64ISD::SBC"; - case ARM64ISD::ADDS: return "ARM64ISD::ADDS"; - case ARM64ISD::SUBS: return "ARM64ISD::SUBS"; - case ARM64ISD::ADCS: return "ARM64ISD::ADCS"; - case ARM64ISD::SBCS: return "ARM64ISD::SBCS"; - case ARM64ISD::ANDS: return "ARM64ISD::ANDS"; - case ARM64ISD::FCMP: return "ARM64ISD::FCMP"; - case ARM64ISD::FMIN: return "ARM64ISD::FMIN"; - case ARM64ISD::FMAX: return "ARM64ISD::FMAX"; - case ARM64ISD::DUP: return "ARM64ISD::DUP"; - case ARM64ISD::DUPLANE8: return "ARM64ISD::DUPLANE8"; - case ARM64ISD::DUPLANE16: return "ARM64ISD::DUPLANE16"; - case ARM64ISD::DUPLANE32: return "ARM64ISD::DUPLANE32"; - case ARM64ISD::DUPLANE64: return "ARM64ISD::DUPLANE64"; - case ARM64ISD::MOVI: return "ARM64ISD::MOVI"; - case ARM64ISD::MOVIshift: return "ARM64ISD::MOVIshift"; - case ARM64ISD::MOVIedit: return "ARM64ISD::MOVIedit"; - case ARM64ISD::MOVImsl: return "ARM64ISD::MOVImsl"; - case ARM64ISD::FMOV: return "ARM64ISD::FMOV"; - case ARM64ISD::MVNIshift: return "ARM64ISD::MVNIshift"; - case ARM64ISD::MVNImsl: return "ARM64ISD::MVNImsl"; - case ARM64ISD::BICi: return "ARM64ISD::BICi"; - case ARM64ISD::ORRi: return "ARM64ISD::ORRi"; - case ARM64ISD::BSL: return "ARM64ISD::BSL"; - case ARM64ISD::NEG: return "ARM64ISD::NEG"; - case ARM64ISD::EXTR: return "ARM64ISD::EXTR"; - case ARM64ISD::ZIP1: return "ARM64ISD::ZIP1"; - case ARM64ISD::ZIP2: return "ARM64ISD::ZIP2"; - case ARM64ISD::UZP1: return "ARM64ISD::UZP1"; - case ARM64ISD::UZP2: return "ARM64ISD::UZP2"; - case ARM64ISD::TRN1: return "ARM64ISD::TRN1"; - case ARM64ISD::TRN2: return "ARM64ISD::TRN2"; - case ARM64ISD::REV16: return "ARM64ISD::REV16"; - case ARM64ISD::REV32: return "ARM64ISD::REV32"; - case ARM64ISD::REV64: return "ARM64ISD::REV64"; - case ARM64ISD::EXT: return "ARM64ISD::EXT"; - case ARM64ISD::VSHL: return "ARM64ISD::VSHL"; - case ARM64ISD::VLSHR: return "ARM64ISD::VLSHR"; - case ARM64ISD::VASHR: return "ARM64ISD::VASHR"; - case ARM64ISD::CMEQ: return "ARM64ISD::CMEQ"; - case ARM64ISD::CMGE: return "ARM64ISD::CMGE"; - case ARM64ISD::CMGT: return "ARM64ISD::CMGT"; - case ARM64ISD::CMHI: return "ARM64ISD::CMHI"; - case ARM64ISD::CMHS: return "ARM64ISD::CMHS"; - case ARM64ISD::FCMEQ: return "ARM64ISD::FCMEQ"; - case 
ARM64ISD::FCMGE: return "ARM64ISD::FCMGE"; - case ARM64ISD::FCMGT: return "ARM64ISD::FCMGT"; - case ARM64ISD::CMEQz: return "ARM64ISD::CMEQz"; - case ARM64ISD::CMGEz: return "ARM64ISD::CMGEz"; - case ARM64ISD::CMGTz: return "ARM64ISD::CMGTz"; - case ARM64ISD::CMLEz: return "ARM64ISD::CMLEz"; - case ARM64ISD::CMLTz: return "ARM64ISD::CMLTz"; - case ARM64ISD::FCMEQz: return "ARM64ISD::FCMEQz"; - case ARM64ISD::FCMGEz: return "ARM64ISD::FCMGEz"; - case ARM64ISD::FCMGTz: return "ARM64ISD::FCMGTz"; - case ARM64ISD::FCMLEz: return "ARM64ISD::FCMLEz"; - case ARM64ISD::FCMLTz: return "ARM64ISD::FCMLTz"; - case ARM64ISD::NOT: return "ARM64ISD::NOT"; - case ARM64ISD::BIT: return "ARM64ISD::BIT"; - case ARM64ISD::CBZ: return "ARM64ISD::CBZ"; - case ARM64ISD::CBNZ: return "ARM64ISD::CBNZ"; - case ARM64ISD::TBZ: return "ARM64ISD::TBZ"; - case ARM64ISD::TBNZ: return "ARM64ISD::TBNZ"; - case ARM64ISD::TC_RETURN: return "ARM64ISD::TC_RETURN"; - case ARM64ISD::SITOF: return "ARM64ISD::SITOF"; - case ARM64ISD::UITOF: return "ARM64ISD::UITOF"; - case ARM64ISD::SQSHL_I: return "ARM64ISD::SQSHL_I"; - case ARM64ISD::UQSHL_I: return "ARM64ISD::UQSHL_I"; - case ARM64ISD::SRSHR_I: return "ARM64ISD::SRSHR_I"; - case ARM64ISD::URSHR_I: return "ARM64ISD::URSHR_I"; - case ARM64ISD::SQSHLU_I: return "ARM64ISD::SQSHLU_I"; - case ARM64ISD::WrapperLarge: return "ARM64ISD::WrapperLarge"; - case ARM64ISD::LD2post: return "ARM64ISD::LD2post"; - case ARM64ISD::LD3post: return "ARM64ISD::LD3post"; - case ARM64ISD::LD4post: return "ARM64ISD::LD4post"; - case ARM64ISD::ST2post: return "ARM64ISD::ST2post"; - case ARM64ISD::ST3post: return "ARM64ISD::ST3post"; - case ARM64ISD::ST4post: return "ARM64ISD::ST4post"; - case ARM64ISD::LD1x2post: return "ARM64ISD::LD1x2post"; - case ARM64ISD::LD1x3post: return "ARM64ISD::LD1x3post"; - case ARM64ISD::LD1x4post: return "ARM64ISD::LD1x4post"; - case ARM64ISD::ST1x2post: return "ARM64ISD::ST1x2post"; - case ARM64ISD::ST1x3post: return "ARM64ISD::ST1x3post"; - case ARM64ISD::ST1x4post: return "ARM64ISD::ST1x4post"; - case ARM64ISD::LD1DUPpost: return "ARM64ISD::LD1DUPpost"; - case ARM64ISD::LD2DUPpost: return "ARM64ISD::LD2DUPpost"; - case ARM64ISD::LD3DUPpost: return "ARM64ISD::LD3DUPpost"; - case ARM64ISD::LD4DUPpost: return "ARM64ISD::LD4DUPpost"; - case ARM64ISD::LD1LANEpost: return "ARM64ISD::LD1LANEpost"; - case ARM64ISD::LD2LANEpost: return "ARM64ISD::LD2LANEpost"; - case ARM64ISD::LD3LANEpost: return "ARM64ISD::LD3LANEpost"; - case ARM64ISD::LD4LANEpost: return "ARM64ISD::LD4LANEpost"; - case ARM64ISD::ST2LANEpost: return "ARM64ISD::ST2LANEpost"; - case ARM64ISD::ST3LANEpost: return "ARM64ISD::ST3LANEpost"; - case ARM64ISD::ST4LANEpost: return "ARM64ISD::ST4LANEpost"; + case AArch64ISD::CALL: return "AArch64ISD::CALL"; + case AArch64ISD::ADRP: return "AArch64ISD::ADRP"; + case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow"; + case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot"; + case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG"; + case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND"; + case AArch64ISD::CSEL: return "AArch64ISD::CSEL"; + case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL"; + case AArch64ISD::CSINV: return "AArch64ISD::CSINV"; + case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG"; + case AArch64ISD::CSINC: return "AArch64ISD::CSINC"; + case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; + case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL"; + case AArch64ISD::ADC: return "AArch64ISD::ADC"; + case AArch64ISD::SBC: return 
"AArch64ISD::SBC"; + case AArch64ISD::ADDS: return "AArch64ISD::ADDS"; + case AArch64ISD::SUBS: return "AArch64ISD::SUBS"; + case AArch64ISD::ADCS: return "AArch64ISD::ADCS"; + case AArch64ISD::SBCS: return "AArch64ISD::SBCS"; + case AArch64ISD::ANDS: return "AArch64ISD::ANDS"; + case AArch64ISD::FCMP: return "AArch64ISD::FCMP"; + case AArch64ISD::FMIN: return "AArch64ISD::FMIN"; + case AArch64ISD::FMAX: return "AArch64ISD::FMAX"; + case AArch64ISD::DUP: return "AArch64ISD::DUP"; + case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8"; + case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16"; + case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32"; + case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64"; + case AArch64ISD::MOVI: return "AArch64ISD::MOVI"; + case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift"; + case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit"; + case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl"; + case AArch64ISD::FMOV: return "AArch64ISD::FMOV"; + case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift"; + case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl"; + case AArch64ISD::BICi: return "AArch64ISD::BICi"; + case AArch64ISD::ORRi: return "AArch64ISD::ORRi"; + case AArch64ISD::BSL: return "AArch64ISD::BSL"; + case AArch64ISD::NEG: return "AArch64ISD::NEG"; + case AArch64ISD::EXTR: return "AArch64ISD::EXTR"; + case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1"; + case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2"; + case AArch64ISD::UZP1: return "AArch64ISD::UZP1"; + case AArch64ISD::UZP2: return "AArch64ISD::UZP2"; + case AArch64ISD::TRN1: return "AArch64ISD::TRN1"; + case AArch64ISD::TRN2: return "AArch64ISD::TRN2"; + case AArch64ISD::REV16: return "AArch64ISD::REV16"; + case AArch64ISD::REV32: return "AArch64ISD::REV32"; + case AArch64ISD::REV64: return "AArch64ISD::REV64"; + case AArch64ISD::EXT: return "AArch64ISD::EXT"; + case AArch64ISD::VSHL: return "AArch64ISD::VSHL"; + case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR"; + case AArch64ISD::VASHR: return "AArch64ISD::VASHR"; + case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ"; + case AArch64ISD::CMGE: return "AArch64ISD::CMGE"; + case AArch64ISD::CMGT: return "AArch64ISD::CMGT"; + case AArch64ISD::CMHI: return "AArch64ISD::CMHI"; + case AArch64ISD::CMHS: return "AArch64ISD::CMHS"; + case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ"; + case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE"; + case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT"; + case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz"; + case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz"; + case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz"; + case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz"; + case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz"; + case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz"; + case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz"; + case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz"; + case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz"; + case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz"; + case AArch64ISD::NOT: return "AArch64ISD::NOT"; + case AArch64ISD::BIT: return "AArch64ISD::BIT"; + case AArch64ISD::CBZ: return "AArch64ISD::CBZ"; + case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ"; + case AArch64ISD::TBZ: return "AArch64ISD::TBZ"; + case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ"; + case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; + case AArch64ISD::SITOF: return "AArch64ISD::SITOF"; + case AArch64ISD::UITOF: return "AArch64ISD::UITOF"; + case AArch64ISD::SQSHL_I: 
return "AArch64ISD::SQSHL_I"; + case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I"; + case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I"; + case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I"; + case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I"; + case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge"; + case AArch64ISD::LD2post: return "AArch64ISD::LD2post"; + case AArch64ISD::LD3post: return "AArch64ISD::LD3post"; + case AArch64ISD::LD4post: return "AArch64ISD::LD4post"; + case AArch64ISD::ST2post: return "AArch64ISD::ST2post"; + case AArch64ISD::ST3post: return "AArch64ISD::ST3post"; + case AArch64ISD::ST4post: return "AArch64ISD::ST4post"; + case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post"; + case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post"; + case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post"; + case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post"; + case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post"; + case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post"; + case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost"; + case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost"; + case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost"; + case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost"; + case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost"; + case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost"; + case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost"; + case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost"; + case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost"; + case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost"; + case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost"; } } MachineBasicBlock * -ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI, - MachineBasicBlock *MBB) const { +AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, + MachineBasicBlock *MBB) const { // We materialise the F128CSEL pseudo-instruction as some control flow and a // phi node: @@ -793,8 +793,8 @@ ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI, MBB->end()); EndBB->transferSuccessorsAndUpdatePHIs(MBB); - BuildMI(MBB, DL, TII->get(ARM64::Bcc)).addImm(CondCode).addMBB(TrueBB); - BuildMI(MBB, DL, TII->get(ARM64::B)).addMBB(EndBB); + BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB); + BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB); MBB->addSuccessor(TrueBB); MBB->addSuccessor(EndBB); @@ -802,11 +802,11 @@ ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI, TrueBB->addSuccessor(EndBB); if (!NZCVKilled) { - TrueBB->addLiveIn(ARM64::NZCV); - EndBB->addLiveIn(ARM64::NZCV); + TrueBB->addLiveIn(AArch64::NZCV); + EndBB->addLiveIn(AArch64::NZCV); } - BuildMI(*EndBB, EndBB->begin(), DL, TII->get(ARM64::PHI), DestReg) + BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg) .addReg(IfTrueReg) .addMBB(TrueBB) .addReg(IfFalseReg) @@ -817,7 +817,7 @@ ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI, } MachineBasicBlock * -ARM64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, +AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { default: @@ -827,7 +827,7 @@ ARM64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, assert(0 && "Unexpected instruction for custom inserter!"); break; - case ARM64::F128CSEL: + case AArch64::F128CSEL: return EmitF128CSEL(MI, BB); case TargetOpcode::STACKMAP: @@ -838,120 +838,122 @@ 
ARM64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } //===----------------------------------------------------------------------===// -// ARM64 Lowering private implementation. +// AArch64 Lowering private implementation. //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// -/// changeIntCCToARM64CC - Convert a DAG integer condition code to an ARM64 CC -static ARM64CC::CondCode changeIntCCToARM64CC(ISD::CondCode CC) { +/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 +/// CC +static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown condition code!"); case ISD::SETNE: - return ARM64CC::NE; + return AArch64CC::NE; case ISD::SETEQ: - return ARM64CC::EQ; + return AArch64CC::EQ; case ISD::SETGT: - return ARM64CC::GT; + return AArch64CC::GT; case ISD::SETGE: - return ARM64CC::GE; + return AArch64CC::GE; case ISD::SETLT: - return ARM64CC::LT; + return AArch64CC::LT; case ISD::SETLE: - return ARM64CC::LE; + return AArch64CC::LE; case ISD::SETUGT: - return ARM64CC::HI; + return AArch64CC::HI; case ISD::SETUGE: - return ARM64CC::HS; + return AArch64CC::HS; case ISD::SETULT: - return ARM64CC::LO; + return AArch64CC::LO; case ISD::SETULE: - return ARM64CC::LS; + return AArch64CC::LS; } } -/// changeFPCCToARM64CC - Convert a DAG fp condition code to an ARM64 CC. -static void changeFPCCToARM64CC(ISD::CondCode CC, ARM64CC::CondCode &CondCode, - ARM64CC::CondCode &CondCode2) { - CondCode2 = ARM64CC::AL; +/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC. +static void changeFPCCToAArch64CC(ISD::CondCode CC, + AArch64CC::CondCode &CondCode, + AArch64CC::CondCode &CondCode2) { + CondCode2 = AArch64CC::AL; switch (CC) { default: llvm_unreachable("Unknown FP condition!"); case ISD::SETEQ: case ISD::SETOEQ: - CondCode = ARM64CC::EQ; + CondCode = AArch64CC::EQ; break; case ISD::SETGT: case ISD::SETOGT: - CondCode = ARM64CC::GT; + CondCode = AArch64CC::GT; break; case ISD::SETGE: case ISD::SETOGE: - CondCode = ARM64CC::GE; + CondCode = AArch64CC::GE; break; case ISD::SETOLT: - CondCode = ARM64CC::MI; + CondCode = AArch64CC::MI; break; case ISD::SETOLE: - CondCode = ARM64CC::LS; + CondCode = AArch64CC::LS; break; case ISD::SETONE: - CondCode = ARM64CC::MI; - CondCode2 = ARM64CC::GT; + CondCode = AArch64CC::MI; + CondCode2 = AArch64CC::GT; break; case ISD::SETO: - CondCode = ARM64CC::VC; + CondCode = AArch64CC::VC; break; case ISD::SETUO: - CondCode = ARM64CC::VS; + CondCode = AArch64CC::VS; break; case ISD::SETUEQ: - CondCode = ARM64CC::EQ; - CondCode2 = ARM64CC::VS; + CondCode = AArch64CC::EQ; + CondCode2 = AArch64CC::VS; break; case ISD::SETUGT: - CondCode = ARM64CC::HI; + CondCode = AArch64CC::HI; break; case ISD::SETUGE: - CondCode = ARM64CC::PL; + CondCode = AArch64CC::PL; break; case ISD::SETLT: case ISD::SETULT: - CondCode = ARM64CC::LT; + CondCode = AArch64CC::LT; break; case ISD::SETLE: case ISD::SETULE: - CondCode = ARM64CC::LE; + CondCode = AArch64CC::LE; break; case ISD::SETNE: case ISD::SETUNE: - CondCode = ARM64CC::NE; + CondCode = AArch64CC::NE; break; } } -/// changeVectorFPCCToARM64CC - Convert a DAG fp condition code to an ARM64 CC -/// usable with the vector instructions. 
Fewer operations are available without -/// a real NZCV register, so we have to use less efficient combinations to get -/// the same effect. -static void changeVectorFPCCToARM64CC(ISD::CondCode CC, - ARM64CC::CondCode &CondCode, - ARM64CC::CondCode &CondCode2, - bool &Invert) { +/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 +/// CC usable with the vector instructions. Fewer operations are available +/// without a real NZCV register, so we have to use less efficient combinations +/// to get the same effect. +static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, + AArch64CC::CondCode &CondCode, + AArch64CC::CondCode &CondCode2, + bool &Invert) { Invert = false; switch (CC) { default: // Mostly the scalar mappings work fine. - changeFPCCToARM64CC(CC, CondCode, CondCode2); + changeFPCCToAArch64CC(CC, CondCode, CondCode2); break; case ISD::SETUO: Invert = true; // Fallthrough case ISD::SETO: - CondCode = ARM64CC::MI; - CondCode2 = ARM64CC::GE; + CondCode = AArch64CC::MI; + CondCode2 = AArch64CC::GE; break; case ISD::SETUEQ: case ISD::SETULT: @@ -961,13 +963,13 @@ static void changeVectorFPCCToARM64CC(ISD::CondCode CC, // All of the compare-mask comparisons are ordered, but we can switch // between the two by a double inversion. E.g. ULE == !OGT. Invert = true; - changeFPCCToARM64CC(getSetCCInverse(CC, false), CondCode, CondCode2); + changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2); break; } } static bool isLegalArithImmed(uint64_t C) { - // Matches ARM64DAGToDAGISel::SelectArithImmed(). + // Matches AArch64DAGToDAGISel::SelectArithImmed(). return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); } @@ -976,13 +978,13 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, EVT VT = LHS.getValueType(); if (VT.isFloatingPoint()) - return DAG.getNode(ARM64ISD::FCMP, dl, VT, LHS, RHS); + return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS); // The CMP instruction is just an alias for SUBS, and representing it as // SUBS means that it's possible to get CSE with subtract operations. // A later phase can perform the optimization of setting the destination // register to WZR/XZR if it ends up being unused. - unsigned Opcode = ARM64ISD::SUBS; + unsigned Opcode = AArch64ISD::SUBS; if (RHS.getOpcode() == ISD::SUB && isa(RHS.getOperand(0)) && cast(RHS.getOperand(0))->getZExtValue() == 0 && @@ -997,7 +999,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, // So, finally, the only LLVM-native comparisons that don't mention C and V // are SETEQ and SETNE. They're the only ones we can safely use CMN for in // the absence of information about op2. - Opcode = ARM64ISD::ADDS; + Opcode = AArch64ISD::ADDS; RHS = RHS.getOperand(1); } else if (LHS.getOpcode() == ISD::AND && isa(RHS) && cast(RHS)->getZExtValue() == 0 && @@ -1005,7 +1007,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, // Similarly, (CMP (and X, Y), 0) can be implemented with a TST // (a.k.a. ANDS) except that the flags are only guaranteed to work for one // of the signed comparisons. 
- Opcode = ARM64ISD::ANDS; + Opcode = AArch64ISD::ANDS; RHS = LHS.getOperand(1); LHS = LHS.getOperand(0); } @@ -1014,8 +1016,8 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, .getValue(1); } -static SDValue getARM64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARM64cc, SelectionDAG &DAG, SDLoc dl) { +static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) { if (ConstantSDNode *RHSC = dyn_cast(RHS.getNode())) { EVT VT = RHS.getValueType(); uint64_t C = RHSC->getZExtValue(); @@ -1072,13 +1074,13 @@ static SDValue getARM64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); - ARM64CC::CondCode ARM64CC = changeIntCCToARM64CC(CC); - ARM64cc = DAG.getConstant(ARM64CC, MVT::i32); + AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC); + AArch64cc = DAG.getConstant(AArch64CC, MVT::i32); return Cmp; } static std::pair -getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { +getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) && "Unsupported value type"); SDValue Value, Overflow; @@ -1090,25 +1092,25 @@ getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { default: llvm_unreachable("Unknown overflow instruction!"); case ISD::SADDO: - Opc = ARM64ISD::ADDS; - CC = ARM64CC::VS; + Opc = AArch64ISD::ADDS; + CC = AArch64CC::VS; break; case ISD::UADDO: - Opc = ARM64ISD::ADDS; - CC = ARM64CC::HS; + Opc = AArch64ISD::ADDS; + CC = AArch64CC::HS; break; case ISD::SSUBO: - Opc = ARM64ISD::SUBS; - CC = ARM64CC::VS; + Opc = AArch64ISD::SUBS; + CC = AArch64CC::VS; break; case ISD::USUBO: - Opc = ARM64ISD::SUBS; - CC = ARM64CC::LO; + Opc = AArch64ISD::SUBS; + CC = AArch64CC::LO; break; // Multiply needs a little bit extra work. case ISD::SMULO: case ISD::UMULO: { - CC = ARM64CC::NE; + CC = AArch64CC::NE; bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false; if (Op.getValueType() == MVT::i32) { unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; @@ -1121,7 +1123,7 @@ getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul, DAG.getConstant(0, MVT::i64)); - // On ARM64 the upper 32 bits are always zero extended for a 32 bit + // On AArch64 the upper 32 bits are always zero extended for a 32 bit // operation. We need to clear out the upper 32 bits, because we used a // widening multiply that wrote all 64 bits. In the end this should be a // noop. @@ -1140,19 +1142,19 @@ getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { // It is important that LowerBits is last, otherwise the arithmetic // shift will not be folded into the compare (SUBS). SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32); - Overflow = DAG.getNode(ARM64ISD::SUBS, DL, VTs, UpperBits, LowerBits) + Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) .getValue(1); } else { // The overflow check for unsigned multiply is easy. We only need to // check if any of the upper 32 bits are set. This can be done with a // CMP (shifted register). 
For that we need to generate the following // pattern: - // (i64 ARM64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32) + // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32) SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64)); SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); Overflow = - DAG.getNode(ARM64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), + DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), UpperBits).getValue(1); } break; @@ -1167,13 +1169,13 @@ getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { // It is important that LowerBits is last, otherwise the arithmetic // shift will not be folded into the compare (SUBS). SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); - Overflow = DAG.getNode(ARM64ISD::SUBS, DL, VTs, UpperBits, LowerBits) + Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) .getValue(1); } else { SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS); SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); Overflow = - DAG.getNode(ARM64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), + DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), UpperBits).getValue(1); } break; @@ -1183,15 +1185,15 @@ getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { if (Opc) { SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32); - // Emit the ARM64 operation with overflow check. + // Emit the AArch64 operation with overflow check. Value = DAG.getNode(Opc, DL, VTs, LHS, RHS); Overflow = Value.getValue(1); } return std::make_pair(Value, Overflow); } -SDValue ARM64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, - RTLIB::Libcall Call) const { +SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const { SmallVector Ops; for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) Ops.push_back(Op.getOperand(i)); @@ -1246,13 +1248,13 @@ static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) { // If the constants line up, perform the transform! if (CTVal->isNullValue() && CFVal->isAllOnesValue()) { SDValue CCVal; - SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl); + SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); FVal = Other; TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other, DAG.getConstant(-1ULL, Other.getValueType())); - return DAG.getNode(ARM64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal, + return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal, CCVal, Cmp); } @@ -1274,17 +1276,17 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { default: assert(0 && "Invalid code"); case ISD::ADDC: - Opc = ARM64ISD::ADDS; + Opc = AArch64ISD::ADDS; break; case ISD::SUBC: - Opc = ARM64ISD::SUBS; + Opc = AArch64ISD::SUBS; break; case ISD::ADDE: - Opc = ARM64ISD::ADCS; + Opc = AArch64ISD::ADCS; ExtraOp = true; break; case ISD::SUBE: - Opc = ARM64ISD::SBCS; + Opc = AArch64ISD::SBCS; ExtraOp = true; break; } @@ -1300,10 +1302,10 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) return SDValue(); - ARM64CC::CondCode CC; + AArch64CC::CondCode CC; // The actual operation that sets the overflow or carry flag. SDValue Value, Overflow; - std::tie(Value, Overflow) = getARM64XALUOOp(CC, Op, DAG); + std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG); // We use 0 and 1 as false and true values. 
SDValue TVal = DAG.getConstant(1, MVT::i32); @@ -1313,8 +1315,8 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { // too. This will allow it to be selected to a single instruction: // CSINC Wd, WZR, WZR, invert(cond). SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), MVT::i32); - Overflow = DAG.getNode(ARM64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal, CCVal, - Overflow); + Overflow = DAG.getNode(AArch64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal, + CCVal, Overflow); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow); @@ -1347,12 +1349,12 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { unsigned PrfOp = (IsWrite << 4) | // Load/Store bit (Locality << 1) | // Cache level bits (unsigned)IsStream; // Stream bit - return DAG.getNode(ARM64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0), + return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0), DAG.getConstant(PrfOp, MVT::i32), Op.getOperand(1)); } -SDValue ARM64TargetLowering::LowerFP_EXTEND(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); RTLIB::Libcall LC; @@ -1361,8 +1363,8 @@ SDValue ARM64TargetLowering::LowerFP_EXTEND(SDValue Op, return LowerF128Call(Op, DAG, LC); } -SDValue ARM64TargetLowering::LowerFP_ROUND(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, + SelectionDAG &DAG) const { if (Op.getOperand(0).getValueType() != MVT::f128) { // It's legal except when f128 is involved return Op; @@ -1380,7 +1382,7 @@ SDValue ARM64TargetLowering::LowerFP_ROUND(SDValue Op, } static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { - // Warning: We maintain cost tables in ARM64TargetTransformInfo.cpp. + // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. // Any additional optimization in this function should be recorded // in the cost tables. EVT InVT = Op.getOperand(0).getValueType(); @@ -1406,8 +1408,8 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { return SDValue(); } -SDValue ARM64TargetLowering::LowerFP_TO_INT(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, + SelectionDAG &DAG) const { if (Op.getOperand(0).getValueType().isVector()) return LowerVectorFP_TO_INT(Op, DAG); @@ -1431,7 +1433,7 @@ SDValue ARM64TargetLowering::LowerFP_TO_INT(SDValue Op, } static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { - // Warning: We maintain cost tables in ARM64TargetTransformInfo.cpp. + // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. // Any additional optimization in this function should be recorded // in the cost tables. 
EVT VT = Op.getValueType(); @@ -1467,7 +1469,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BuildVectorOps); } -SDValue ARM64TargetLowering::LowerINT_TO_FP(SDValue Op, +SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType().isVector()) return LowerVectorINT_TO_FP(Op, DAG); @@ -1490,7 +1492,8 @@ SDValue ARM64TargetLowering::LowerINT_TO_FP(SDValue Op, return LowerF128Call(Op, DAG, LC); } -SDValue ARM64TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, + SelectionDAG &DAG) const { // For iOS, we want to call an alternative entry point: __sincos_stret, // which returns the values in two S / D registers. SDLoc dl(Op); @@ -1520,8 +1523,8 @@ SDValue ARM64TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { return CallResult.first; } -SDValue ARM64TargetLowering::LowerOperation(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("unimplemented operand"); @@ -1621,7 +1624,7 @@ SDValue ARM64TargetLowering::LowerOperation(SDValue Op, } /// getFunctionAlignment - Return the Log2 alignment of this function. -unsigned ARM64TargetLowering::getFunctionAlignment(const Function *F) const { +unsigned AArch64TargetLowering::getFunctionAlignment(const Function *F) const { return 2; } @@ -1629,26 +1632,26 @@ unsigned ARM64TargetLowering::getFunctionAlignment(const Function *F) const { // Calling Convention Implementation //===----------------------------------------------------------------------===// -#include "ARM64GenCallingConv.inc" +#include "AArch64GenCallingConv.inc" /// Selects the correct CCAssignFn for a the given CallingConvention /// value. -CCAssignFn *ARM64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, - bool IsVarArg) const { +CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, + bool IsVarArg) const { switch (CC) { default: llvm_unreachable("Unsupported calling convention."); case CallingConv::WebKit_JS: - return CC_ARM64_WebKit_JS; + return CC_AArch64_WebKit_JS; case CallingConv::C: case CallingConv::Fast: if (!Subtarget->isTargetDarwin()) - return CC_ARM64_AAPCS; - return IsVarArg ? CC_ARM64_DarwinPCS_VarArg : CC_ARM64_DarwinPCS; + return CC_AArch64_AAPCS; + return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS; } } -SDValue ARM64TargetLowering::LowerFormalArguments( +SDValue AArch64TargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, SDLoc DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -1662,7 +1665,7 @@ SDValue ARM64TargetLowering::LowerFormalArguments( // At this point, Ins[].VT may already be promoted to i32. To correctly // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and - // i8 to CC_ARM64_AAPCS with i32 being ValVT and i8 being LocVT. + // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT. // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here // we use a special version of AnalyzeFormalArguments to pass in ValVT and // LocVT. 
@@ -1718,15 +1721,15 @@ SDValue ARM64TargetLowering::LowerFormalArguments( const TargetRegisterClass *RC; if (RegVT == MVT::i32) - RC = &ARM64::GPR32RegClass; + RC = &AArch64::GPR32RegClass; else if (RegVT == MVT::i64) - RC = &ARM64::GPR64RegClass; + RC = &AArch64::GPR64RegClass; else if (RegVT == MVT::f32) - RC = &ARM64::FPR32RegClass; + RC = &AArch64::FPR32RegClass; else if (RegVT == MVT::f64 || RegVT.is64BitVector()) - RC = &ARM64::FPR64RegClass; + RC = &AArch64::FPR64RegClass; else if (RegVT == MVT::f128 || RegVT.is128BitVector()) - RC = &ARM64::FPR128RegClass; + RC = &AArch64::FPR128RegClass; else llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); @@ -1802,7 +1805,7 @@ SDValue ARM64TargetLowering::LowerFormalArguments( saveVarArgRegisters(CCInfo, DAG, DL, Chain); } - ARM64FunctionInfo *AFI = MF.getInfo(); + AArch64FunctionInfo *AFI = MF.getInfo(); // This will point to the next argument passed via stack. unsigned StackOffset = CCInfo.getNextStackOffset(); // We currently pass all varargs at 8-byte alignment. @@ -1810,7 +1813,7 @@ SDValue ARM64TargetLowering::LowerFormalArguments( AFI->setVarArgsStackIndex(MFI->CreateFixedObject(4, StackOffset, true)); } - ARM64FunctionInfo *FuncInfo = MF.getInfo(); + AArch64FunctionInfo *FuncInfo = MF.getInfo(); unsigned StackArgSize = CCInfo.getNextStackOffset(); bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) { @@ -1834,18 +1837,18 @@ SDValue ARM64TargetLowering::LowerFormalArguments( return Chain; } -void ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo, - SelectionDAG &DAG, SDLoc DL, - SDValue &Chain) const { +void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, + SelectionDAG &DAG, SDLoc DL, + SDValue &Chain) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - ARM64FunctionInfo *FuncInfo = MF.getInfo(); + AArch64FunctionInfo *FuncInfo = MF.getInfo(); SmallVector MemOps; - static const MCPhysReg GPRArgRegs[] = { ARM64::X0, ARM64::X1, ARM64::X2, - ARM64::X3, ARM64::X4, ARM64::X5, - ARM64::X6, ARM64::X7 }; + static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2, + AArch64::X3, AArch64::X4, AArch64::X5, + AArch64::X6, AArch64::X7 }; static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs); unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs); @@ -1858,7 +1861,7 @@ void ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo, SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy()); for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &ARM64::GPR64RegClass); + unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass); SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, @@ -1872,9 +1875,9 @@ void ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo, FuncInfo->setVarArgsGPRSize(GPRSaveSize); if (Subtarget->hasFPARMv8()) { - static const MCPhysReg FPRArgRegs[] = { ARM64::Q0, ARM64::Q1, ARM64::Q2, - ARM64::Q3, ARM64::Q4, ARM64::Q5, - ARM64::Q6, ARM64::Q7 }; + static const MCPhysReg FPRArgRegs[] = { + AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, + AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7}; static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs); unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs); @@ -1887,7 +1890,7 @@ void 
ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo, SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &ARM64::FPR128RegClass); + unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass); SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); SDValue Store = @@ -1909,13 +1912,14 @@ void ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo, /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. -SDValue ARM64TargetLowering::LowerCallResult( +SDValue AArch64TargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, SDLoc DL, SelectionDAG &DAG, SmallVectorImpl &InVals, bool isThisReturn, SDValue ThisVal) const { - CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; + CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS + ? RetCC_AArch64_WebKit_JS + : RetCC_AArch64_AAPCS; // Assign locations to each value returned by this call. SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), @@ -1956,7 +1960,7 @@ SDValue ARM64TargetLowering::LowerCallResult( return Chain; } -bool ARM64TargetLowering::isEligibleForTailCallOptimization( +bool AArch64TargetLowering::isEligibleForTailCallOptimization( SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, const SmallVectorImpl &Outs, @@ -2054,17 +2058,17 @@ bool ARM64TargetLowering::isEligibleForTailCallOptimization( CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); - const ARM64FunctionInfo *FuncInfo = MF.getInfo(); + const AArch64FunctionInfo *FuncInfo = MF.getInfo(); // If the stack arguments for this call would fit into our own save area then // the call can be made tail. return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea(); } -SDValue ARM64TargetLowering::addTokenForArgument(SDValue Chain, - SelectionDAG &DAG, - MachineFrameInfo *MFI, - int ClobberedFI) const { +SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, + SelectionDAG &DAG, + MachineFrameInfo *MFI, + int ClobberedFI) const { SmallVector ArgChains; int64_t FirstByte = MFI->getObjectOffset(ClobberedFI); int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1; @@ -2094,19 +2098,20 @@ SDValue ARM64TargetLowering::addTokenForArgument(SDValue Chain, return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); } -bool ARM64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, - bool TailCallOpt) const { +bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, + bool TailCallOpt) const { return CallCC == CallingConv::Fast && TailCallOpt; } -bool ARM64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const { +bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const { return CallCC == CallingConv::Fast; } /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain, /// and add input and output parameter nodes. 
-SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const { +SDValue +AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &DL = CLI.DL; SmallVector &Outs = CLI.Outs; @@ -2122,7 +2127,7 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool IsThisReturn = false; - ARM64FunctionInfo *FuncInfo = MF.getInfo(); + AArch64FunctionInfo *FuncInfo = MF.getInfo(); bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; bool IsSibCall = false; @@ -2166,7 +2171,7 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, } else { // At this point, Outs[].VT may already be promoted to i32. To correctly // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and - // i8 to CC_ARM64_AAPCS with i32 being ValVT and i8 being LocVT. + // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT. // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here // we use a special version of AnalyzeCallOperands to pass in ValVT and // LocVT. @@ -2234,7 +2239,7 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), DL); - SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, ARM64::SP, getPointerTy()); + SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy()); SmallVector, 8> RegsToPass; SmallVector MemOpChains; @@ -2367,15 +2372,15 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0); else { Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, - ARM64II::MO_GOT); - Callee = DAG.getNode(ARM64ISD::LOADgot, DL, getPointerTy(), Callee); + AArch64II::MO_GOT); + Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { const char *Sym = S->getSymbol(); Callee = - DAG.getTargetExternalSymbol(Sym, getPointerTy(), ARM64II::MO_GOT); - Callee = DAG.getNode(ARM64ISD::LOADgot, DL, getPointerTy(), Callee); + DAG.getTargetExternalSymbol(Sym, getPointerTy(), AArch64II::MO_GOT); + Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee); } } else if (GlobalAddressSDNode *G = dyn_cast(Callee)) { const GlobalValue *GV = G->getGlobal(); @@ -2415,7 +2420,8 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Add a register mask operand representing the call-preserved registers. const uint32_t *Mask; const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARM64RegisterInfo *ARI = static_cast(TRI); + const AArch64RegisterInfo *ARI = + static_cast(TRI); if (IsThisReturn) { // For 'this' returns, use the X0-preserving mask if applicable Mask = ARI->getThisReturnPreservedMask(CallConv); @@ -2437,10 +2443,10 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, // If we're doing a tall call, use a TC_RETURN here rather than an // actual call instruction. if (IsTailCall) - return DAG.getNode(ARM64ISD::TC_RETURN, DL, NodeTys, Ops); + return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops); // Returns a chain and a flag for retval copy to use. 
- Chain = DAG.getNode(ARM64ISD::CALL, DL, NodeTys, Ops); + Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops); InFlag = Chain.getValue(1); uint64_t CalleePopBytes = DoesCalleeRestoreStack(CallConv, TailCallOpt) @@ -2460,24 +2466,26 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, IsThisReturn ? OutVals[0] : SDValue()); } -bool ARM64TargetLowering::CanLowerReturn( +bool AArch64TargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { - CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; + CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS + ? RetCC_AArch64_WebKit_JS + : RetCC_AArch64_AAPCS; SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC); } SDValue -ARM64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - SDLoc DL, SelectionDAG &DAG) const { - CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; +AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + SDLoc DL, SelectionDAG &DAG) const { + CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS + ? RetCC_AArch64_WebKit_JS + : RetCC_AArch64_AAPCS; SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), RVLocs, *DAG.getContext()); @@ -2513,15 +2521,15 @@ ARM64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(ARM64ISD::RET_FLAG, DL, MVT::Other, RetOps); + return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps); } //===----------------------------------------------------------------------===// // Other Lowering Code //===----------------------------------------------------------------------===// -SDValue ARM64TargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); SDLoc DL(Op); const GlobalValue *GV = cast(Op)->getGlobal(); @@ -2532,31 +2540,31 @@ SDValue ARM64TargetLowering::LowerGlobalAddress(SDValue Op, "unexpected offset in global node"); // This also catched the large code model case for Darwin. - if ((OpFlags & ARM64II::MO_GOT) != 0) { + if ((OpFlags & AArch64II::MO_GOT) != 0) { SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes instead of using a wrapper node. 
- return DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, GotAddr); + return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); } if (getTargetMachine().getCodeModel() == CodeModel::Large) { - const unsigned char MO_NC = ARM64II::MO_NC; + const unsigned char MO_NC = AArch64II::MO_NC; return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G3), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G2 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G1 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G0 | MO_NC)); + AArch64ISD::WrapperLarge, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G3), + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G2 | MO_NC), + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G1 | MO_NC), + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); } else { // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and // the only correct model on Darwin. SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - OpFlags | ARM64II::MO_PAGE); - unsigned char LoFlags = OpFlags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC; + OpFlags | AArch64II::MO_PAGE); + unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC; SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags); - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); + return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); } } @@ -2589,8 +2597,8 @@ SDValue ARM64TargetLowering::LowerGlobalAddress(SDValue Op, /// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for /// a slight efficiency gain. SDValue -ARM64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { +AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin"); SDLoc DL(Op); @@ -2598,8 +2606,8 @@ ARM64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, const GlobalValue *GV = cast(Op)->getGlobal(); SDValue TLVPAddr = - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS); - SDValue DescAddr = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, TLVPAddr); + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); + SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr); // The first entry in the descriptor is a function pointer that we must call // to obtain the address of the variable. @@ -2616,17 +2624,19 @@ ARM64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be // silly). const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARM64RegisterInfo *ARI = static_cast(TRI); + const AArch64RegisterInfo *ARI = + static_cast(TRI); const uint32_t *Mask = ARI->getTLSCallPreservedMask(); // Finally, we can make the call. This is just a degenerate version of a - // normal ARM64 call node: x0 takes the address of the descriptor, and returns - // the address of the variable in this thread. 
- Chain = DAG.getCopyToReg(Chain, DL, ARM64::X0, DescAddr, SDValue()); - Chain = DAG.getNode(ARM64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), - Chain, FuncTLVGet, DAG.getRegister(ARM64::X0, MVT::i64), - DAG.getRegisterMask(Mask), Chain.getValue(1)); - return DAG.getCopyFromReg(Chain, DL, ARM64::X0, PtrVT, Chain.getValue(1)); + // normal AArch64 call node: x0 takes the address of the descriptor, and + // returns the address of the variable in this thread. + Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue()); + Chain = + DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), + Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64), + DAG.getRegisterMask(Mask), Chain.getValue(1)); + return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1)); } /// When accessing thread-local variables under either the general-dynamic or @@ -2651,26 +2661,27 @@ ARM64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, /// /// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this /// is harmless. -SDValue ARM64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr, - SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr, + SDValue DescAddr, SDLoc DL, + SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); // The function we need to call is simply the first entry in the GOT for this // descriptor, load it in preparation. - SDValue Func = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, SymAddr); + SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr); // TLS calls preserve all registers except those that absolutely must be // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be // silly). const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARM64RegisterInfo *ARI = static_cast(TRI); + const AArch64RegisterInfo *ARI = + static_cast(TRI); const uint32_t *Mask = ARI->getTLSCallPreservedMask(); // The function takes only one argument: the address of the descriptor itself // in X0. 
SDValue Glue, Chain; - Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM64::X0, DescAddr, Glue); + Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue); Glue = Chain.getValue(1); // We're now ready to populate the argument list, as with a normal call: @@ -2678,19 +2689,20 @@ SDValue ARM64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr, Ops.push_back(Chain); Ops.push_back(Func); Ops.push_back(SymAddr); - Ops.push_back(DAG.getRegister(ARM64::X0, PtrVT)); + Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT)); Ops.push_back(DAG.getRegisterMask(Mask)); Ops.push_back(Glue); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(ARM64ISD::TLSDESC_CALL, DL, NodeTys, Ops); + Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops); Glue = Chain.getValue(1); - return DAG.getCopyFromReg(Chain, DL, ARM64::X0, PtrVT, Glue); + return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); } -SDValue ARM64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { +SDValue +AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && "This function expects an ELF target"); assert(getTargetMachine().getCodeModel() == CodeModel::Small && "ELF TLS only supported in small memory model"); @@ -2703,23 +2715,24 @@ SDValue ARM64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, SDLoc DL(Op); const GlobalValue *GV = GA->getGlobal(); - SDValue ThreadBase = DAG.getNode(ARM64ISD::THREAD_POINTER, DL, PtrVT); + SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT); if (Model == TLSModel::LocalExec) { SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_G1); + GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1); SDValue LoVar = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_G0 | ARM64II::MO_NC); + GV, DL, PtrVT, 0, + AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC); - TPOff = SDValue(DAG.getMachineNode(ARM64::MOVZXi, DL, PtrVT, HiVar, + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar, DAG.getTargetConstant(16, MVT::i32)), 0); - TPOff = SDValue(DAG.getMachineNode(ARM64::MOVKXi, DL, PtrVT, TPOff, LoVar, + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar, DAG.getTargetConstant(0, MVT::i32)), 0); } else if (Model == TLSModel::InitialExec) { - TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS); - TPOff = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, TPOff); + TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); + TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff); } else if (Model == TLSModel::LocalDynamic) { // Local-dynamic accesses proceed in two phases. A general-dynamic TLS // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate @@ -2727,28 +2740,28 @@ SDValue ARM64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, // calculation. // These accesses will need deduplicating if there's more than one. - ARM64FunctionInfo *MFI = - DAG.getMachineFunction().getInfo(); + AArch64FunctionInfo *MFI = + DAG.getMachineFunction().getInfo(); MFI->incNumLocalDynamicTLSAccesses(); // Accesses used in this sequence go via the TLS descriptor which lives in // the GOT. Prepare an address we can use to handle this. 
SDValue HiDesc = DAG.getTargetExternalSymbol( - "_TLS_MODULE_BASE_", PtrVT, ARM64II::MO_TLS | ARM64II::MO_PAGE); + "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE); SDValue LoDesc = DAG.getTargetExternalSymbol( "_TLS_MODULE_BASE_", PtrVT, - ARM64II::MO_TLS | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); + AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); // First argument to the descriptor call is the address of the descriptor // itself. - SDValue DescAddr = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); + SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc); + DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); // The call needs a relocation too for linker relaxation. It doesn't make // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of // the address. SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, - ARM64II::MO_TLS); + AArch64II::MO_TLS); // Now we can calculate the offset from TPIDR_EL0 to this module's // thread-local area. @@ -2757,38 +2770,40 @@ SDValue ARM64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, // Now use :dtprel_whatever: operations to calculate this variable's offset // in its thread-storage area. SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, MVT::i64, 0, ARM64II::MO_TLS | ARM64II::MO_G1); + GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1); SDValue LoVar = DAG.getTargetGlobalAddress( - GV, DL, MVT::i64, 0, ARM64II::MO_TLS | ARM64II::MO_G0 | ARM64II::MO_NC); + GV, DL, MVT::i64, 0, + AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC); SDValue DTPOff = - SDValue(DAG.getMachineNode(ARM64::MOVZXi, DL, PtrVT, HiVar, + SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar, DAG.getTargetConstant(16, MVT::i32)), 0); - DTPOff = SDValue(DAG.getMachineNode(ARM64::MOVKXi, DL, PtrVT, DTPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), - 0); + DTPOff = + SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar, + DAG.getTargetConstant(0, MVT::i32)), + 0); TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff); } else if (Model == TLSModel::GeneralDynamic) { // Accesses used in this sequence go via the TLS descriptor which lives in // the GOT. Prepare an address we can use to handle this. SDValue HiDesc = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_PAGE); + GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE); SDValue LoDesc = DAG.getTargetGlobalAddress( GV, DL, PtrVT, 0, - ARM64II::MO_TLS | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); + AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); // First argument to the descriptor call is the address of the descriptor // itself. - SDValue DescAddr = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); + SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc); + DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); // The call needs a relocation too for linker relaxation. It doesn't make // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of // the address. SDValue SymAddr = - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS); + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); // Finally we can make a call to calculate the offset from tpidr_el0. 
TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); @@ -2798,8 +2813,8 @@ SDValue ARM64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff); } -SDValue ARM64TargetLowering::LowerGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { if (Subtarget->isTargetDarwin()) return LowerDarwinGlobalTLSAddress(Op, DAG); else if (Subtarget->isTargetELF()) @@ -2807,7 +2822,7 @@ SDValue ARM64TargetLowering::LowerGlobalTLSAddress(SDValue Op, llvm_unreachable("Unexpected platform trying to use TLS"); } -SDValue ARM64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -2843,15 +2858,15 @@ SDValue ARM64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { return SDValue(); // The actual operation with overflow check. - ARM64CC::CondCode OFCC; + AArch64CC::CondCode OFCC; SDValue Value, Overflow; - std::tie(Value, Overflow) = getARM64XALUOOp(OFCC, LHS.getValue(0), DAG); + std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG); if (CC == ISD::SETNE) OFCC = getInvertedCondCode(OFCC); SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); - return DAG.getNode(ARM64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest, + return DAG.getNode(AArch64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest, CCVal, Overflow); } @@ -2878,11 +2893,11 @@ SDValue ARM64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { if (Test.getValueType() == MVT::i32) Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64); - return DAG.getNode(ARM64ISD::TBZ, dl, MVT::Other, Chain, Test, + return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test, DAG.getConstant(Log2_64(Mask), MVT::i64), Dest); } - return DAG.getNode(ARM64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest); + return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest); } else if (CC == ISD::SETNE) { // See if we can use a TBZ to fold in an AND as well. // TBZ has a smaller branch displacement than CBZ. If the offset is @@ -2898,41 +2913,41 @@ SDValue ARM64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { if (Test.getValueType() == MVT::i32) Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64); - return DAG.getNode(ARM64ISD::TBNZ, dl, MVT::Other, Chain, Test, + return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test, DAG.getConstant(Log2_64(Mask), MVT::i64), Dest); } - return DAG.getNode(ARM64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest); + return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest); } } SDValue CCVal; - SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - return DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, + SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); + return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, Cmp); } assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally // clean. Some of them require two branches to implement. 
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); - ARM64CC::CondCode CC1, CC2; - changeFPCCToARM64CC(CC, CC1, CC2); + AArch64CC::CondCode CC1, CC2; + changeFPCCToAArch64CC(CC, CC1, CC2); SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); SDValue BR1 = - DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp); - if (CC2 != ARM64CC::AL) { + DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp); + if (CC2 != AArch64CC::AL) { SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); - return DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val, + return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val, Cmp); } return BR1; } -SDValue ARM64TargetLowering::LowerFCOPYSIGN(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, + SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); @@ -2959,9 +2974,9 @@ SDValue ARM64TargetLowering::LowerFCOPYSIGN(SDValue Op, EltMask = DAG.getConstant(0x80000000ULL, EltVT); if (!VT.isVector()) { - VecVal1 = DAG.getTargetInsertSubreg(ARM64::ssub, DL, VecVT, + VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT, DAG.getUNDEF(VecVT), In1); - VecVal2 = DAG.getTargetInsertSubreg(ARM64::ssub, DL, VecVT, + VecVal2 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT, DAG.getUNDEF(VecVT), In2); } else { VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1); @@ -2977,9 +2992,9 @@ SDValue ARM64TargetLowering::LowerFCOPYSIGN(SDValue Op, EltMask = DAG.getConstant(0, EltVT); if (!VT.isVector()) { - VecVal1 = DAG.getTargetInsertSubreg(ARM64::dsub, DL, VecVT, + VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT, DAG.getUNDEF(VecVT), In1); - VecVal2 = DAG.getTargetInsertSubreg(ARM64::dsub, DL, VecVT, + VecVal2 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT, DAG.getUNDEF(VecVT), In2); } else { VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1); @@ -3004,17 +3019,17 @@ SDValue ARM64TargetLowering::LowerFCOPYSIGN(SDValue Op, } SDValue Sel = - DAG.getNode(ARM64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec); + DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec); if (VT == MVT::f32) - return DAG.getTargetExtractSubreg(ARM64::ssub, DL, VT, Sel); + return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel); else if (VT == MVT::f64) - return DAG.getTargetExtractSubreg(ARM64::dsub, DL, VT, Sel); + return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel); else return DAG.getNode(ISD::BITCAST, DL, VT, Sel); } -SDValue ARM64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { if (DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute( AttributeSet::FunctionIndex, Attribute::NoImplicitFloat)) return SDValue(); @@ -3035,8 +3050,8 @@ SDValue ARM64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { SDValue VecVal; if (VT == MVT::i32) { VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); - VecVal = - DAG.getTargetInsertSubreg(ARM64::ssub, DL, MVT::v8i8, ZeroVec, VecVal); + VecVal = DAG.getTargetInsertSubreg(AArch64::ssub, DL, MVT::v8i8, ZeroVec, + VecVal); } else { VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val); } @@ -3044,14 +3059,14 @@ SDValue ARM64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal); SDValue UaddLV = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, - DAG.getConstant(Intrinsic::arm64_neon_uaddlv, MVT::i32), 
CtPop); + DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, MVT::i32), CtPop); if (VT == MVT::i64) UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV); return UaddLV; } -SDValue ARM64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG); @@ -3082,12 +3097,12 @@ SDValue ARM64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (LHS.getValueType().isInteger()) { SDValue CCVal; SDValue Cmp = - getARM64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl); + getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl); // Note that we inverted the condition above, so we reverse the order of // the true and false operands here. This will allow the setcc to be // matched to a single CSINC instruction. - return DAG.getNode(ARM64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp); + return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp); } // Now we know we're dealing with FP values. @@ -3097,28 +3112,29 @@ SDValue ARM64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // and do the comparison. SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); - ARM64CC::CondCode CC1, CC2; - changeFPCCToARM64CC(CC, CC1, CC2); - if (CC2 == ARM64CC::AL) { - changeFPCCToARM64CC(ISD::getSetCCInverse(CC, false), CC1, CC2); + AArch64CC::CondCode CC1, CC2; + changeFPCCToAArch64CC(CC, CC1, CC2); + if (CC2 == AArch64CC::AL) { + changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2); SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); // Note that we inverted the condition above, so we reverse the order of // the true and false operands here. This will allow the setcc to be // matched to a single CSINC instruction. - return DAG.getNode(ARM64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp); + return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp); } else { - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally - // clean. Some of them require two CSELs to implement. As is in this case, - // we emit the first CSEL and then emit a second using the output of the - // first as the RHS. We're effectively OR'ing the two CC's together. + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't + // totally clean. Some of them require two CSELs to implement. As is in + // this case, we emit the first CSEL and then emit a second using the output + // of the first as the RHS. We're effectively OR'ing the two CC's together. // FIXME: It would be nice if we could match the two CSELs to two CSINCs. 
SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); - SDValue CS1 = DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); + SDValue CS1 = + DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); - return DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); + return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); } } @@ -3147,7 +3163,8 @@ static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) { return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp; } -SDValue ARM64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, + SelectionDAG &DAG) const { SDValue CC = Op->getOperand(0); SDValue TVal = Op->getOperand(1); SDValue FVal = Op->getOperand(2); @@ -3163,13 +3180,13 @@ SDValue ARM64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0))) return SDValue(); - ARM64CC::CondCode OFCC; + AArch64CC::CondCode OFCC; SDValue Value, Overflow; - std::tie(Value, Overflow) = getARM64XALUOOp(OFCC, CC.getValue(0), DAG); + std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG); SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); - return DAG.getNode(ARM64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, CCVal, - Overflow); + return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, + CCVal, Overflow); } if (CC.getOpcode() == ISD::SETCC) @@ -3180,8 +3197,8 @@ SDValue ARM64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { FVal, ISD::SETNE); } -SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -3207,7 +3224,7 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, assert((LHS.getValueType() == RHS.getValueType()) && (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64)); - unsigned Opcode = ARM64ISD::CSEL; + unsigned Opcode = AArch64ISD::CSEL; // If both the TVal and the FVal are constants, see if we can swap them in // order to for a CSINV or CSINC out of them. @@ -3251,9 +3268,9 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC // instead of a CSEL in that case. if (TrueVal == ~FalseVal) { - Opcode = ARM64ISD::CSINV; + Opcode = AArch64ISD::CSINV; } else if (TrueVal == -FalseVal) { - Opcode = ARM64ISD::CSNEG; + Opcode = AArch64ISD::CSNEG; } else if (TVal.getValueType() == MVT::i32) { // If our operands are only 32-bit wide, make sure we use 32-bit // arithmetic for the check whether we can use CSINC. This ensures that @@ -3264,7 +3281,7 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, const uint32_t FalseVal32 = CFVal->getZExtValue(); if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) { - Opcode = ARM64ISD::CSINC; + Opcode = AArch64ISD::CSINC; if (TrueVal32 > FalseVal32) { Swap = true; @@ -3272,7 +3289,7 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, } // 64-bit check whether we can use CSINC. 
} else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) { - Opcode = ARM64ISD::CSINC; + Opcode = AArch64ISD::CSINC; if (TrueVal > FalseVal) { Swap = true; @@ -3286,7 +3303,7 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, CC = ISD::getSetCCInverse(CC, true); } - if (Opcode != ARM64ISD::CSEL) { + if (Opcode != AArch64ISD::CSEL) { // Drop FVal since we can get its value by simply inverting/negating // TVal. FVal = TVal; @@ -3294,7 +3311,7 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, } SDValue CCVal; - SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl); + SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); EVT VT = Op.getValueType(); return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp); @@ -3328,7 +3345,7 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, case ISD::SETUGE: case ISD::SETOGT: case ISD::SETOGE: - return DAG.getNode(ARM64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS); + return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS); break; case ISD::SETLT: case ISD::SETLE: @@ -3336,7 +3353,7 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, case ISD::SETULE: case ISD::SETOLT: case ISD::SETOLE: - return DAG.getNode(ARM64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS); + return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS); break; } } @@ -3346,26 +3363,26 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, // and do the comparison. SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally // clean. Some of them require two CSELs to implement. - ARM64CC::CondCode CC1, CC2; - changeFPCCToARM64CC(CC, CC1, CC2); + AArch64CC::CondCode CC1, CC2; + changeFPCCToAArch64CC(CC, CC1, CC2); SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); - SDValue CS1 = DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); + SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); // If we need a second CSEL, emit it, using the output of the first as the // RHS. We're effectively OR'ing the two CC's together. - if (CC2 != ARM64CC::AL) { + if (CC2 != AArch64CC::AL) { SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); - return DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); + return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); } // Otherwise, return the output of the first CSEL. return CS1; } -SDValue ARM64TargetLowering::LowerJumpTable(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, + SelectionDAG &DAG) const { // Jump table entries as PC relative offsets. No additional tweaking // is necessary here. Just get the address of the jump table. 
JumpTableSDNode *JT = cast(Op); @@ -3374,24 +3391,26 @@ SDValue ARM64TargetLowering::LowerJumpTable(SDValue Op, if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = ARM64II::MO_NC; + const unsigned char MO_NC = AArch64II::MO_NC; return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, ARM64II::MO_G3), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, ARM64II::MO_G2 | MO_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, ARM64II::MO_G1 | MO_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, ARM64II::MO_G0 | MO_NC)); + AArch64ISD::WrapperLarge, DL, PtrVT, + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G3), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G2 | MO_NC), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G1 | MO_NC), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, + AArch64II::MO_G0 | MO_NC)); } - SDValue Hi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, ARM64II::MO_PAGE); + SDValue Hi = + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGE); SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + AArch64II::MO_PAGEOFF | AArch64II::MO_NC); + SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); + return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); } -SDValue ARM64TargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast(Op); EVT PtrVT = getPointerTy(); SDLoc DL(Op); @@ -3401,63 +3420,63 @@ SDValue ARM64TargetLowering::LowerConstantPool(SDValue Op, if (Subtarget->isTargetMachO()) { SDValue GotAddr = DAG.getTargetConstantPool( CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - ARM64II::MO_GOT); - return DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, GotAddr); + AArch64II::MO_GOT); + return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); } - const unsigned char MO_NC = ARM64II::MO_NC; + const unsigned char MO_NC = AArch64II::MO_NC; return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, + AArch64ISD::WrapperLarge, DL, PtrVT, DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G3), + CP->getOffset(), AArch64II::MO_G3), DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G2 | MO_NC), + CP->getOffset(), AArch64II::MO_G2 | MO_NC), DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G1 | MO_NC), + CP->getOffset(), AArch64II::MO_G1 | MO_NC), DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G0 | MO_NC)); + CP->getOffset(), AArch64II::MO_G0 | MO_NC)); } else { // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on // ELF, the only valid one on Darwin. 
SDValue Hi = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_PAGE); + CP->getOffset(), AArch64II::MO_PAGE); SDValue Lo = DAG.getTargetConstantPool( CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - ARM64II::MO_PAGEOFF | ARM64II::MO_NC); + AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); + return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); } } -SDValue ARM64TargetLowering::LowerBlockAddress(SDValue Op, +SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { const BlockAddress *BA = cast(Op)->getBlockAddress(); EVT PtrVT = getPointerTy(); SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = ARM64II::MO_NC; + const unsigned char MO_NC = AArch64II::MO_NC; return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G3), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G2 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G1 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G0 | MO_NC)); + AArch64ISD::WrapperLarge, DL, PtrVT, + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G3), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G2 | MO_NC), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G1 | MO_NC), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); } else { - SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_PAGE); - SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_PAGEOFF | - ARM64II::MO_NC); - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGE); + SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGEOFF | + AArch64II::MO_NC); + SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); + return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); } } -SDValue ARM64TargetLowering::LowerDarwin_VASTART(SDValue Op, +SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const { - ARM64FunctionInfo *FuncInfo = - DAG.getMachineFunction().getInfo(); + AArch64FunctionInfo *FuncInfo = + DAG.getMachineFunction().getInfo(); SDLoc DL(Op); SDValue FR = @@ -3467,12 +3486,12 @@ SDValue ARM64TargetLowering::LowerDarwin_VASTART(SDValue Op, MachinePointerInfo(SV), false, false, 0); } -SDValue ARM64TargetLowering::LowerAAPCS_VASTART(SDValue Op, +SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const { // The layout of the va_list struct is specified in the AArch64 Procedure Call // Standard, section B.3. MachineFunction &MF = DAG.getMachineFunction(); - ARM64FunctionInfo *FuncInfo = MF.getInfo(); + AArch64FunctionInfo *FuncInfo = MF.getInfo(); SDLoc DL(Op); SDValue Chain = Op.getOperand(0); @@ -3534,12 +3553,14 @@ SDValue ARM64TargetLowering::LowerAAPCS_VASTART(SDValue Op, return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } -SDValue ARM64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVASTART(SDValue Op, + SelectionDAG &DAG) const { return Subtarget->isTargetDarwin() ? 
LowerDarwin_VASTART(Op, DAG) : LowerAAPCS_VASTART(Op, DAG); } -SDValue ARM64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, + SelectionDAG &DAG) const { // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single // pointer. unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32; @@ -3552,7 +3573,7 @@ SDValue ARM64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SrcSV)); } -SDValue ARM64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin() && "automatic va_arg instruction only works on Darwin"); @@ -3614,15 +3635,16 @@ SDValue ARM64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { false, false, 0); } -SDValue ARM64TargetLowering::LowerFRAMEADDR(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc DL(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); - SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, ARM64::FP, VT); + SDValue FrameAddr = + DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr, MachinePointerInfo(), false, false, false, 0); @@ -3631,18 +3653,18 @@ SDValue ARM64TargetLowering::LowerFRAMEADDR(SDValue Op, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned ARM64TargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { +unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, + EVT VT) const { unsigned Reg = StringSwitch(RegName) - .Case("sp", ARM64::SP) + .Case("sp", AArch64::SP) .Default(0); if (Reg) return Reg; report_fatal_error("Invalid register name global variable"); } -SDValue ARM64TargetLowering::LowerRETURNADDR(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); @@ -3659,14 +3681,14 @@ SDValue ARM64TargetLowering::LowerRETURNADDR(SDValue Op, } // Return LR, which contains the return address. Mark it an implicit live-in. - unsigned Reg = MF.addLiveIn(ARM64::LR, &ARM64::GPR64RegClass); + unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass); return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT); } /// LowerShiftRightParts - Lower SRA_PARTS, which returns two /// i64 values and take a 2 x i64 value to shift plus a shift amount. 
-SDValue ARM64TargetLowering::LowerShiftRightParts(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -3688,14 +3710,14 @@ SDValue ARM64TargetLowering::LowerShiftRightParts(SDValue Op, SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), ISD::SETGE, dl, DAG); - SDValue CCVal = DAG.getConstant(ARM64CC::GE, MVT::i32); + SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32); SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); SDValue Lo = - DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); + DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); - // ARM64 shifts larger than the register width are wrapped rather than + // AArch64 shifts larger than the register width are wrapped rather than // clamped, so we can't just emit "hi >> x". SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); SDValue TrueValHi = Opc == ISD::SRA @@ -3703,7 +3725,7 @@ SDValue ARM64TargetLowering::LowerShiftRightParts(SDValue Op, DAG.getConstant(VTBits - 1, MVT::i64)) : DAG.getConstant(0, VT); SDValue Hi = - DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp); + DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); @@ -3711,7 +3733,7 @@ SDValue ARM64TargetLowering::LowerShiftRightParts(SDValue Op, /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i64 values and take a 2 x i64 value to shift plus a shift amount. -SDValue ARM64TargetLowering::LowerShiftLeftParts(SDValue Op, +SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); @@ -3735,45 +3757,46 @@ SDValue ARM64TargetLowering::LowerShiftLeftParts(SDValue Op, SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), ISD::SETGE, dl, DAG); - SDValue CCVal = DAG.getConstant(ARM64CC::GE, MVT::i32); - SDValue Hi = DAG.getNode(ARM64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp); + SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32); + SDValue Hi = + DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp); - // ARM64 shifts of larger than register sizes are wrapped rather than clamped, - // so we can't just emit "lo << a" if a is too big. + // AArch64 shifts of larger than register sizes are wrapped rather than + // clamped, so we can't just emit "lo << a" if a is too big. SDValue TrueValLo = DAG.getConstant(0, VT); SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); SDValue Lo = - DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); + DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); } -bool -ARM64TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { - // The ARM64 target doesn't support folding offsets into global addresses. +bool AArch64TargetLowering::isOffsetFoldingLegal( + const GlobalAddressSDNode *GA) const { + // The AArch64 target doesn't support folding offsets into global addresses. 
return false; } -bool ARM64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { +bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases. // FIXME: We should be able to handle f128 as well with a clever lowering. if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32)) return true; if (VT == MVT::f64) - return ARM64_AM::getFP64Imm(Imm) != -1; + return AArch64_AM::getFP64Imm(Imm) != -1; else if (VT == MVT::f32) - return ARM64_AM::getFP32Imm(Imm) != -1; + return AArch64_AM::getFP32Imm(Imm) != -1; return false; } //===----------------------------------------------------------------------===// -// ARM64 Optimization Hooks +// AArch64 Optimization Hooks //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// ARM64 Inline Assembly Support +// AArch64 Inline Assembly Support //===----------------------------------------------------------------------===// // Table of Constraints @@ -3802,8 +3825,8 @@ bool ARM64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. -ARM64TargetLowering::ConstraintType -ARM64TargetLowering::getConstraintType(const std::string &Constraint) const { +AArch64TargetLowering::ConstraintType +AArch64TargetLowering::getConstraintType(const std::string &Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: @@ -3826,7 +3849,7 @@ ARM64TargetLowering::getConstraintType(const std::string &Constraint) const { /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight -ARM64TargetLowering::getSingleConstraintMatchWeight( +AArch64TargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; @@ -3853,32 +3876,32 @@ ARM64TargetLowering::getSingleConstraintMatchWeight( } std::pair -ARM64TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { +AArch64TargetLowering::getRegForInlineAsmConstraint( + const std::string &Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &ARM64::GPR64commonRegClass); - return std::make_pair(0U, &ARM64::GPR32commonRegClass); + return std::make_pair(0U, &AArch64::GPR64commonRegClass); + return std::make_pair(0U, &AArch64::GPR32commonRegClass); case 'w': if (VT == MVT::f32) - return std::make_pair(0U, &ARM64::FPR32RegClass); + return std::make_pair(0U, &AArch64::FPR32RegClass); if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &ARM64::FPR64RegClass); + return std::make_pair(0U, &AArch64::FPR64RegClass); if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &ARM64::FPR128RegClass); + return std::make_pair(0U, &AArch64::FPR128RegClass); break; // The instructions that this constraint is designed for can // only take 128-bit registers so just use that regclass. 
case 'x': if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &ARM64::FPR128_loRegClass); + return std::make_pair(0U, &AArch64::FPR128_loRegClass); break; } } if (StringRef("{cc}").equals_lower(Constraint)) - return std::make_pair(unsigned(ARM64::NZCV), &ARM64::CCRRegClass); + return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass); // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. @@ -3897,8 +3920,8 @@ ARM64TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // v0 - v31 are aliases of q0 - q31. // By default we'll emit v0-v31 for this unless there's a modifier where // we'll emit the correct register as well. - Res.first = ARM64::FPR128RegClass.getRegister(RegNo); - Res.second = &ARM64::FPR128RegClass; + Res.first = AArch64::FPR128RegClass.getRegister(RegNo); + Res.second = &AArch64::FPR128RegClass; } } } @@ -3908,7 +3931,7 @@ ARM64TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. -void ARM64TargetLowering::LowerAsmOperandForConstraint( +void AArch64TargetLowering::LowerAsmOperandForConstraint( SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const { SDValue Result; @@ -3931,9 +3954,9 @@ void ARM64TargetLowering::LowerAsmOperandForConstraint( return; if (Op.getValueType() == MVT::i64) - Result = DAG.getRegister(ARM64::XZR, MVT::i64); + Result = DAG.getRegister(AArch64::XZR, MVT::i64); else - Result = DAG.getRegister(ARM64::WZR, MVT::i32); + Result = DAG.getRegister(AArch64::WZR, MVT::i32); break; } @@ -3974,11 +3997,11 @@ void ARM64TargetLowering::LowerAsmOperandForConstraint( // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice // versa. case 'K': - if (ARM64_AM::isLogicalImmediate(CVal, 32)) + if (AArch64_AM::isLogicalImmediate(CVal, 32)) break; return; case 'L': - if (ARM64_AM::isLogicalImmediate(CVal, 64)) + if (AArch64_AM::isLogicalImmediate(CVal, 64)) break; return; // The M and N constraints are a superset of K and L respectively, for use @@ -3990,7 +4013,7 @@ void ARM64TargetLowering::LowerAsmOperandForConstraint( case 'M': { if (!isUInt<32>(CVal)) return; - if (ARM64_AM::isLogicalImmediate(CVal, 32)) + if (AArch64_AM::isLogicalImmediate(CVal, 32)) break; if ((CVal & 0xFFFF) == CVal) break; @@ -4004,7 +4027,7 @@ void ARM64TargetLowering::LowerAsmOperandForConstraint( return; } case 'N': { - if (ARM64_AM::isLogicalImmediate(CVal, 64)) + if (AArch64_AM::isLogicalImmediate(CVal, 64)) break; if ((CVal & 0xFFFFULL) == CVal) break; @@ -4043,7 +4066,7 @@ void ARM64TargetLowering::LowerAsmOperandForConstraint( } //===----------------------------------------------------------------------===// -// ARM64 Advanced SIMD Support +// AArch64 Advanced SIMD Support //===----------------------------------------------------------------------===// /// WidenVector - Given a value in the V64 register class, produce the @@ -4075,13 +4098,13 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); SDLoc DL(V128Reg); - return DAG.getTargetExtractSubreg(ARM64::dsub, DL, NarrowTy, V128Reg); + return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg); } // Gather data to see if the operation can be modelled as a // shuffle in combination with VEXTs. 
-SDValue ARM64TargetLowering::ReconstructShuffle(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, + SelectionDAG &DAG) const { SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); @@ -4186,7 +4209,7 @@ SDValue ARM64TargetLowering::ReconstructShuffle(SDValue Op, DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], DAG.getIntPtrConstant(NumElts)); unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1); - ShuffleSrcs[i] = DAG.getNode(ARM64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2, + ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2, DAG.getConstant(Imm, MVT::i32)); } } @@ -4542,13 +4565,13 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, // VREV divides the vector in half and swaps within the half. if (VT.getVectorElementType() == MVT::i32 || VT.getVectorElementType() == MVT::f32) - return DAG.getNode(ARM64ISD::REV64, dl, VT, OpLHS); + return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS); // vrev <4 x i16> -> REV32 if (VT.getVectorElementType() == MVT::i16) - return DAG.getNode(ARM64ISD::REV32, dl, VT, OpLHS); + return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS); // vrev <4 x i8> -> REV16 assert(VT.getVectorElementType() == MVT::i8); - return DAG.getNode(ARM64ISD::REV16, dl, VT, OpLHS); + return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS); case OP_VDUP0: case OP_VDUP1: case OP_VDUP2: @@ -4556,13 +4579,13 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, EVT EltTy = VT.getVectorElementType(); unsigned Opcode; if (EltTy == MVT::i8) - Opcode = ARM64ISD::DUPLANE8; + Opcode = AArch64ISD::DUPLANE8; else if (EltTy == MVT::i16) - Opcode = ARM64ISD::DUPLANE16; + Opcode = AArch64ISD::DUPLANE16; else if (EltTy == MVT::i32 || EltTy == MVT::f32) - Opcode = ARM64ISD::DUPLANE32; + Opcode = AArch64ISD::DUPLANE32; else if (EltTy == MVT::i64 || EltTy == MVT::f64) - Opcode = ARM64ISD::DUPLANE64; + Opcode = AArch64ISD::DUPLANE64; else llvm_unreachable("Invalid vector element type?"); @@ -4575,21 +4598,27 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, case OP_VEXT2: case OP_VEXT3: { unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS); - return DAG.getNode(ARM64ISD::EXT, dl, VT, OpLHS, OpRHS, + return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS, DAG.getConstant(Imm, MVT::i32)); } case OP_VUZPL: - return DAG.getNode(ARM64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); + return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); case OP_VUZPR: - return DAG.getNode(ARM64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); + return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); case OP_VZIPL: - return DAG.getNode(ARM64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); + return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); case OP_VZIPR: - return DAG.getNode(ARM64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); + return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); case OP_VTRNL: - return DAG.getNode(ARM64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); + return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); case OP_VTRNR: - return DAG.getNode(ARM64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); + return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); } } @@ -4627,7 +4656,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef ShuffleMask, V1Cst = 
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst); Shuffle = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::arm64_neon_tbl1, MVT::i32), V1Cst, + DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst, DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, makeArrayRef(TBLMask.data(), IndexLen))); } else { @@ -4635,19 +4664,19 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef ShuffleMask, V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst); Shuffle = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::arm64_neon_tbl1, MVT::i32), V1Cst, + DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst, DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, makeArrayRef(TBLMask.data(), IndexLen))); } else { // FIXME: We cannot, for the moment, emit a TBL2 instruction because we // cannot currently represent the register constraints on the input // table registers. - // Shuffle = DAG.getNode(ARM64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst, + // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst, // DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, // &TBLMask[0], IndexLen)); Shuffle = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::arm64_neon_tbl2, MVT::i32), V1Cst, V2Cst, + DAG.getConstant(Intrinsic::aarch64_neon_tbl2, MVT::i32), V1Cst, V2Cst, DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, makeArrayRef(TBLMask.data(), IndexLen))); } @@ -4657,19 +4686,19 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef ShuffleMask, static unsigned getDUPLANEOp(EVT EltType) { if (EltType == MVT::i8) - return ARM64ISD::DUPLANE8; + return AArch64ISD::DUPLANE8; if (EltType == MVT::i16) - return ARM64ISD::DUPLANE16; + return AArch64ISD::DUPLANE16; if (EltType == MVT::i32 || EltType == MVT::f32) - return ARM64ISD::DUPLANE32; + return AArch64ISD::DUPLANE32; if (EltType == MVT::i64 || EltType == MVT::f64) - return ARM64ISD::DUPLANE64; + return AArch64ISD::DUPLANE64; llvm_unreachable("Invalid vector element type?"); } -SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { SDLoc dl(Op); EVT VT = Op.getValueType(); @@ -4692,13 +4721,13 @@ SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, Lane = 0; if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) - return DAG.getNode(ARM64ISD::DUP, dl, V1.getValueType(), + return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(), V1.getOperand(0)); // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non- // constant. If so, we can just reference the lane's definition directly. if (V1.getOpcode() == ISD::BUILD_VECTOR && !isa(V1.getOperand(Lane))) - return DAG.getNode(ARM64ISD::DUP, dl, VT, V1.getOperand(Lane)); + return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane)); // Otherwise, duplicate from the lane of the input vector. 
unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType()); @@ -4720,11 +4749,11 @@ SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, } if (isREVMask(ShuffleMask, VT, 64)) - return DAG.getNode(ARM64ISD::REV64, dl, V1.getValueType(), V1, V2); + return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2); if (isREVMask(ShuffleMask, VT, 32)) - return DAG.getNode(ARM64ISD::REV32, dl, V1.getValueType(), V1, V2); + return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2); if (isREVMask(ShuffleMask, VT, 16)) - return DAG.getNode(ARM64ISD::REV16, dl, V1.getValueType(), V1, V2); + return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2); bool ReverseEXT = false; unsigned Imm; @@ -4732,39 +4761,39 @@ SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, if (ReverseEXT) std::swap(V1, V2); Imm *= getExtFactor(V1); - return DAG.getNode(ARM64ISD::EXT, dl, V1.getValueType(), V1, V2, + return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2, DAG.getConstant(Imm, MVT::i32)); } else if (V2->getOpcode() == ISD::UNDEF && isSingletonEXTMask(ShuffleMask, VT, Imm)) { Imm *= getExtFactor(V1); - return DAG.getNode(ARM64ISD::EXT, dl, V1.getValueType(), V1, V1, + return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1, DAG.getConstant(Imm, MVT::i32)); } unsigned WhichResult; if (isZIPMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::ZIP1 : ARM64ISD::ZIP2; + unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); } if (isUZPMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::UZP1 : ARM64ISD::UZP2; + unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); } if (isTRNMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::TRN1 : ARM64ISD::TRN2; + unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); } if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::ZIP1 : ARM64ISD::ZIP2; + unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); } if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::UZP1 : ARM64ISD::UZP2; + unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); } if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::TRN1 : ARM64ISD::TRN2; + unsigned Opc = (WhichResult == 0) ? 
AArch64ISD::TRN1 : AArch64ISD::TRN2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); } @@ -4844,8 +4873,8 @@ static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, return false; } -SDValue ARM64TargetLowering::LowerVectorAND(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op, + SelectionDAG &DAG) const { BuildVectorSDNode *BVN = dyn_cast(Op.getOperand(1).getNode()); SDValue LHS = Op.getOperand(0); @@ -4870,55 +4899,55 @@ SDValue ARM64TargetLowering::LowerVectorAND(SDValue Op, CnstBits = CnstBits.zextOrTrunc(64); uint64_t CnstVal = CnstBits.getZExtValue(); - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(16, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(24, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); @@ -4990,12 +5019,12 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { // Is the second op an shl or lshr? SDValue Shift = N->getOperand(1); - // This will have been turned into: ARM64ISD::VSHL vector, #shift - // or ARM64ISD::VLSHR vector, #shift + // This will have been turned into: AArch64ISD::VSHL vector, #shift + // or AArch64ISD::VLSHR vector, #shift unsigned ShiftOpc = Shift.getOpcode(); - if ((ShiftOpc != ARM64ISD::VSHL && ShiftOpc != ARM64ISD::VLSHR)) + if ((ShiftOpc != AArch64ISD::VSHL && ShiftOpc != AArch64ISD::VLSHR)) return SDValue(); - bool IsShiftRight = ShiftOpc == ARM64ISD::VLSHR; + bool IsShiftRight = ShiftOpc == AArch64ISD::VLSHR; // Is the shift amount constant? ConstantSDNode *C2node = dyn_cast(Shift.getOperand(1)); @@ -5021,12 +5050,12 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { SDValue Y = Shift.getOperand(0); unsigned Intrin = - IsShiftRight ? Intrinsic::arm64_neon_vsri : Intrinsic::arm64_neon_vsli; + IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli; SDValue ResultSLI = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, DAG.getConstant(Intrin, MVT::i32), X, Y, Shift.getOperand(1)); - DEBUG(dbgs() << "arm64-lower: transformed: \n"); + DEBUG(dbgs() << "aarch64-lower: transformed: \n"); DEBUG(N->dump(&DAG)); DEBUG(dbgs() << "into: \n"); DEBUG(ResultSLI->dump(&DAG)); @@ -5035,10 +5064,10 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { return ResultSLI; } -SDValue ARM64TargetLowering::LowerVectorOR(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op, + SelectionDAG &DAG) const { // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2)) - if (EnableARM64SlrGeneration) { + if (EnableAArch64SlrGeneration) { SDValue Res = tryLowerToSLI(Op.getNode(), DAG); if (Res.getNode()) return Res; @@ -5070,55 +5099,55 @@ SDValue ARM64TargetLowering::LowerVectorOR(SDValue Op, CnstBits = CnstBits.zextOrTrunc(64); uint64_t CnstVal = CnstBits.getZExtValue(); - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(16, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(24, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); @@ -5137,8 +5166,8 @@ SDValue ARM64TargetLowering::LowerVectorOR(SDValue Op, return Op; } -SDValue ARM64TargetLowering::LowerBUILD_VECTOR(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { BuildVectorSDNode *BVN = cast(Op.getNode()); SDLoc dl(Op); EVT VT = Op.getValueType(); @@ -5163,186 +5192,186 @@ SDValue ARM64TargetLowering::LowerBUILD_VECTOR(SDValue Op, return Op; // The many faces of MOVI... - if (ARM64_AM::isAdvSIMDModImmType10(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType10(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType10(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal); if (VT.getSizeInBits() == 128) { - SDValue Mov = DAG.getNode(ARM64ISD::MOVIedit, dl, MVT::v2i64, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64, DAG.getConstant(CnstVal, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } // Support the V64 version via subregister insertion. 
- SDValue Mov = DAG.getNode(ARM64ISD::MOVIedit, dl, MVT::f64, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64, DAG.getConstant(CnstVal, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(16, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(24, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType7(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType7(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVImsl, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(264, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType8(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType8(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVImsl, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(272, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType9(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType9(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType9(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8; - SDValue Mov = DAG.getNode(ARM64ISD::MOVI, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } // The few faces of FMOV... - if (ARM64_AM::isAdvSIMDModImmType11(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType11(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType11(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32; - SDValue Mov = DAG.getNode(ARM64ISD::FMOV, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType12(CnstVal) && + if (AArch64_AM::isAdvSIMDModImmType12(CnstVal) && VT.getSizeInBits() == 128) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType12(CnstVal); - SDValue Mov = DAG.getNode(ARM64ISD::FMOV, dl, MVT::v2f64, + CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal); + SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64, DAG.getConstant(CnstVal, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } // The many faces of MVNI... CnstVal = ~CnstVal; - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(16, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(24, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType7(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType7(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNImsl, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(264, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType8(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType8(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNImsl, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(272, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); @@ -5411,7 +5440,7 @@ SDValue ARM64TargetLowering::LowerBUILD_VECTOR(SDValue Op, if (!isConstant) { if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT || Value.getValueType() != VT) - return DAG.getNode(ARM64ISD::DUP, dl, VT, Value); + return DAG.getNode(AArch64ISD::DUP, dl, VT, Value); // This is actually a DUPLANExx operation, which keeps everything vectory. @@ -5444,7 +5473,7 @@ SDValue ARM64TargetLowering::LowerBUILD_VECTOR(SDValue Op, // is better than the default, which will perform a separate initialization // for each lane. if (NumConstantLanes > 0 && usesOnlyOneConstantValue) { - SDValue Val = DAG.getNode(ARM64ISD::DUP, dl, VT, ConstantValue); + SDValue Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue); // Now insert the non-constant lanes. for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); @@ -5487,7 +5516,7 @@ SDValue ARM64TargetLowering::LowerBUILD_VECTOR(SDValue Op, // b) Allow the register coalescer to fold away the copy if the // value is already in an S or D register. if (Op0.getOpcode() != ISD::UNDEF && (ElemSize == 32 || ElemSize == 64)) { - unsigned SubIdx = ElemSize == 32 ? ARM64::ssub : ARM64::dsub; + unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub; MachineSDNode *N = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0, DAG.getTargetConstant(SubIdx, MVT::i32)); @@ -5508,8 +5537,8 @@ SDValue ARM64TargetLowering::LowerBUILD_VECTOR(SDValue Op, return SDValue(); } -SDValue ARM64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!"); // Check for non-constant lane. @@ -5539,8 +5568,9 @@ SDValue ARM64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, return NarrowVector(Node, DAG); } -SDValue ARM64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { +SDValue +AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!"); // Check for non-constant lane. @@ -5573,8 +5603,8 @@ SDValue ARM64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, Op.getOperand(1)); } -SDValue ARM64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, + SelectionDAG &DAG) const { EVT VT = Op.getOperand(0).getValueType(); SDLoc dl(Op); // Just in case... 
@@ -5590,16 +5620,16 @@ SDValue ARM64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, if (Val == 0) { switch (Size) { case 8: - return DAG.getTargetExtractSubreg(ARM64::bsub, dl, Op.getValueType(), + return DAG.getTargetExtractSubreg(AArch64::bsub, dl, Op.getValueType(), Op.getOperand(0)); case 16: - return DAG.getTargetExtractSubreg(ARM64::hsub, dl, Op.getValueType(), + return DAG.getTargetExtractSubreg(AArch64::hsub, dl, Op.getValueType(), Op.getOperand(0)); case 32: - return DAG.getTargetExtractSubreg(ARM64::ssub, dl, Op.getValueType(), + return DAG.getTargetExtractSubreg(AArch64::ssub, dl, Op.getValueType(), Op.getOperand(0)); case 64: - return DAG.getTargetExtractSubreg(ARM64::dsub, dl, Op.getValueType(), + return DAG.getTargetExtractSubreg(AArch64::dsub, dl, Op.getValueType(), Op.getOperand(0)); default: llvm_unreachable("Unexpected vector type in extract_subvector!"); @@ -5613,8 +5643,8 @@ SDValue ARM64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, return SDValue(); } -bool ARM64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, - EVT VT) const { +bool AArch64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, + EVT VT) const { if (VT.getVectorNumElements() == 4 && (VT.is128BitVector() || VT.is64BitVector())) { unsigned PFIndexes[4]; @@ -5700,8 +5730,8 @@ static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits)); } -SDValue ARM64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, + SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); int64_t Cnt; @@ -5716,10 +5746,10 @@ SDValue ARM64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, case ISD::SHL: if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) - return DAG.getNode(ARM64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0), + return DAG.getNode(AArch64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0), DAG.getConstant(Cnt, MVT::i32)); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::arm64_neon_ushl, MVT::i32), + DAG.getConstant(Intrinsic::aarch64_neon_ushl, MVT::i32), Op.getOperand(0), Op.getOperand(1)); case ISD::SRA: case ISD::SRL: @@ -5727,7 +5757,7 @@ SDValue ARM64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, if (isVShiftRImm(Op.getOperand(1), VT, false, false, Cnt) && Cnt < EltSize) { unsigned Opc = - (Op.getOpcode() == ISD::SRA) ? ARM64ISD::VASHR : ARM64ISD::VLSHR; + (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR; return DAG.getNode(Opc, SDLoc(Op), VT, Op.getOperand(0), DAG.getConstant(Cnt, MVT::i32)); } @@ -5735,10 +5765,10 @@ SDValue ARM64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, // Right shift register. Note, there is not a shift right register // instruction, but the shift left register instruction takes a signed // value, where negative numbers specify a right shift. - unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::arm64_neon_sshl - : Intrinsic::arm64_neon_ushl; + unsigned Opc = (Op.getOpcode() == ISD::SRA) ? 
Intrinsic::aarch64_neon_sshl + : Intrinsic::aarch64_neon_ushl; // negate the shift amount - SDValue NegShift = DAG.getNode(ARM64ISD::NEG, DL, VT, Op.getOperand(1)); + SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1)); SDValue NegShiftLeft = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, DAG.getConstant(Opc, MVT::i32), Op.getOperand(0), NegShift); @@ -5749,7 +5779,7 @@ SDValue ARM64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, } static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, - ARM64CC::CondCode CC, bool NoNans, EVT VT, + AArch64CC::CondCode CC, bool NoNans, EVT VT, SDLoc dl, SelectionDAG &DAG) { EVT SrcVT = LHS.getValueType(); @@ -5763,85 +5793,86 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, switch (CC) { default: return SDValue(); - case ARM64CC::NE: { + case AArch64CC::NE: { SDValue Fcmeq; if (IsZero) - Fcmeq = DAG.getNode(ARM64ISD::FCMEQz, dl, VT, LHS); + Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS); else - Fcmeq = DAG.getNode(ARM64ISD::FCMEQ, dl, VT, LHS, RHS); - return DAG.getNode(ARM64ISD::NOT, dl, VT, Fcmeq); + Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS); + return DAG.getNode(AArch64ISD::NOT, dl, VT, Fcmeq); } - case ARM64CC::EQ: + case AArch64CC::EQ: if (IsZero) - return DAG.getNode(ARM64ISD::FCMEQz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMEQ, dl, VT, LHS, RHS); - case ARM64CC::GE: + return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS); + case AArch64CC::GE: if (IsZero) - return DAG.getNode(ARM64ISD::FCMGEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGE, dl, VT, LHS, RHS); - case ARM64CC::GT: + return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS); + case AArch64CC::GT: if (IsZero) - return DAG.getNode(ARM64ISD::FCMGTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGT, dl, VT, LHS, RHS); - case ARM64CC::LS: + return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS); + case AArch64CC::LS: if (IsZero) - return DAG.getNode(ARM64ISD::FCMLEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGE, dl, VT, RHS, LHS); - case ARM64CC::LT: + return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS); + case AArch64CC::LT: if (!NoNans) return SDValue(); // If we ignore NaNs then we can use to the MI implementation. // Fallthrough. 
- case ARM64CC::MI: + case AArch64CC::MI: if (IsZero) - return DAG.getNode(ARM64ISD::FCMLTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGT, dl, VT, RHS, LHS); + return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS); } } switch (CC) { default: return SDValue(); - case ARM64CC::NE: { + case AArch64CC::NE: { SDValue Cmeq; if (IsZero) - Cmeq = DAG.getNode(ARM64ISD::CMEQz, dl, VT, LHS); + Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS); else - Cmeq = DAG.getNode(ARM64ISD::CMEQ, dl, VT, LHS, RHS); - return DAG.getNode(ARM64ISD::NOT, dl, VT, Cmeq); + Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS); + return DAG.getNode(AArch64ISD::NOT, dl, VT, Cmeq); } - case ARM64CC::EQ: + case AArch64CC::EQ: if (IsZero) - return DAG.getNode(ARM64ISD::CMEQz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMEQ, dl, VT, LHS, RHS); - case ARM64CC::GE: + return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS); + case AArch64CC::GE: if (IsZero) - return DAG.getNode(ARM64ISD::CMGEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGE, dl, VT, LHS, RHS); - case ARM64CC::GT: + return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS); + case AArch64CC::GT: if (IsZero) - return DAG.getNode(ARM64ISD::CMGTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGT, dl, VT, LHS, RHS); - case ARM64CC::LE: + return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS); + case AArch64CC::LE: if (IsZero) - return DAG.getNode(ARM64ISD::CMLEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGE, dl, VT, RHS, LHS); - case ARM64CC::LS: - return DAG.getNode(ARM64ISD::CMHS, dl, VT, RHS, LHS); - case ARM64CC::LO: - return DAG.getNode(ARM64ISD::CMHI, dl, VT, RHS, LHS); - case ARM64CC::LT: + return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS); + case AArch64CC::LS: + return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS); + case AArch64CC::LO: + return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS); + case AArch64CC::LT: if (IsZero) - return DAG.getNode(ARM64ISD::CMLTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGT, dl, VT, RHS, LHS); - case ARM64CC::HI: - return DAG.getNode(ARM64ISD::CMHI, dl, VT, LHS, RHS); - case ARM64CC::HS: - return DAG.getNode(ARM64ISD::CMHS, dl, VT, LHS, RHS); + return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS); + case AArch64CC::HI: + return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS); + case AArch64CC::HS: + return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS); } } -SDValue ARM64TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op, + SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -5849,19 +5880,19 @@ SDValue ARM64TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { if (LHS.getValueType().getVectorElementType().isInteger()) { assert(LHS.getValueType() == RHS.getValueType()); - ARM64CC::CondCode ARM64CC = changeIntCCToARM64CC(CC); - return EmitVectorComparison(LHS, RHS, ARM64CC, false, Op.getValueType(), dl, - DAG); + AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC); + return EmitVectorComparison(LHS, RHS, AArch64CC, false, Op.getValueType(), + dl, DAG); } 
assert(LHS.getValueType().getVectorElementType() == MVT::f32 || LHS.getValueType().getVectorElementType() == MVT::f64); - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally // clean. Some of them require two branches to implement. - ARM64CC::CondCode CC1, CC2; + AArch64CC::CondCode CC1, CC2; bool ShouldInvert; - changeVectorFPCCToARM64CC(CC, CC1, CC2, ShouldInvert); + changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert); bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath; SDValue Cmp = @@ -5869,7 +5900,7 @@ SDValue ARM64TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { if (!Cmp.getNode()) return SDValue(); - if (CC2 != ARM64CC::AL) { + if (CC2 != AArch64CC::AL) { SDValue Cmp2 = EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG); if (!Cmp2.getNode()) @@ -5887,22 +5918,22 @@ SDValue ARM64TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. -bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, - unsigned Intrinsic) const { +bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, + unsigned Intrinsic) const { switch (Intrinsic) { - case Intrinsic::arm64_neon_ld2: - case Intrinsic::arm64_neon_ld3: - case Intrinsic::arm64_neon_ld4: - case Intrinsic::arm64_neon_ld1x2: - case Intrinsic::arm64_neon_ld1x3: - case Intrinsic::arm64_neon_ld1x4: - case Intrinsic::arm64_neon_ld2lane: - case Intrinsic::arm64_neon_ld3lane: - case Intrinsic::arm64_neon_ld4lane: - case Intrinsic::arm64_neon_ld2r: - case Intrinsic::arm64_neon_ld3r: - case Intrinsic::arm64_neon_ld4r: { + case Intrinsic::aarch64_neon_ld2: + case Intrinsic::aarch64_neon_ld3: + case Intrinsic::aarch64_neon_ld4: + case Intrinsic::aarch64_neon_ld1x2: + case Intrinsic::aarch64_neon_ld1x3: + case Intrinsic::aarch64_neon_ld1x4: + case Intrinsic::aarch64_neon_ld2lane: + case Intrinsic::aarch64_neon_ld3lane: + case Intrinsic::aarch64_neon_ld4lane: + case Intrinsic::aarch64_neon_ld2r: + case Intrinsic::aarch64_neon_ld3r: + case Intrinsic::aarch64_neon_ld4r: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; @@ -5915,15 +5946,15 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = false; return true; } - case Intrinsic::arm64_neon_st2: - case Intrinsic::arm64_neon_st3: - case Intrinsic::arm64_neon_st4: - case Intrinsic::arm64_neon_st1x2: - case Intrinsic::arm64_neon_st1x3: - case Intrinsic::arm64_neon_st1x4: - case Intrinsic::arm64_neon_st2lane: - case Intrinsic::arm64_neon_st3lane: - case Intrinsic::arm64_neon_st4lane: { + case Intrinsic::aarch64_neon_st2: + case Intrinsic::aarch64_neon_st3: + case Intrinsic::aarch64_neon_st4: + case Intrinsic::aarch64_neon_st1x2: + case Intrinsic::aarch64_neon_st1x3: + case Intrinsic::aarch64_neon_st1x4: + case Intrinsic::aarch64_neon_st2lane: + case Intrinsic::aarch64_neon_st3lane: + case Intrinsic::aarch64_neon_st4lane: { Info.opc = ISD::INTRINSIC_VOID; // Conservatively set memVT to the entire set of vectors stored. 
unsigned NumElts = 0; @@ -5942,8 +5973,8 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } - case Intrinsic::arm64_ldaxr: - case Intrinsic::arm64_ldxr: { + case Intrinsic::aarch64_ldaxr: + case Intrinsic::aarch64_ldxr: { PointerType *PtrTy = cast(I.getArgOperand(0)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); @@ -5955,8 +5986,8 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = false; return true; } - case Intrinsic::arm64_stlxr: - case Intrinsic::arm64_stxr: { + case Intrinsic::aarch64_stlxr: + case Intrinsic::aarch64_stxr: { PointerType *PtrTy = cast(I.getArgOperand(1)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); @@ -5968,8 +5999,8 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } - case Intrinsic::arm64_ldaxp: - case Intrinsic::arm64_ldxp: { + case Intrinsic::aarch64_ldaxp: + case Intrinsic::aarch64_ldxp: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i128; Info.ptrVal = I.getArgOperand(0); @@ -5980,8 +6011,8 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = false; return true; } - case Intrinsic::arm64_stlxp: - case Intrinsic::arm64_stxp: { + case Intrinsic::aarch64_stlxp: + case Intrinsic::aarch64_stxp: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i128; Info.ptrVal = I.getArgOperand(2); @@ -6000,7 +6031,7 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, } // Truncations from 64-bit GPR to 32-bit GPR is free. -bool ARM64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { +bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); @@ -6009,7 +6040,7 @@ bool ARM64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { return false; return true; } -bool ARM64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { +bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (!VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); @@ -6021,7 +6052,7 @@ bool ARM64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { // All 32-bit GPR operations implicitly zero the high-half of the corresponding // 64-bit GPR. 
-bool ARM64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { +bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); @@ -6030,7 +6061,7 @@ bool ARM64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { return true; return false; } -bool ARM64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { +bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { if (!VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); @@ -6040,7 +6071,7 @@ bool ARM64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { return false; } -bool ARM64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { +bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { EVT VT1 = Val.getValueType(); if (isZExtFree(VT1, VT2)) { return true; @@ -6054,8 +6085,8 @@ bool ARM64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { VT2.isInteger() && VT1.getSizeInBits() <= 32); } -bool ARM64TargetLowering::hasPairedLoad(Type *LoadedType, - unsigned &RequiredAligment) const { +bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType, + unsigned &RequiredAligment) const { if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy()) return false; // Cyclone supports unaligned accesses. @@ -6064,8 +6095,8 @@ bool ARM64TargetLowering::hasPairedLoad(Type *LoadedType, return NumBits == 32 || NumBits == 64; } -bool ARM64TargetLowering::hasPairedLoad(EVT LoadedType, - unsigned &RequiredAligment) const { +bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType, + unsigned &RequiredAligment) const { if (!LoadedType.isSimple() || (!LoadedType.isInteger() && !LoadedType.isFloatingPoint())) return false; @@ -6081,10 +6112,11 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, (DstAlign == 0 || DstAlign % AlignCheck == 0)); } -EVT ARM64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, bool IsMemset, - bool ZeroMemset, bool MemcpyStrSrc, - MachineFunction &MF) const { +EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool IsMemset, + bool ZeroMemset, + bool MemcpyStrSrc, + MachineFunction &MF) const { // Don't use AdvSIMD to implement 16-byte memset. It would have taken one // instruction to materialize the v2i64 zero and one store (with restrictive // addressing mode). Just do two i64 store of zero-registers. @@ -6101,7 +6133,7 @@ EVT ARM64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, } // 12-bit optionally shifted immediates are legal for adds. -bool ARM64TargetLowering::isLegalAddImmediate(int64_t Immed) const { +bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const { if ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0)) return true; return false; @@ -6109,7 +6141,7 @@ bool ARM64TargetLowering::isLegalAddImmediate(int64_t Immed) const { // Integer comparisons are implemented with ADDS/SUBS, so the range of valid // immediates is the same as for an add or a sub. 
-bool ARM64TargetLowering::isLegalICmpImmediate(int64_t Immed) const { +bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const { if (Immed < 0) Immed *= -1; return isLegalAddImmediate(Immed); @@ -6117,9 +6149,9 @@ bool ARM64TargetLowering::isLegalICmpImmediate(int64_t Immed) const { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. -bool ARM64TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { - // ARM64 has five basic addressing modes: +bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { + // AArch64 has five basic addressing modes: // reg // reg + 9-bit signed offset // reg + SIZE_IN_BYTES * 12-bit unsigned offset @@ -6168,8 +6200,8 @@ bool ARM64TargetLowering::isLegalAddressingMode(const AddrMode &AM, return false; } -int ARM64TargetLowering::getScalingFactorCost(const AddrMode &AM, - Type *Ty) const { +int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM, + Type *Ty) const { // Scaling factors are not free at all. // Operands | Rt Latency // ------------------------------------------- @@ -6184,7 +6216,7 @@ int ARM64TargetLowering::getScalingFactorCost(const AddrMode &AM, return -1; } -bool ARM64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { +bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { VT = VT.getScalarType(); if (!VT.isSimple()) @@ -6202,17 +6234,18 @@ bool ARM64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { } const MCPhysReg * -ARM64TargetLowering::getScratchRegisters(CallingConv::ID) const { +AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const { // LR is a callee-save register, but we must treat it as clobbered by any call // site. Hence we include LR in the scratch registers, which are in turn added // as implicit-defs for stackmaps and patchpoints. static const MCPhysReg ScratchRegs[] = { - ARM64::X16, ARM64::X17, ARM64::LR, 0 + AArch64::X16, AArch64::X17, AArch64::LR, 0 }; return ScratchRegs; } -bool ARM64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const { +bool +AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const { EVT VT = N->getValueType(0); // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine // it with shift to let it be lowered to UBFX. @@ -6227,8 +6260,8 @@ bool ARM64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const { return true; } -bool ARM64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, - Type *Ty) const { +bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -6236,7 +6269,7 @@ bool ARM64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return false; int64_t Val = Imm.getSExtValue(); - if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize)) + if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize)) return true; if ((int64_t)Val < 0) @@ -6269,10 +6302,10 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { N0.getOperand(0)); // Generate SUBS & CSEL. 
SDValue Cmp = - DAG.getNode(ARM64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), + DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), N0.getOperand(0), DAG.getConstant(0, VT)); - return DAG.getNode(ARM64ISD::CSEL, DL, VT, N0.getOperand(0), Neg, - DAG.getConstant(ARM64CC::PL, MVT::i32), + return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg, + DAG.getConstant(AArch64CC::PL, MVT::i32), SDValue(Cmp.getNode(), 1)); } return SDValue(); @@ -6281,7 +6314,7 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { // performXorCombine - Attempts to handle integer ABS. static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { + const AArch64Subtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -6290,7 +6323,7 @@ static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { + const AArch64Subtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -6350,7 +6383,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) { DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1)); unsigned Opcode = - (N->getOpcode() == ISD::SINT_TO_FP) ? ARM64ISD::SITOF : ARM64ISD::UITOF; + (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF; return DAG.getNode(Opcode, SDLoc(N), VT, Load); } @@ -6417,7 +6450,7 @@ static SDValue tryCombineToEXTR(SDNode *N, std::swap(ShiftLHS, ShiftRHS); } - return DAG.getNode(ARM64ISD::EXTR, DL, VT, LHS, RHS, + return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS, DAG.getConstant(ShiftRHS, MVT::i64)); } @@ -6461,7 +6494,7 @@ static SDValue tryCombineToBSL(SDNode *N, } if (FoundMatch) - return DAG.getNode(ARM64ISD::BSL, DL, VT, SDValue(BVN0, 0), + return DAG.getNode(AArch64ISD::BSL, DL, VT, SDValue(BVN0, 0), N0->getOperand(1 - i), N1->getOperand(1 - j)); } @@ -6469,9 +6502,9 @@ static SDValue tryCombineToBSL(SDNode *N, } static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { + const AArch64Subtarget *Subtarget) { // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) - if (!EnableARM64ExtrGeneration) + if (!EnableAArch64ExtrGeneration) return SDValue(); SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); @@ -6517,14 +6550,14 @@ static SDValue performBitcastCombine(SDNode *N, SDValue Op0 = N->getOperand(0); if (Op0->getOpcode() != ISD::EXTRACT_SUBVECTOR && !(Op0->isMachineOpcode() && - Op0->getMachineOpcode() == ARM64::EXTRACT_SUBREG)) + Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG)) return SDValue(); uint64_t idx = cast(Op0->getOperand(1))->getZExtValue(); if (Op0->getOpcode() == ISD::EXTRACT_SUBVECTOR) { if (Op0->getValueType(0).getVectorNumElements() != idx && idx != 0) return SDValue(); - } else if (Op0->getMachineOpcode() == ARM64::EXTRACT_SUBREG) { - if (idx != ARM64::dsub) + } else if (Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG) { + if (idx != AArch64::dsub) return SDValue(); // The dsub reference is equivalent to a lane zero subvector reference. 
idx = 0; @@ -6539,7 +6572,7 @@ static SDValue performBitcastCombine(SDNode *N, if (SVT.getVectorNumElements() != VT.getVectorNumElements() * 2) return SDValue(); - DEBUG(dbgs() << "arm64-lower: bitcast extract_subvector simplification\n"); + DEBUG(dbgs() << "aarch64-lower: bitcast extract_subvector simplification\n"); // Create the simplified form to just extract the low or high half of the // vector directly rather than bothering with the bitcasts. @@ -6549,7 +6582,7 @@ static SDValue performBitcastCombine(SDNode *N, SDValue HalfIdx = DAG.getConstant(NumElements, MVT::i64); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx); } else { - SDValue SubReg = DAG.getTargetConstant(ARM64::dsub, MVT::i32); + SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, MVT::i32); return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT, Source, SubReg), 0); @@ -6572,7 +6605,7 @@ static SDValue performConcatVectorsCombine(SDNode *N, // canonicalise to that. if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) { assert(VT.getVectorElementType().getSizeInBits() == 64); - return DAG.getNode(ARM64ISD::DUPLANE64, dl, VT, + return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N->getOperand(0), DAG), DAG.getConstant(0, MVT::i64)); } @@ -6595,7 +6628,7 @@ static SDValue performConcatVectorsCombine(SDNode *N, if (!RHSTy.isVector()) return SDValue(); - DEBUG(dbgs() << "arm64-lower: concat_vectors bitcast simplification\n"); + DEBUG(dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n"); MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(), RHSTy.getVectorNumElements() * 2); @@ -6670,13 +6703,13 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) { // operand saying *which* lane, so we need to know. bool IsDUPLANE; switch (N.getOpcode()) { - case ARM64ISD::DUP: + case AArch64ISD::DUP: IsDUPLANE = false; break; - case ARM64ISD::DUPLANE8: - case ARM64ISD::DUPLANE16: - case ARM64ISD::DUPLANE32: - case ARM64ISD::DUPLANE64: + case AArch64ISD::DUPLANE8: + case AArch64ISD::DUPLANE16: + case AArch64ISD::DUPLANE32: + case AArch64ISD::DUPLANE64: IsDUPLANE = true; break; default: @@ -6696,7 +6729,7 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) { NewDUP = DAG.getNode(N.getOpcode(), SDLoc(N), NewDUPVT, N.getOperand(0), N.getOperand(1)); else - NewDUP = DAG.getNode(ARM64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0)); + NewDUP = DAG.getNode(AArch64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N.getNode()), NarrowTy, NewDUP, DAG.getConstant(NumElems, MVT::i64)); @@ -6717,29 +6750,29 @@ struct GenericSetCCInfo { ISD::CondCode CC; }; -/// \brief Helper structure to keep track of a SET_CC lowered into ARM64 code. -struct ARM64SetCCInfo { +/// \brief Helper structure to keep track of a SET_CC lowered into AArch64 code. +struct AArch64SetCCInfo { const SDValue *Cmp; - ARM64CC::CondCode CC; + AArch64CC::CondCode CC; }; /// \brief Helper structure to keep track of SetCC information. union SetCCInfo { GenericSetCCInfo Generic; - ARM64SetCCInfo ARM64; + AArch64SetCCInfo AArch64; }; -/// \brief Helper structure to be able to read SetCC information. -/// If set to true, IsARM64 field, Info is a ARM64SetCCInfo, otherwise Info is -/// a GenericSetCCInfo. +/// \brief Helper structure to be able to read SetCC information. If set to +/// true, IsAArch64 field, Info is a AArch64SetCCInfo, otherwise Info is a +/// GenericSetCCInfo. 
struct SetCCInfoAndKind { SetCCInfo Info; - bool IsARM64; + bool IsAArch64; }; /// \brief Check whether or not \p Op is a SET_CC operation, either a generic or /// an -/// ARM64 lowered one. +/// AArch64 lowered one. /// \p SetCCInfo is filled accordingly. /// \post SetCCInfo is meanginfull only when this function returns true. /// \return True when Op is a kind of SET_CC operation. @@ -6749,20 +6782,20 @@ static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) { SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0); SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1); SetCCInfo.Info.Generic.CC = cast(Op.getOperand(2))->get(); - SetCCInfo.IsARM64 = false; + SetCCInfo.IsAArch64 = false; return true; } // Otherwise, check if this is a matching csel instruction. // In other words: // - csel 1, 0, cc // - csel 0, 1, !cc - if (Op.getOpcode() != ARM64ISD::CSEL) + if (Op.getOpcode() != AArch64ISD::CSEL) return false; // Set the information about the operands. // TODO: we want the operands of the Cmp not the csel - SetCCInfo.Info.ARM64.Cmp = &Op.getOperand(3); - SetCCInfo.IsARM64 = true; - SetCCInfo.Info.ARM64.CC = static_cast( + SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3); + SetCCInfo.IsAArch64 = true; + SetCCInfo.Info.AArch64.CC = static_cast( cast(Op.getOperand(2))->getZExtValue()); // Check that the operands matches the constraints: @@ -6779,8 +6812,8 @@ static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) { if (!TValue->isOne()) { // Update the comparison when we are interested in !cc. std::swap(TValue, FValue); - SetCCInfo.Info.ARM64.CC = - ARM64CC::getInvertedCondCode(SetCCInfo.Info.ARM64.CC); + SetCCInfo.Info.AArch64.CC = + AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC); } return TValue->isOne() && FValue->isNullValue(); } @@ -6813,8 +6846,8 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) { } // FIXME: This could be generatized to work for FP comparisons. - EVT CmpVT = InfoAndKind.IsARM64 - ? InfoAndKind.Info.ARM64.Cmp->getOperand(0).getValueType() + EVT CmpVT = InfoAndKind.IsAArch64 + ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType() : InfoAndKind.Info.Generic.Opnd0->getValueType(); if (CmpVT != MVT::i32 && CmpVT != MVT::i64) return SDValue(); @@ -6822,19 +6855,19 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) { SDValue CCVal; SDValue Cmp; SDLoc dl(Op); - if (InfoAndKind.IsARM64) { + if (InfoAndKind.IsAArch64) { CCVal = DAG.getConstant( - ARM64CC::getInvertedCondCode(InfoAndKind.Info.ARM64.CC), MVT::i32); - Cmp = *InfoAndKind.Info.ARM64.Cmp; + AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), MVT::i32); + Cmp = *InfoAndKind.Info.AArch64.Cmp; } else - Cmp = getARM64Cmp(*InfoAndKind.Info.Generic.Opnd0, + Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1, ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, true), CCVal, DAG, dl); EVT VT = Op->getValueType(0); LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, VT)); - return DAG.getNode(ARM64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp); + return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp); } // The basic add/sub long vector instructions have variants with "2" on the end @@ -6893,8 +6926,8 @@ static SDValue performAddSubLongCombine(SDNode *N, // Massage DAGs which we can use the high-half "long" operations on into // something isel will recognize better. E.g. 
// -// (arm64_neon_umull (extract_high vec) (dupv64 scalar)) --> -// (arm64_neon_umull (extract_high (v2i64 vec))) +// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) --> +// (aarch64_neon_umull (extract_high (v2i64 vec))) // (extract_high (v2i64 (dup128 scalar))))) // static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, @@ -6951,24 +6984,24 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { switch (IID) { default: llvm_unreachable("Unknown shift intrinsic"); - case Intrinsic::arm64_neon_sqshl: - Opcode = ARM64ISD::SQSHL_I; + case Intrinsic::aarch64_neon_sqshl: + Opcode = AArch64ISD::SQSHL_I; IsRightShift = false; break; - case Intrinsic::arm64_neon_uqshl: - Opcode = ARM64ISD::UQSHL_I; + case Intrinsic::aarch64_neon_uqshl: + Opcode = AArch64ISD::UQSHL_I; IsRightShift = false; break; - case Intrinsic::arm64_neon_srshl: - Opcode = ARM64ISD::SRSHR_I; + case Intrinsic::aarch64_neon_srshl: + Opcode = AArch64ISD::SRSHR_I; IsRightShift = true; break; - case Intrinsic::arm64_neon_urshl: - Opcode = ARM64ISD::URSHR_I; + case Intrinsic::aarch64_neon_urshl: + Opcode = AArch64ISD::URSHR_I; IsRightShift = true; break; - case Intrinsic::arm64_neon_sqshlu: - Opcode = ARM64ISD::SQSHLU_I; + case Intrinsic::aarch64_neon_sqshlu: + Opcode = AArch64ISD::SQSHLU_I; IsRightShift = false; break; } @@ -7001,38 +7034,38 @@ static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) { static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { + const AArch64Subtarget *Subtarget) { SelectionDAG &DAG = DCI.DAG; unsigned IID = getIntrinsicID(N); switch (IID) { default: break; - case Intrinsic::arm64_neon_vcvtfxs2fp: - case Intrinsic::arm64_neon_vcvtfxu2fp: + case Intrinsic::aarch64_neon_vcvtfxs2fp: + case Intrinsic::aarch64_neon_vcvtfxu2fp: return tryCombineFixedPointConvert(N, DCI, DAG); break; - case Intrinsic::arm64_neon_fmax: - return DAG.getNode(ARM64ISD::FMAX, SDLoc(N), N->getValueType(0), + case Intrinsic::aarch64_neon_fmax: + return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); - case Intrinsic::arm64_neon_fmin: - return DAG.getNode(ARM64ISD::FMIN, SDLoc(N), N->getValueType(0), + case Intrinsic::aarch64_neon_fmin: + return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); - case Intrinsic::arm64_neon_smull: - case Intrinsic::arm64_neon_umull: - case Intrinsic::arm64_neon_pmull: - case Intrinsic::arm64_neon_sqdmull: + case Intrinsic::aarch64_neon_smull: + case Intrinsic::aarch64_neon_umull: + case Intrinsic::aarch64_neon_pmull: + case Intrinsic::aarch64_neon_sqdmull: return tryCombineLongOpWithDup(IID, N, DCI, DAG); - case Intrinsic::arm64_neon_sqshl: - case Intrinsic::arm64_neon_uqshl: - case Intrinsic::arm64_neon_sqshlu: - case Intrinsic::arm64_neon_srshl: - case Intrinsic::arm64_neon_urshl: + case Intrinsic::aarch64_neon_sqshl: + case Intrinsic::aarch64_neon_uqshl: + case Intrinsic::aarch64_neon_sqshlu: + case Intrinsic::aarch64_neon_srshl: + case Intrinsic::aarch64_neon_urshl: return tryCombineShiftImm(IID, N, DAG); - case Intrinsic::arm64_crc32b: - case Intrinsic::arm64_crc32cb: + case Intrinsic::aarch64_crc32b: + case Intrinsic::aarch64_crc32cb: return tryCombineCRC32(0xff, N, DAG); - case Intrinsic::arm64_crc32h: - case Intrinsic::arm64_crc32ch: + case Intrinsic::aarch64_crc32h: + case Intrinsic::aarch64_crc32ch: return tryCombineCRC32(0xffff, N, DAG); } return SDValue(); @@ 
-7049,8 +7082,8 @@ static SDValue performExtendCombine(SDNode *N, N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) { SDNode *ABDNode = N->getOperand(0).getNode(); unsigned IID = getIntrinsicID(ABDNode); - if (IID == Intrinsic::arm64_neon_sabd || - IID == Intrinsic::arm64_neon_uabd) { + if (IID == Intrinsic::aarch64_neon_sabd || + IID == Intrinsic::aarch64_neon_uabd) { SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG); if (!NewABD.getNode()) return SDValue(); @@ -7060,7 +7093,7 @@ static SDValue performExtendCombine(SDNode *N, } } - // This is effectively a custom type legalization for ARM64. + // This is effectively a custom type legalization for AArch64. // // Type legalization will split an extend of a small, legal, type to a larger // illegal type by first splitting the destination type, often creating @@ -7074,7 +7107,7 @@ static SDValue performExtendCombine(SDNode *N, // %hi = v4i32 sext v4i8 %hisrc // Things go rapidly downhill from there. // - // For ARM64, the [sz]ext vector instructions can only go up one element + // For AArch64, the [sz]ext vector instructions can only go up one element // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32 // take two instructions. // @@ -7199,7 +7232,7 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) { static SDValue performSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, - const ARM64Subtarget *Subtarget) { + const AArch64Subtarget *Subtarget) { if (!DCI.isBeforeLegalize()) return SDValue(); @@ -7322,7 +7355,7 @@ static SDValue performPostLD1Combine(SDNode *N, unsigned NumBytes = VT.getScalarSizeInBits() / 8; if (IncVal != NumBytes) continue; - Inc = DAG.getRegister(ARM64::XZR, MVT::i64); + Inc = DAG.getRegister(AArch64::XZR, MVT::i64); } SmallVector Ops; @@ -7336,7 +7369,7 @@ static SDValue performPostLD1Combine(SDNode *N, EVT Tys[3] = { VT, MVT::i64, MVT::Other }; SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, 3)); - unsigned NewOp = IsLaneOp ? ARM64ISD::LD1LANEpost : ARM64ISD::LD1DUPpost; + unsigned NewOp = IsLaneOp ? 
AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost; SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops, MemVT, LoadSDN->getMemOperand()); @@ -7387,47 +7420,47 @@ static SDValue performNEONPostLDSTCombine(SDNode *N, unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); switch (IntNo) { default: llvm_unreachable("unexpected intrinsic for Neon base update"); - case Intrinsic::arm64_neon_ld2: NewOpc = ARM64ISD::LD2post; + case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post; NumVecs = 2; break; - case Intrinsic::arm64_neon_ld3: NewOpc = ARM64ISD::LD3post; + case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post; NumVecs = 3; break; - case Intrinsic::arm64_neon_ld4: NewOpc = ARM64ISD::LD4post; + case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post; NumVecs = 4; break; - case Intrinsic::arm64_neon_st2: NewOpc = ARM64ISD::ST2post; + case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post; NumVecs = 2; IsStore = true; break; - case Intrinsic::arm64_neon_st3: NewOpc = ARM64ISD::ST3post; + case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post; NumVecs = 3; IsStore = true; break; - case Intrinsic::arm64_neon_st4: NewOpc = ARM64ISD::ST4post; + case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post; NumVecs = 4; IsStore = true; break; - case Intrinsic::arm64_neon_ld1x2: NewOpc = ARM64ISD::LD1x2post; + case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post; NumVecs = 2; break; - case Intrinsic::arm64_neon_ld1x3: NewOpc = ARM64ISD::LD1x3post; + case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post; NumVecs = 3; break; - case Intrinsic::arm64_neon_ld1x4: NewOpc = ARM64ISD::LD1x4post; + case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post; NumVecs = 4; break; - case Intrinsic::arm64_neon_st1x2: NewOpc = ARM64ISD::ST1x2post; + case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post; NumVecs = 2; IsStore = true; break; - case Intrinsic::arm64_neon_st1x3: NewOpc = ARM64ISD::ST1x3post; + case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post; NumVecs = 3; IsStore = true; break; - case Intrinsic::arm64_neon_st1x4: NewOpc = ARM64ISD::ST1x4post; + case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post; NumVecs = 4; IsStore = true; break; - case Intrinsic::arm64_neon_ld2r: NewOpc = ARM64ISD::LD2DUPpost; + case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost; NumVecs = 2; IsDupOp = true; break; - case Intrinsic::arm64_neon_ld3r: NewOpc = ARM64ISD::LD3DUPpost; + case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost; NumVecs = 3; IsDupOp = true; break; - case Intrinsic::arm64_neon_ld4r: NewOpc = ARM64ISD::LD4DUPpost; + case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost; NumVecs = 4; IsDupOp = true; break; - case Intrinsic::arm64_neon_ld2lane: NewOpc = ARM64ISD::LD2LANEpost; + case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost; NumVecs = 2; IsLaneOp = true; break; - case Intrinsic::arm64_neon_ld3lane: NewOpc = ARM64ISD::LD3LANEpost; + case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost; NumVecs = 3; IsLaneOp = true; break; - case Intrinsic::arm64_neon_ld4lane: NewOpc = ARM64ISD::LD4LANEpost; + case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost; NumVecs = 4; IsLaneOp = true; break; - case Intrinsic::arm64_neon_st2lane: NewOpc = ARM64ISD::ST2LANEpost; + case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost; NumVecs = 2; IsStore = true; IsLaneOp = true; break; - 
case Intrinsic::arm64_neon_st3lane: NewOpc = ARM64ISD::ST3LANEpost; + case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost; NumVecs = 3; IsStore = true; IsLaneOp = true; break; - case Intrinsic::arm64_neon_st4lane: NewOpc = ARM64ISD::ST4LANEpost; + case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost; NumVecs = 4; IsStore = true; IsLaneOp = true; break; } @@ -7446,7 +7479,7 @@ static SDValue performNEONPostLDSTCombine(SDNode *N, NumBytes /= VecTy.getVectorNumElements(); if (IncVal != NumBytes) continue; - Inc = DAG.getRegister(ARM64::XZR, MVT::i64); + Inc = DAG.getRegister(AArch64::XZR, MVT::i64); } SmallVector Ops; Ops.push_back(N->getOperand(0)); // Incoming chain @@ -7497,11 +7530,11 @@ static SDValue performBRCONDCombine(SDNode *N, assert(isa(CCVal) && "Expected a ConstantSDNode here!"); unsigned CC = cast(CCVal)->getZExtValue(); - if (CC != ARM64CC::EQ && CC != ARM64CC::NE) + if (CC != AArch64CC::EQ && CC != AArch64CC::NE) return SDValue(); unsigned CmpOpc = Cmp.getOpcode(); - if (CmpOpc != ARM64ISD::ADDS && CmpOpc != ARM64ISD::SUBS) + if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS) return SDValue(); // Only attempt folding if there is only one use of the flag and no use of the @@ -7529,10 +7562,10 @@ static SDValue performBRCONDCombine(SDNode *N, // Fold the compare into the branch instruction. SDValue BR; - if (CC == ARM64CC::EQ) - BR = DAG.getNode(ARM64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest); + if (CC == AArch64CC::EQ) + BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest); else - BR = DAG.getNode(ARM64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest); + BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest); // Do not add new nodes to DAG combiner worklist. 
DCI.CombineTo(N, BR, false); @@ -7608,8 +7641,8 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) { return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2)); } -SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { +SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; switch (N->getOpcode()) { default: @@ -7642,36 +7675,36 @@ SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N, return performVSelectCombine(N, DCI.DAG); case ISD::STORE: return performSTORECombine(N, DCI, DAG, Subtarget); - case ARM64ISD::BRCOND: + case AArch64ISD::BRCOND: return performBRCONDCombine(N, DCI, DAG); - case ARM64ISD::DUP: + case AArch64ISD::DUP: return performPostLD1Combine(N, DCI, false); case ISD::INSERT_VECTOR_ELT: return performPostLD1Combine(N, DCI, true); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast(N->getOperand(1))->getZExtValue()) { - case Intrinsic::arm64_neon_ld2: - case Intrinsic::arm64_neon_ld3: - case Intrinsic::arm64_neon_ld4: - case Intrinsic::arm64_neon_ld1x2: - case Intrinsic::arm64_neon_ld1x3: - case Intrinsic::arm64_neon_ld1x4: - case Intrinsic::arm64_neon_ld2lane: - case Intrinsic::arm64_neon_ld3lane: - case Intrinsic::arm64_neon_ld4lane: - case Intrinsic::arm64_neon_ld2r: - case Intrinsic::arm64_neon_ld3r: - case Intrinsic::arm64_neon_ld4r: - case Intrinsic::arm64_neon_st2: - case Intrinsic::arm64_neon_st3: - case Intrinsic::arm64_neon_st4: - case Intrinsic::arm64_neon_st1x2: - case Intrinsic::arm64_neon_st1x3: - case Intrinsic::arm64_neon_st1x4: - case Intrinsic::arm64_neon_st2lane: - case Intrinsic::arm64_neon_st3lane: - case Intrinsic::arm64_neon_st4lane: + case Intrinsic::aarch64_neon_ld2: + case Intrinsic::aarch64_neon_ld3: + case Intrinsic::aarch64_neon_ld4: + case Intrinsic::aarch64_neon_ld1x2: + case Intrinsic::aarch64_neon_ld1x3: + case Intrinsic::aarch64_neon_ld1x4: + case Intrinsic::aarch64_neon_ld2lane: + case Intrinsic::aarch64_neon_ld3lane: + case Intrinsic::aarch64_neon_ld4lane: + case Intrinsic::aarch64_neon_ld2r: + case Intrinsic::aarch64_neon_ld3r: + case Intrinsic::aarch64_neon_ld4r: + case Intrinsic::aarch64_neon_st2: + case Intrinsic::aarch64_neon_st3: + case Intrinsic::aarch64_neon_st4: + case Intrinsic::aarch64_neon_st1x2: + case Intrinsic::aarch64_neon_st1x3: + case Intrinsic::aarch64_neon_st1x4: + case Intrinsic::aarch64_neon_st2lane: + case Intrinsic::aarch64_neon_st3lane: + case Intrinsic::aarch64_neon_st4lane: return performNEONPostLDSTCombine(N, DCI, DAG); default: break; @@ -7684,7 +7717,8 @@ SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N, // we can't perform a tail-call. In particular, we need to check for // target ISD nodes that are returns and any other "odd" constructs // that the generic analysis code won't necessarily catch. -bool ARM64TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { +bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N, + SDValue &Chain) const { if (N->getNumValues() != 1) return false; if (!N->hasNUsesOfValue(1, 0)) @@ -7704,7 +7738,7 @@ bool ARM64TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { bool HasRet = false; for (SDNode *Node : Copy->uses()) { - if (Node->getOpcode() != ARM64ISD::RET_FLAG) + if (Node->getOpcode() != AArch64ISD::RET_FLAG) return false; HasRet = true; } @@ -7720,18 +7754,18 @@ bool ARM64TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { // call. 
This will cause the optimizers to attempt to move, or duplicate, // return instructions to help enable tail call optimizations for this // instruction. -bool ARM64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { +bool AArch64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { if (!CI->isTailCall()) return false; return true; } -bool ARM64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - bool &IsInc, - SelectionDAG &DAG) const { +bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + bool &IsInc, + SelectionDAG &DAG) const { if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) return false; @@ -7749,10 +7783,10 @@ bool ARM64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, return false; } -bool ARM64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const { +bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { EVT VT; SDValue Ptr; if (LoadSDNode *LD = dyn_cast(N)) { @@ -7771,11 +7805,9 @@ bool ARM64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, return true; } -bool ARM64TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, - SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const { +bool AArch64TargetLowering::getPostIndexedAddressParts( + SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { EVT VT; SDValue Ptr; if (LoadSDNode *LD = dyn_cast(N)) { @@ -7798,9 +7830,8 @@ bool ARM64TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return true; } -void ARM64TargetLowering::ReplaceNodeResults(SDNode *N, - SmallVectorImpl &Results, - SelectionDAG &DAG) const { +void AArch64TargetLowering::ReplaceNodeResults( + SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this"); @@ -7812,7 +7843,7 @@ void ARM64TargetLowering::ReplaceNodeResults(SDNode *N, } } -bool ARM64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { +bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { // Loads and stores less than 128-bits are already atomic; ones above that // are doomed anyway, so defer to the default libcall and blame the OS when // things go wrong: @@ -7825,8 +7856,8 @@ bool ARM64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { return Inst->getType()->getPrimitiveSizeInBits() <= 128; } -Value *ARM64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, - AtomicOrdering Ord) const { +Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Type *ValTy = cast(Addr->getType())->getElementType(); bool IsAcquire = @@ -7837,7 +7868,7 @@ Value *ARM64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, // single i128 here. if (ValTy->getPrimitiveSizeInBits() == 128) { Intrinsic::ID Int = - IsAcquire ? Intrinsic::arm64_ldaxp : Intrinsic::arm64_ldxp; + IsAcquire ? 
Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp; Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); @@ -7853,7 +7884,7 @@ Value *ARM64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, Type *Tys[] = { Addr->getType() }; Intrinsic::ID Int = - IsAcquire ? Intrinsic::arm64_ldaxr : Intrinsic::arm64_ldxr; + IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr; Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int, Tys); return Builder.CreateTruncOrBitCast( @@ -7861,9 +7892,9 @@ Value *ARM64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, cast(Addr->getType())->getElementType()); } -Value *ARM64TargetLowering::emitStoreConditional(IRBuilder<> &Builder, - Value *Val, Value *Addr, - AtomicOrdering Ord) const { +Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder, + Value *Val, Value *Addr, + AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); bool IsRelease = Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent; @@ -7873,7 +7904,7 @@ Value *ARM64TargetLowering::emitStoreConditional(IRBuilder<> &Builder, // before the call. if (Val->getType()->getPrimitiveSizeInBits() == 128) { Intrinsic::ID Int = - IsRelease ? Intrinsic::arm64_stlxp : Intrinsic::arm64_stxp; + IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp; Function *Stxr = Intrinsic::getDeclaration(M, Int); Type *Int64Ty = Type::getInt64Ty(M->getContext()); @@ -7884,7 +7915,7 @@ Value *ARM64TargetLowering::emitStoreConditional(IRBuilder<> &Builder, } Intrinsic::ID Int = - IsRelease ? Intrinsic::arm64_stlxr : Intrinsic::arm64_stxr; + IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr; Type *Tys[] = { Addr->getType() }; Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys); diff --git a/lib/Target/ARM64/ARM64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h similarity index 96% rename from lib/Target/ARM64/ARM64ISelLowering.h rename to lib/Target/AArch64/AArch64ISelLowering.h index b2402c9791c7..de16c4d9d4b8 100644 --- a/lib/Target/ARM64/ARM64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -1,4 +1,4 @@ -//==-- ARM64ISelLowering.h - ARM64 DAG Lowering Interface --------*- C++ -*-==// +//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// // -// This file defines the interfaces that ARM64 uses to lower LLVM code into a +// This file defines the interfaces that AArch64 uses to lower LLVM code into a // selection DAG. 
// //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM64_ISELLOWERING_H -#define LLVM_TARGET_ARM64_ISELLOWERING_H +#ifndef LLVM_TARGET_AArch64_ISELLOWERING_H +#define LLVM_TARGET_AArch64_ISELLOWERING_H #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -22,7 +22,7 @@ namespace llvm { -namespace ARM64ISD { +namespace AArch64ISD { enum { FIRST_NUMBER = ISD::BUILTIN_OP_END, @@ -188,16 +188,16 @@ enum { ST4LANEpost }; -} // end namespace ARM64ISD +} // end namespace AArch64ISD -class ARM64Subtarget; -class ARM64TargetMachine; +class AArch64Subtarget; +class AArch64TargetMachine; -class ARM64TargetLowering : public TargetLowering { +class AArch64TargetLowering : public TargetLowering { bool RequireStrictAlign; public: - explicit ARM64TargetLowering(ARM64TargetMachine &TM); + explicit AArch64TargetLowering(AArch64TargetMachine &TM); /// Selects the correct CCAssignFn for a the given CallingConvention /// value. @@ -325,9 +325,9 @@ class ARM64TargetLowering : public TargetLowering { bool shouldExpandAtomicInIR(Instruction *Inst) const override; private: - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; + const AArch64Subtarget *Subtarget; void addTypeForNEON(EVT VT, EVT PromotedBitwiseVT); void addDRTypeForNEON(MVT VT); @@ -454,11 +454,11 @@ class ARM64TargetLowering : public TargetLowering { SelectionDAG &DAG) const override; }; -namespace ARM64 { +namespace AArch64 { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); -} // end namespace ARM64 +} // end namespace AArch64 } // end namespace llvm -#endif // LLVM_TARGET_ARM64_ISELLOWERING_H +#endif // LLVM_TARGET_AArch64_ISELLOWERING_H diff --git a/lib/Target/ARM64/ARM64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td similarity index 92% rename from lib/Target/ARM64/ARM64InstrAtomics.td rename to lib/Target/AArch64/AArch64InstrAtomics.td index 1d1483ac126c..3b9e3c630596 100644 --- a/lib/Target/ARM64/ARM64InstrAtomics.td +++ b/lib/Target/AArch64/AArch64InstrAtomics.td @@ -1,4 +1,4 @@ -//===- ARM64InstrAtomics.td - ARM64 Atomic codegen support -*- tablegen -*-===// +//=- AArch64InstrAtomics.td - AArch64 Atomic codegen support -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// ARM64 Atomic operand code-gen constructs. +// AArch64 Atomic operand code-gen constructs. // //===----------------------------------------------------------------------===// @@ -117,7 +117,7 @@ class releasing_store return Ordering == Release || Ordering == SequentiallyConsistent; }]>; -// An atomic store operation that doesn't actually need to be atomic on ARM64. +// An atomic store operation that doesn't actually need to be atomic on AArch64. class relaxed_store : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ AtomicOrdering Ordering = cast(N)->getOrdering(); @@ -202,19 +202,19 @@ def : Pat<(relaxed_store // Load-exclusives. 
-def ldxr_1 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ +def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; }]>; -def ldxr_2 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ +def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; }]>; -def ldxr_4 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ +def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; }]>; -def ldxr_8 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ +def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; }]>; @@ -235,19 +235,19 @@ def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff), // Load-exclusives. -def ldaxr_1 : PatFrag<(ops node:$ptr), (int_arm64_ldaxr node:$ptr), [{ +def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; }]>; -def ldaxr_2 : PatFrag<(ops node:$ptr), (int_arm64_ldaxr node:$ptr), [{ +def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; }]>; -def ldaxr_4 : PatFrag<(ops node:$ptr), (int_arm64_ldaxr node:$ptr), [{ +def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; }]>; -def ldaxr_8 : PatFrag<(ops node:$ptr), (int_arm64_ldaxr node:$ptr), [{ +def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; }]>; @@ -269,22 +269,22 @@ def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff), // Store-exclusives. def stxr_1 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ + (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; }]>; def stxr_2 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ + (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; }]>; def stxr_4 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ + (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; }]>; def stxr_8 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ + (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; }]>; @@ -315,22 +315,22 @@ def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr), // Store-release-exclusives. def stlxr_1 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stlxr node:$val, node:$ptr), [{ + (int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; }]>; def stlxr_2 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stlxr node:$val, node:$ptr), [{ + (int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; }]>; def stlxr_4 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stlxr node:$val, node:$ptr), [{ + (int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; }]>; def stlxr_8 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stlxr node:$val, node:$ptr), [{ + (int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; }]>; @@ -361,4 +361,4 @@ def : Pat<(stlxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr), // And clear exclusive. 
-def : Pat<(int_arm64_clrex), (CLREX 0xf)>; +def : Pat<(int_aarch64_clrex), (CLREX 0xf)>; diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td similarity index 98% rename from lib/Target/ARM64/ARM64InstrFormats.td rename to lib/Target/AArch64/AArch64InstrFormats.td index ea45b3d4fb22..d455d7e45e06 100644 --- a/lib/Target/ARM64/ARM64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -1,4 +1,4 @@ -//===- ARM64InstrFormats.td - ARM64 Instruction Formats ------*- tblgen -*-===// +//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tblgen -*-===// // // The LLVM Compiler Infrastructure // @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Describe ARM64 instructions format here +// Describe AArch64 instructions format here // // Format specifies the encoding used by the instruction. This is part of the @@ -21,8 +21,8 @@ class Format val> { def PseudoFrm : Format<0>; def NormalFrm : Format<1>; // Do we need any others? -// ARM64 Instruction Format -class ARM64Inst : Instruction { +// AArch64 Instruction Format +class AArch64Inst : Instruction { field bits<32> Inst; // Instruction encoding. // Mask of bits that cause an encoding to be UNPREDICTABLE. // If a bit is set, then if the corresponding bit in the @@ -32,7 +32,7 @@ class ARM64Inst : Instruction { // SoftFail is the generic name for this field, but we alias it so // as to make it more obvious what it means in ARM-land. field bits<32> SoftFail = Unpredictable; - let Namespace = "ARM64"; + let Namespace = "AArch64"; Format F = f; bits<2> Form = F.Value; let Pattern = []; @@ -41,7 +41,7 @@ class ARM64Inst : Instruction { // Pseudo instructions (don't have encoding information) class Pseudo pattern, string cstr = ""> - : ARM64Inst { + : AArch64Inst { dag OutOperandList = oops; dag InOperandList = iops; let Pattern = pattern; @@ -49,7 +49,7 @@ class Pseudo pattern, string cstr = ""> } // Real instructions (have encoding information) -class EncodedI pattern> : ARM64Inst { +class EncodedI pattern> : AArch64Inst { let Pattern = pattern; let Size = 4; } @@ -440,11 +440,11 @@ def vecshiftL64 : Operand, ImmLeafgetZExtValue(), 32); + uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 32); return CurDAG->getTargetConstant(enc, MVT::i32); }]>; def logical_imm64_XFORM : SDNodeXFormgetZExtValue(), 64); + uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 64); return CurDAG->getTargetConstant(enc, MVT::i32); }]>; @@ -457,13 +457,13 @@ def LogicalImm64Operand : AsmOperandClass { let DiagnosticType = "LogicalSecondSource"; } def logical_imm32 : Operand, PatLeaf<(imm), [{ - return ARM64_AM::isLogicalImmediate(N->getZExtValue(), 32); + return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 32); }], logical_imm32_XFORM> { let PrintMethod = "printLogicalImm32"; let ParserMatchClass = LogicalImm32Operand; } def logical_imm64 : Operand, PatLeaf<(imm), [{ - return ARM64_AM::isLogicalImmediate(N->getZExtValue(), 64); + return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 64); }], logical_imm64_XFORM> { let PrintMethod = "printLogicalImm64"; let ParserMatchClass = LogicalImm64Operand; @@ -661,10 +661,10 @@ class arith_extended_reg32to64 : Operand, // Floating-point immediate. 
def fpimm32 : Operand, PatLeaf<(f32 fpimm), [{ - return ARM64_AM::getFP32Imm(N->getValueAPF()) != -1; + return AArch64_AM::getFP32Imm(N->getValueAPF()) != -1; }], SDNodeXFormgetValueAPF(); - uint32_t enc = ARM64_AM::getFP32Imm(InVal); + uint32_t enc = AArch64_AM::getFP32Imm(InVal); return CurDAG->getTargetConstant(enc, MVT::i32); }]>> { let ParserMatchClass = FPImmOperand; @@ -672,10 +672,10 @@ def fpimm32 : Operand, } def fpimm64 : Operand, PatLeaf<(f64 fpimm), [{ - return ARM64_AM::getFP64Imm(N->getValueAPF()) != -1; + return AArch64_AM::getFP64Imm(N->getValueAPF()) != -1; }], SDNodeXFormgetValueAPF(); - uint32_t enc = ARM64_AM::getFP64Imm(InVal); + uint32_t enc = AArch64_AM::getFP64Imm(InVal); return CurDAG->getTargetConstant(enc, MVT::i32); }]>> { let ParserMatchClass = FPImmOperand; @@ -743,12 +743,12 @@ def VectorIndexD : Operand, ImmLeaf, PatLeaf<(f64 fpimm), [{ - return ARM64_AM::isAdvSIMDModImmType10(N->getValueAPF() + return AArch64_AM::isAdvSIMDModImmType10(N->getValueAPF() .bitcastToAPInt() .getZExtValue()); }], SDNodeXFormgetValueAPF(); - uint32_t enc = ARM64_AM::encodeAdvSIMDModImmType10(N->getValueAPF() + uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType10(N->getValueAPF() .bitcastToAPInt() .getZExtValue()); return CurDAG->getTargetConstant(enc, MVT::i32); @@ -982,7 +982,7 @@ def am_brcond : Operand { class BranchCond : I<(outs), (ins ccode:$cond, am_brcond:$target), "b", ".$cond\t$target", "", - [(ARM64brcond bb:$target, imm:$cond, NZCV)]>, + [(AArch64brcond bb:$target, imm:$cond, NZCV)]>, Sched<[WriteBr]> { let isBranch = 1; let isTerminator = 1; @@ -1759,7 +1759,7 @@ multiclass AddSubS { //--- def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisPtrTy<3>]>; -def ARM64Extr : SDNode<"ARM64ISD::EXTR", SDTA64EXTR>; +def AArch64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>; class BaseExtractImm patterns> @@ -1782,7 +1782,7 @@ class BaseExtractImm { def Wrri : BaseExtractImm { + (AArch64Extr GPR32:$Rn, GPR32:$Rm, imm0_31:$imm))]> { let Inst{31} = 0; let Inst{22} = 0; // imm<5> must be zero. 
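As an aside, the logical_imm32/logical_imm64 operand definitions above pair a PatLeaf predicate with an SDNodeXForm encoder, both now routed through the renamed AArch64_AM helpers. Below is a minimal stand-alone sketch of that check/encode pairing, assuming only the isLogicalImmediate/encodeLogicalImmediate call shapes used in the hunk above; it is illustrative and not part of this commit.

// Illustrative sketch (assumes this tree's MCTargetDesc headers are on the
// include path). Mirrors the check/encode pair used by logical_imm64 above.
#include "MCTargetDesc/AArch64AddressingModes.h"
#include <cstdint>
#include <cstdio>

int main() {
  // 0xFF00 repeated across the register: a "rotated run of ones" pattern,
  // so it is representable as a 64-bit logical immediate.
  uint64_t Imm = 0xFF00FF00FF00FF00ULL;
  if (llvm::AArch64_AM::isLogicalImmediate(Imm, 64)) {   // PatLeaf predicate side
    // SDNodeXForm side: the N:immr:imms encoding placed in the instruction.
    uint64_t Enc = llvm::AArch64_AM::encodeLogicalImmediate(Imm, 64);
    std::printf("logical immediate encoding = 0x%llx\n", (unsigned long long)Enc);
  } else {
    std::printf("not a valid logical immediate\n");
  }
  return 0;
}

Constants that fail the predicate are not matched by these operands and are instead materialized by other means (for example MOVZ/MOVK sequences) elsewhere in the backend.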
@@ -1790,7 +1790,7 @@ multiclass ExtractImm { } def Xrri : BaseExtractImm { + (AArch64Extr GPR64:$Rn, GPR64:$Rm, imm0_63:$imm))]> { let Inst{31} = 1; let Inst{22} = 1; @@ -2081,7 +2081,7 @@ class BaseCondSelect op2, RegisterClass regtype, string asm> : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), asm, "\t$Rd, $Rn, $Rm, $cond", "", [(set regtype:$Rd, - (ARM64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), NZCV))]>, + (AArch64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI, ReadI]> { let Uses = [NZCV]; @@ -2113,7 +2113,7 @@ class BaseCondSelectOp op2, RegisterClass regtype, string asm, : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), asm, "\t$Rd, $Rn, $Rm, $cond", "", [(set regtype:$Rd, - (ARM64csel regtype:$Rn, (frag regtype:$Rm), + (AArch64csel regtype:$Rn, (frag regtype:$Rm), (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI, ReadI]> { let Uses = [NZCV]; @@ -2133,8 +2133,8 @@ class BaseCondSelectOp op2, RegisterClass regtype, string asm, } def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); - return CurDAG->getTargetConstant(ARM64CC::getInvertedCondCode(CC), MVT::i32); + AArch64CC::CondCode CC = static_cast(N->getZExtValue()); + return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), MVT::i32); }]>; multiclass CondSelectOp op2, string asm, PatFrag frag> { @@ -2145,11 +2145,11 @@ multiclass CondSelectOp op2, string asm, PatFrag frag> { let Inst{31} = 1; } - def : Pat<(ARM64csel (frag GPR32:$Rm), GPR32:$Rn, (i32 imm:$cond), NZCV), + def : Pat<(AArch64csel (frag GPR32:$Rm), GPR32:$Rn, (i32 imm:$cond), NZCV), (!cast(NAME # Wr) GPR32:$Rn, GPR32:$Rm, (inv_cond_XFORM imm:$cond))>; - def : Pat<(ARM64csel (frag GPR64:$Rm), GPR64:$Rn, (i32 imm:$cond), NZCV), + def : Pat<(AArch64csel (frag GPR64:$Rm), GPR64:$Rn, (i32 imm:$cond), NZCV), (!cast(NAME # Xr) GPR64:$Rn, GPR64:$Rm, (inv_cond_XFORM imm:$cond))>; } @@ -2194,7 +2194,7 @@ class uimm12_scaled : Operand { let ParserMatchClass = !cast("UImm12OffsetScale" # Scale # "Operand"); let EncoderMethod - = "getLdStUImm12OpValue"; + = "getLdStUImm12OpValue"; let PrintMethod = "printUImm12Offset<" # Scale # ">"; } @@ -2782,7 +2782,7 @@ class BasePrefetchRO sz, bit V, bits<2> opc, dag outs, dag ins, multiclass PrefetchRO sz, bit V, bits<2> opc, string asm> { def roW : BasePrefetchRO { let Inst{13} = 0b0; @@ -2790,7 +2790,7 @@ multiclass PrefetchRO sz, bit V, bits<2> opc, string asm> { def roX : BasePrefetchRO { let Inst{13} = 0b1; @@ -3912,7 +3912,7 @@ class BaseFPCondSelect : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), asm, "\t$Rd, $Rn, $Rm, $cond", "", [(set regtype:$Rd, - (ARM64csel (vt regtype:$Rn), regtype:$Rm, + (AArch64csel (vt regtype:$Rn), regtype:$Rm, (i32 imm:$cond), NZCV))]>, Sched<[WriteF]> { bits<5> Rd; @@ -5074,28 +5074,28 @@ multiclass SIMDLongThreeVectorSQDMLXTiedHS opc, string asm, asm, ".4s", ".4h", ".4h", [(set (v4i32 V128:$dst), (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull (v4i16 V64:$Rn), + (v4i32 (int_aarch64_neon_sqdmull (v4i16 V64:$Rn), (v4i16 V64:$Rm)))))]>; def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied; def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied; def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied; } @@ -5140,7 +5140,7 @@ class BaseSIMDBitwiseExtract, + (AArch64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>, Sched<[WriteV]> { bits<5> Rd; bits<5> Rn; @@ -5409,7 +5409,7 @@ class BaseSIMDCmpTwoScalar size, bits<5> opcode, class SIMDInexactCvtTwoScalar opcode, string asm> : I<(outs FPR32:$Rd), (ins 
FPR64:$Rn), asm, "\t$Rd, $Rn", "", - [(set (f32 FPR32:$Rd), (int_arm64_sisd_fcvtxn (f64 FPR64:$Rn)))]>, + [(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>, Sched<[WriteV]> { bits<5> Rd; bits<5> Rn; @@ -5627,7 +5627,7 @@ class SIMDDupFromMain imm5, string size, ValueType vectype, : BaseSIMDInsDup { + [(set (vectype vecreg:$Rd), (AArch64dup regtype:$Rn))]> { let Inst{20-16} = imm5; let Inst{14-11} = 0b0001; } @@ -5646,7 +5646,7 @@ class SIMDDupFromElement { + VectorIndexD, i64, AArch64duplane64> { bits<1> idx; let Inst{20} = idx; let Inst{19-16} = 0b1000; @@ -5655,7 +5655,7 @@ class SIMDDup64FromElement class SIMDDup32FromElement : SIMDDupFromElement { + VectorIndexS, i64, AArch64duplane32> { bits<2> idx; let Inst{20-19} = idx; let Inst{18-16} = 0b100; @@ -5664,7 +5664,7 @@ class SIMDDup32FromElement : SIMDDupFromElement { + VectorIndexH, i64, AArch64duplane16> { bits<3> idx; let Inst{20-18} = idx; let Inst{17-16} = 0b10; @@ -5673,7 +5673,7 @@ class SIMDDup16FromElement : SIMDDupFromElement { + VectorIndexB, i64, AArch64duplane8> { bits<4> idx; let Inst{20-17} = idx; let Inst{16} = 1; @@ -6312,7 +6312,7 @@ multiclass SIMDFPIndexedSD opc, string asm, asm, ".2s", ".2s", ".2s", ".s", [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), - (v2f32 (ARM64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { + (v2f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6324,7 +6324,7 @@ multiclass SIMDFPIndexedSD opc, string asm, asm, ".4s", ".4s", ".4s", ".s", [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), - (v4f32 (ARM64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { + (v4f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6336,7 +6336,7 @@ multiclass SIMDFPIndexedSD opc, string asm, asm, ".2d", ".2d", ".2d", ".d", [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), - (v2f64 (ARM64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))))]> { + (v2f64 (AArch64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))))]> { bits<1> idx; let Inst{11} = idx{0}; let Inst{21} = 0; @@ -6370,35 +6370,35 @@ multiclass SIMDFPIndexedSD opc, string asm, multiclass SIMDFPIndexedSDTiedPatterns { // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar. def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64duplane32 (v4f32 V128:$Rm), + (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))), (!cast(INST # v2i32_indexed) V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64dup (f32 FPR32Op:$Rm)))), + (AArch64dup (f32 FPR32Op:$Rm)))), (!cast(INST # "v2i32_indexed") V64:$Rd, V64:$Rn, (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; // 2 variants for the .4s version: DUPLANE from 128-bit and DUP scalar. def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64duplane32 (v4f32 V128:$Rm), + (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))), (!cast(INST # "v4i32_indexed") V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64dup (f32 FPR32Op:$Rm)))), + (AArch64dup (f32 FPR32Op:$Rm)))), (!cast(INST # "v4i32_indexed") V128:$Rd, V128:$Rn, (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; // 2 variants for the .2d version: DUPLANE from 128-bit and DUP scalar. 
def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64duplane64 (v2f64 V128:$Rm), + (AArch64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))), (!cast(INST # "v2i64_indexed") V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64dup (f64 FPR64Op:$Rm)))), + (AArch64dup (f64 FPR64Op:$Rm)))), (!cast(INST # "v2i64_indexed") V128:$Rd, V128:$Rn, (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; @@ -6471,7 +6471,7 @@ multiclass SIMDIndexedHS opc, string asm, asm, ".4h", ".4h", ".4h", ".h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6484,7 +6484,7 @@ multiclass SIMDIndexedHS opc, string asm, asm, ".8h", ".8h", ".8h", ".h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6497,7 +6497,7 @@ multiclass SIMDIndexedHS opc, string asm, asm, ".2s", ".2s", ".2s", ".s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6509,7 +6509,7 @@ multiclass SIMDIndexedHS opc, string asm, asm, ".4s", ".4s", ".4s", ".s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6545,7 +6545,7 @@ multiclass SIMDVectorIndexedHS opc, string asm, asm, ".4h", ".4h", ".4h", ".h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6558,7 +6558,7 @@ multiclass SIMDVectorIndexedHS opc, string asm, asm, ".8h", ".8h", ".8h", ".h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6571,7 +6571,7 @@ multiclass SIMDVectorIndexedHS opc, string asm, asm, ".2s", ".2s", ".2s", ".s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6583,7 +6583,7 @@ multiclass SIMDVectorIndexedHS opc, string asm, asm, ".4s", ".4s", ".4s", ".s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6597,7 +6597,7 @@ multiclass SIMDVectorIndexedHSTied opc, string asm, asm, ".4h", ".4h", ".4h", ".h", [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd),(v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v4i16 (AArch64duplane16 (v8i16 
V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6610,7 +6610,7 @@ multiclass SIMDVectorIndexedHSTied opc, string asm, asm, ".8h", ".8h", ".8h", ".h", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6623,7 +6623,7 @@ multiclass SIMDVectorIndexedHSTied opc, string asm, asm, ".2s", ".2s", ".2s", ".s", [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6635,7 +6635,7 @@ multiclass SIMDVectorIndexedHSTied opc, string asm, asm, ".4s", ".4s", ".4s", ".s", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6650,7 +6650,7 @@ multiclass SIMDIndexedLongSD opc, string asm, asm, ".4s", ".4s", ".4h", ".h", [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6663,7 +6663,7 @@ multiclass SIMDIndexedLongSD opc, string asm, asm#"2", ".4s", ".4s", ".8h", ".h", [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; @@ -6678,7 +6678,7 @@ multiclass SIMDIndexedLongSD opc, string asm, asm, ".2d", ".2d", ".2s", ".s", [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6690,7 +6690,7 @@ multiclass SIMDIndexedLongSD opc, string asm, asm#"2", ".2d", ".2d", ".4s", ".s", [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; @@ -6723,9 +6723,9 @@ multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, asm, ".4s", ".4s", ".4h", ".h", [(set (v4i32 V128:$dst), (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull + (v4i32 (int_aarch64_neon_sqdmull (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))))]> { bits<3> idx; let Inst{11} = idx{2}; @@ -6737,8 +6737,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, // intermediate EXTRACT_SUBREG would be untyped. 
def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), (i32 (vector_extract (v4i32 - (int_arm64_neon_sqdmull (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), + (int_aarch64_neon_sqdmull (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))), (i64 0))))), (EXTRACT_SUBREG @@ -6753,10 +6753,10 @@ multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, asm#"2", ".4s", ".4s", ".8h", ".h", [(set (v4i32 V128:$dst), (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull + (v4i32 (int_aarch64_neon_sqdmull (extract_high_v8i16 V128:$Rn), (extract_high_v8i16 - (ARM64duplane16 (v8i16 V128_lo:$Rm), + (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))))]> { bits<3> idx; let Inst{11} = idx{2}; @@ -6770,9 +6770,9 @@ multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, asm, ".2d", ".2d", ".2s", ".s", [(set (v2i64 V128:$dst), (Accum (v2i64 V128:$Rd), - (v2i64 (int_arm64_neon_sqdmull + (v2i64 (int_aarch64_neon_sqdmull (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))))]> { bits<2> idx; let Inst{11} = idx{1}; @@ -6785,10 +6785,10 @@ multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, asm#"2", ".2d", ".2d", ".4s", ".s", [(set (v2i64 V128:$dst), (Accum (v2i64 V128:$Rd), - (v2i64 (int_arm64_neon_sqdmull + (v2i64 (int_aarch64_neon_sqdmull (extract_high_v4i32 V128:$Rn), (extract_high_v4i32 - (ARM64duplane32 (v4i32 V128:$Rm), + (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))))]> { bits<2> idx; let Inst{11} = idx{1}; @@ -6810,7 +6810,7 @@ multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, asm, ".s", "", "", ".s", [(set (i64 FPR64Op:$dst), (Accum (i64 FPR64Op:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar + (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32Op:$Rn), (i32 (vector_extract (v4i32 V128:$Rm), VectorIndexS:$idx))))))]> { @@ -6830,7 +6830,7 @@ multiclass SIMDVectorIndexedLongSD opc, string asm, asm, ".4s", ".4s", ".4h", ".h", [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6843,7 +6843,7 @@ multiclass SIMDVectorIndexedLongSD opc, string asm, asm#"2", ".4s", ".4s", ".8h", ".h", [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; @@ -6858,7 +6858,7 @@ multiclass SIMDVectorIndexedLongSD opc, string asm, asm, ".2d", ".2d", ".2s", ".s", [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6870,7 +6870,7 @@ multiclass SIMDVectorIndexedLongSD opc, string asm, asm#"2", ".2d", ".2d", ".4s", ".s", [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; @@ -6888,7 +6888,7 @@ multiclass SIMDVectorIndexedLongSDTied opc, string asm, asm, ".4s", ".4s", ".4h", ".h", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v4i16 (AArch64duplane16 (v8i16 
V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6902,7 +6902,7 @@ multiclass SIMDVectorIndexedLongSDTied opc, string asm, [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; @@ -6916,7 +6916,7 @@ multiclass SIMDVectorIndexedLongSDTied opc, string asm, asm, ".2d", ".2d", ".2s", ".s", [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6929,7 +6929,7 @@ multiclass SIMDVectorIndexedLongSDTied opc, string asm, [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; diff --git a/lib/Target/ARM64/ARM64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp similarity index 55% rename from lib/Target/ARM64/ARM64InstrInfo.cpp rename to lib/Target/AArch64/AArch64InstrInfo.cpp index fbbddd566606..52e3b333eb08 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- ARM64InstrInfo.cpp - ARM64 Instruction Information -----------------===// +//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===// // // The LLVM Compiler Infrastructure // @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// // -// This file contains the ARM64 implementation of the TargetInstrInfo class. +// This file contains the AArch64 implementation of the TargetInstrInfo class. // //===----------------------------------------------------------------------===// -#include "ARM64InstrInfo.h" -#include "ARM64Subtarget.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64InstrInfo.h" +#include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -26,15 +26,15 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR -#include "ARM64GenInstrInfo.inc" +#include "AArch64GenInstrInfo.inc" -ARM64InstrInfo::ARM64InstrInfo(const ARM64Subtarget &STI) - : ARM64GenInstrInfo(ARM64::ADJCALLSTACKDOWN, ARM64::ADJCALLSTACKUP), +AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) + : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), RI(this, &STI), Subtarget(STI) {} /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. 
-unsigned ARM64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { +unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { const MCInstrDesc &Desc = MI->getDesc(); switch (Desc.getOpcode()) { @@ -57,23 +57,23 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, switch (LastInst->getOpcode()) { default: llvm_unreachable("Unknown branch instruction?"); - case ARM64::Bcc: + case AArch64::Bcc: Target = LastInst->getOperand(1).getMBB(); Cond.push_back(LastInst->getOperand(0)); break; - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: + case AArch64::CBZW: + case AArch64::CBZX: + case AArch64::CBNZW: + case AArch64::CBNZX: Target = LastInst->getOperand(1).getMBB(); Cond.push_back(MachineOperand::CreateImm(-1)); Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); Cond.push_back(LastInst->getOperand(0)); break; - case ARM64::TBZW: - case ARM64::TBZX: - case ARM64::TBNZW: - case ARM64::TBNZX: + case AArch64::TBZW: + case AArch64::TBZX: + case AArch64::TBNZW: + case AArch64::TBNZX: Target = LastInst->getOperand(2).getMBB(); Cond.push_back(MachineOperand::CreateImm(-1)); Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); @@ -83,7 +83,7 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, } // Branch analysis. -bool ARM64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, +bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, @@ -175,40 +175,40 @@ bool ARM64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; } -bool ARM64InstrInfo::ReverseBranchCondition( +bool AArch64InstrInfo::ReverseBranchCondition( SmallVectorImpl &Cond) const { if (Cond[0].getImm() != -1) { // Regular Bcc - ARM64CC::CondCode CC = (ARM64CC::CondCode)(int)Cond[0].getImm(); - Cond[0].setImm(ARM64CC::getInvertedCondCode(CC)); + AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm(); + Cond[0].setImm(AArch64CC::getInvertedCondCode(CC)); } else { // Folded compare-and-branch switch (Cond[1].getImm()) { default: llvm_unreachable("Unknown conditional branch!"); - case ARM64::CBZW: - Cond[1].setImm(ARM64::CBNZW); + case AArch64::CBZW: + Cond[1].setImm(AArch64::CBNZW); break; - case ARM64::CBNZW: - Cond[1].setImm(ARM64::CBZW); + case AArch64::CBNZW: + Cond[1].setImm(AArch64::CBZW); break; - case ARM64::CBZX: - Cond[1].setImm(ARM64::CBNZX); + case AArch64::CBZX: + Cond[1].setImm(AArch64::CBNZX); break; - case ARM64::CBNZX: - Cond[1].setImm(ARM64::CBZX); + case AArch64::CBNZX: + Cond[1].setImm(AArch64::CBZX); break; - case ARM64::TBZW: - Cond[1].setImm(ARM64::TBNZW); + case AArch64::TBZW: + Cond[1].setImm(AArch64::TBNZW); break; - case ARM64::TBNZW: - Cond[1].setImm(ARM64::TBZW); + case AArch64::TBNZW: + Cond[1].setImm(AArch64::TBZW); break; - case ARM64::TBZX: - Cond[1].setImm(ARM64::TBNZX); + case AArch64::TBZX: + Cond[1].setImm(AArch64::TBNZX); break; - case ARM64::TBNZX: - Cond[1].setImm(ARM64::TBZX); + case AArch64::TBNZX: + Cond[1].setImm(AArch64::TBZX); break; } } @@ -216,7 +216,7 @@ bool ARM64InstrInfo::ReverseBranchCondition( return false; } -unsigned ARM64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { +unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; @@ -246,12 +246,12 @@ unsigned ARM64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } -void 
ARM64InstrInfo::instantiateCondBranch( +void AArch64InstrInfo::instantiateCondBranch( MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB, const SmallVectorImpl &Cond) const { if (Cond[0].getImm() != -1) { // Regular Bcc - BuildMI(&MBB, DL, get(ARM64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); + BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); } else { // Folded compare-and-branch const MachineInstrBuilder MIB = @@ -262,7 +262,7 @@ void ARM64InstrInfo::instantiateCondBranch( } } -unsigned ARM64InstrInfo::InsertBranch( +unsigned AArch64InstrInfo::InsertBranch( MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl &Cond, DebugLoc DL) const { // Shouldn't be a fall through. @@ -270,7 +270,7 @@ unsigned ARM64InstrInfo::InsertBranch( if (!FBB) { if (Cond.empty()) // Unconditional branch? - BuildMI(&MBB, DL, get(ARM64::B)).addMBB(TBB); + BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB); else instantiateCondBranch(MBB, DL, TBB, Cond); return 1; @@ -278,7 +278,7 @@ unsigned ARM64InstrInfo::InsertBranch( // Two-way conditional branch. instantiateCondBranch(MBB, DL, TBB, Cond); - BuildMI(&MBB, DL, get(ARM64::B)).addMBB(FBB); + BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB); return 2; } @@ -302,52 +302,52 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, if (!TargetRegisterInfo::isVirtualRegister(VReg)) return 0; - bool Is64Bit = ARM64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); + bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); const MachineInstr *DefMI = MRI.getVRegDef(VReg); unsigned Opc = 0; unsigned SrcOpNum = 0; switch (DefMI->getOpcode()) { - case ARM64::ADDSXri: - case ARM64::ADDSWri: + case AArch64::ADDSXri: + case AArch64::ADDSWri: // if NZCV is used, do not fold. - if (DefMI->findRegisterDefOperandIdx(ARM64::NZCV, true) == -1) + if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) return 0; // fall-through to ADDXri and ADDWri. - case ARM64::ADDXri: - case ARM64::ADDWri: + case AArch64::ADDXri: + case AArch64::ADDWri: // add x, 1 -> csinc. if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 || DefMI->getOperand(3).getImm() != 0) return 0; SrcOpNum = 1; - Opc = Is64Bit ? ARM64::CSINCXr : ARM64::CSINCWr; + Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr; break; - case ARM64::ORNXrr: - case ARM64::ORNWrr: { + case AArch64::ORNXrr: + case AArch64::ORNWrr: { // not x -> csinv, represented as orn dst, xzr, src. unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); - if (ZReg != ARM64::XZR && ZReg != ARM64::WZR) + if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) return 0; SrcOpNum = 2; - Opc = Is64Bit ? ARM64::CSINVXr : ARM64::CSINVWr; + Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr; break; } - case ARM64::SUBSXrr: - case ARM64::SUBSWrr: + case AArch64::SUBSXrr: + case AArch64::SUBSWrr: // if NZCV is used, do not fold. - if (DefMI->findRegisterDefOperandIdx(ARM64::NZCV, true) == -1) + if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) return 0; // fall-through to SUBXrr and SUBWrr. - case ARM64::SUBXrr: - case ARM64::SUBWrr: { + case AArch64::SUBXrr: + case AArch64::SUBWrr: { // neg x -> csneg, represented as sub dst, xzr, src. unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); - if (ZReg != ARM64::XZR && ZReg != ARM64::WZR) + if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) return 0; SrcOpNum = 2; - Opc = Is64Bit ? ARM64::CSNEGXr : ARM64::CSNEGWr; + Opc = Is64Bit ? 
AArch64::CSNEGXr : AArch64::CSNEGWr; break; } default: @@ -360,7 +360,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, return Opc; } -bool ARM64InstrInfo::canInsertSelect( +bool AArch64InstrInfo::canInsertSelect( const MachineBasicBlock &MBB, const SmallVectorImpl &Cond, unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { @@ -376,8 +376,8 @@ bool ARM64InstrInfo::canInsertSelect( // GPRs are handled by csel. // FIXME: Fold in x+1, -x, and ~x when applicable. - if (ARM64::GPR64allRegClass.hasSubClassEq(RC) || - ARM64::GPR32allRegClass.hasSubClassEq(RC)) { + if (AArch64::GPR64allRegClass.hasSubClassEq(RC) || + AArch64::GPR32allRegClass.hasSubClassEq(RC)) { // Single-cycle csel, csinc, csinv, and csneg. CondCycles = 1 + ExtraCondLat; TrueCycles = FalseCycles = 1; @@ -390,8 +390,8 @@ bool ARM64InstrInfo::canInsertSelect( // Scalar floating point is handled by fcsel. // FIXME: Form fabs, fmin, and fmax when applicable. - if (ARM64::FPR64RegClass.hasSubClassEq(RC) || - ARM64::FPR32RegClass.hasSubClassEq(RC)) { + if (AArch64::FPR64RegClass.hasSubClassEq(RC) || + AArch64::FPR32RegClass.hasSubClassEq(RC)) { CondCycles = 5 + ExtraCondLat; TrueCycles = FalseCycles = 2; return true; @@ -401,20 +401,20 @@ bool ARM64InstrInfo::canInsertSelect( return false; } -void ARM64InstrInfo::insertSelect(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DstReg, - const SmallVectorImpl &Cond, - unsigned TrueReg, unsigned FalseReg) const { +void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DstReg, + const SmallVectorImpl &Cond, + unsigned TrueReg, unsigned FalseReg) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); // Parse the condition code, see parseCondBranch() above. - ARM64CC::CondCode CC; + AArch64CC::CondCode CC; switch (Cond.size()) { default: llvm_unreachable("Unknown condition opcode in Cond"); case 1: // b.cc - CC = ARM64CC::CondCode(Cond[0].getImm()); + CC = AArch64CC::CondCode(Cond[0].getImm()); break; case 3: { // cbz/cbnz // We must insert a compare against 0. @@ -422,34 +422,34 @@ void ARM64InstrInfo::insertSelect(MachineBasicBlock &MBB, switch (Cond[1].getImm()) { default: llvm_unreachable("Unknown branch opcode in Cond"); - case ARM64::CBZW: + case AArch64::CBZW: Is64Bit = 0; - CC = ARM64CC::EQ; + CC = AArch64CC::EQ; break; - case ARM64::CBZX: + case AArch64::CBZX: Is64Bit = 1; - CC = ARM64CC::EQ; + CC = AArch64CC::EQ; break; - case ARM64::CBNZW: + case AArch64::CBNZW: Is64Bit = 0; - CC = ARM64CC::NE; + CC = AArch64CC::NE; break; - case ARM64::CBNZX: + case AArch64::CBNZX: Is64Bit = 1; - CC = ARM64CC::NE; + CC = AArch64CC::NE; break; } unsigned SrcReg = Cond[2].getReg(); if (Is64Bit) { // cmp reg, #0 is actually subs xzr, reg, #0. 
- MRI.constrainRegClass(SrcReg, &ARM64::GPR64spRegClass); - BuildMI(MBB, I, DL, get(ARM64::SUBSXri), ARM64::XZR) + MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass); + BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR) .addReg(SrcReg) .addImm(0) .addImm(0); } else { - MRI.constrainRegClass(SrcReg, &ARM64::GPR32spRegClass); - BuildMI(MBB, I, DL, get(ARM64::SUBSWri), ARM64::WZR) + MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass); + BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR) .addReg(SrcReg) .addImm(0) .addImm(0); @@ -461,24 +461,26 @@ void ARM64InstrInfo::insertSelect(MachineBasicBlock &MBB, switch (Cond[1].getImm()) { default: llvm_unreachable("Unknown branch opcode in Cond"); - case ARM64::TBZW: - case ARM64::TBZX: - CC = ARM64CC::EQ; + case AArch64::TBZW: + case AArch64::TBZX: + CC = AArch64CC::EQ; break; - case ARM64::TBNZW: - case ARM64::TBNZX: - CC = ARM64CC::NE; + case AArch64::TBNZW: + case AArch64::TBNZX: + CC = AArch64CC::NE; break; } // cmp reg, #foo is actually ands xzr, reg, #1< 64 bit extension case, these instructions can do // much more. if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31) @@ -548,7 +550,7 @@ bool ARM64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, // This is a signed or unsigned 32 -> 64 bit extension. SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); - SubIdx = ARM64::sub_32; + SubIdx = AArch64::sub_32; return true; } } @@ -556,49 +558,49 @@ bool ARM64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2, and the value it compares against in CmpValue. /// Return true if the comparison instruction can be analyzed. -bool ARM64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, - unsigned &SrcReg2, int &CmpMask, - int &CmpValue) const { +bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + unsigned &SrcReg2, int &CmpMask, + int &CmpValue) const { switch (MI->getOpcode()) { default: break; - case ARM64::SUBSWrr: - case ARM64::SUBSWrs: - case ARM64::SUBSWrx: - case ARM64::SUBSXrr: - case ARM64::SUBSXrs: - case ARM64::SUBSXrx: - case ARM64::ADDSWrr: - case ARM64::ADDSWrs: - case ARM64::ADDSWrx: - case ARM64::ADDSXrr: - case ARM64::ADDSXrs: - case ARM64::ADDSXrx: + case AArch64::SUBSWrr: + case AArch64::SUBSWrs: + case AArch64::SUBSWrx: + case AArch64::SUBSXrr: + case AArch64::SUBSXrs: + case AArch64::SUBSXrx: + case AArch64::ADDSWrr: + case AArch64::ADDSWrs: + case AArch64::ADDSWrx: + case AArch64::ADDSXrr: + case AArch64::ADDSXrs: + case AArch64::ADDSXrx: // Replace SUBSWrr with SUBWrr if NZCV is not used. SrcReg = MI->getOperand(1).getReg(); SrcReg2 = MI->getOperand(2).getReg(); CmpMask = ~0; CmpValue = 0; return true; - case ARM64::SUBSWri: - case ARM64::ADDSWri: - case ARM64::SUBSXri: - case ARM64::ADDSXri: + case AArch64::SUBSWri: + case AArch64::ADDSWri: + case AArch64::SUBSXri: + case AArch64::ADDSXri: SrcReg = MI->getOperand(1).getReg(); SrcReg2 = 0; CmpMask = ~0; CmpValue = MI->getOperand(2).getImm(); return true; - case ARM64::ANDSWri: - case ARM64::ANDSXri: + case AArch64::ANDSWri: + case AArch64::ANDSXri: // ANDS does not use the same encoding scheme as the others xxxS // instructions. SrcReg = MI->getOperand(1).getReg(); SrcReg2 = 0; CmpMask = ~0; - CmpValue = ARM64_AM::decodeLogicalImmediate( + CmpValue = AArch64_AM::decodeLogicalImmediate( MI->getOperand(2).getImm(), - MI->getOpcode() == ARM64::ANDSWri ? 
32 : 64); + MI->getOpcode() == AArch64::ANDSWri ? 32 : 64); return true; } @@ -646,33 +648,33 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) { /// optimizeCompareInstr - Convert the instruction supplying the argument to the /// comparison into one that sets the zero bit in the flags register. -bool ARM64InstrInfo::optimizeCompareInstr( +bool AArch64InstrInfo::optimizeCompareInstr( MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const { // Replace SUBSWrr with SUBWrr if NZCV is not used. - int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(ARM64::NZCV, true); + int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true); if (Cmp_NZCV != -1) { unsigned NewOpc; switch (CmpInstr->getOpcode()) { default: return false; - case ARM64::ADDSWrr: NewOpc = ARM64::ADDWrr; break; - case ARM64::ADDSWri: NewOpc = ARM64::ADDWri; break; - case ARM64::ADDSWrs: NewOpc = ARM64::ADDWrs; break; - case ARM64::ADDSWrx: NewOpc = ARM64::ADDWrx; break; - case ARM64::ADDSXrr: NewOpc = ARM64::ADDXrr; break; - case ARM64::ADDSXri: NewOpc = ARM64::ADDXri; break; - case ARM64::ADDSXrs: NewOpc = ARM64::ADDXrs; break; - case ARM64::ADDSXrx: NewOpc = ARM64::ADDXrx; break; - case ARM64::SUBSWrr: NewOpc = ARM64::SUBWrr; break; - case ARM64::SUBSWri: NewOpc = ARM64::SUBWri; break; - case ARM64::SUBSWrs: NewOpc = ARM64::SUBWrs; break; - case ARM64::SUBSWrx: NewOpc = ARM64::SUBWrx; break; - case ARM64::SUBSXrr: NewOpc = ARM64::SUBXrr; break; - case ARM64::SUBSXri: NewOpc = ARM64::SUBXri; break; - case ARM64::SUBSXrs: NewOpc = ARM64::SUBXrs; break; - case ARM64::SUBSXrx: NewOpc = ARM64::SUBXrx; break; + case AArch64::ADDSWrr: NewOpc = AArch64::ADDWrr; break; + case AArch64::ADDSWri: NewOpc = AArch64::ADDWri; break; + case AArch64::ADDSWrs: NewOpc = AArch64::ADDWrs; break; + case AArch64::ADDSWrx: NewOpc = AArch64::ADDWrx; break; + case AArch64::ADDSXrr: NewOpc = AArch64::ADDXrr; break; + case AArch64::ADDSXri: NewOpc = AArch64::ADDXri; break; + case AArch64::ADDSXrs: NewOpc = AArch64::ADDXrs; break; + case AArch64::ADDSXrx: NewOpc = AArch64::ADDXrx; break; + case AArch64::SUBSWrr: NewOpc = AArch64::SUBWrr; break; + case AArch64::SUBSWri: NewOpc = AArch64::SUBWri; break; + case AArch64::SUBSWrs: NewOpc = AArch64::SUBWrs; break; + case AArch64::SUBSWrx: NewOpc = AArch64::SUBWrx; break; + case AArch64::SUBSXrr: NewOpc = AArch64::SUBXrr; break; + case AArch64::SUBSXri: NewOpc = AArch64::SUBXri; break; + case AArch64::SUBSXrs: NewOpc = AArch64::SUBXrs; break; + case AArch64::SUBSXrx: NewOpc = AArch64::SUBXrx; break; } const MCInstrDesc &MCID = get(NewOpc); @@ -718,8 +720,8 @@ bool ARM64InstrInfo::optimizeCompareInstr( for (--I; I != E; --I) { const MachineInstr &Instr = *I; - if (Instr.modifiesRegister(ARM64::NZCV, TRI) || - Instr.readsRegister(ARM64::NZCV, TRI)) + if (Instr.modifiesRegister(AArch64::NZCV, TRI) || + Instr.readsRegister(AArch64::NZCV, TRI)) // This instruction modifies or uses NZCV after the one we want to // change. We can't do this transformation. 
return false; @@ -732,29 +734,29 @@ bool ARM64InstrInfo::optimizeCompareInstr( switch (MI->getOpcode()) { default: return false; - case ARM64::ADDSWrr: - case ARM64::ADDSWri: - case ARM64::ADDSXrr: - case ARM64::ADDSXri: - case ARM64::SUBSWrr: - case ARM64::SUBSWri: - case ARM64::SUBSXrr: - case ARM64::SUBSXri: - break; - case ARM64::ADDWrr: NewOpc = ARM64::ADDSWrr; break; - case ARM64::ADDWri: NewOpc = ARM64::ADDSWri; break; - case ARM64::ADDXrr: NewOpc = ARM64::ADDSXrr; break; - case ARM64::ADDXri: NewOpc = ARM64::ADDSXri; break; - case ARM64::ADCWr: NewOpc = ARM64::ADCSWr; break; - case ARM64::ADCXr: NewOpc = ARM64::ADCSXr; break; - case ARM64::SUBWrr: NewOpc = ARM64::SUBSWrr; break; - case ARM64::SUBWri: NewOpc = ARM64::SUBSWri; break; - case ARM64::SUBXrr: NewOpc = ARM64::SUBSXrr; break; - case ARM64::SUBXri: NewOpc = ARM64::SUBSXri; break; - case ARM64::SBCWr: NewOpc = ARM64::SBCSWr; break; - case ARM64::SBCXr: NewOpc = ARM64::SBCSXr; break; - case ARM64::ANDWri: NewOpc = ARM64::ANDSWri; break; - case ARM64::ANDXri: NewOpc = ARM64::ANDSXri; break; + case AArch64::ADDSWrr: + case AArch64::ADDSWri: + case AArch64::ADDSXrr: + case AArch64::ADDSXri: + case AArch64::SUBSWrr: + case AArch64::SUBSWri: + case AArch64::SUBSXrr: + case AArch64::SUBSXri: + break; + case AArch64::ADDWrr: NewOpc = AArch64::ADDSWrr; break; + case AArch64::ADDWri: NewOpc = AArch64::ADDSWri; break; + case AArch64::ADDXrr: NewOpc = AArch64::ADDSXrr; break; + case AArch64::ADDXri: NewOpc = AArch64::ADDSXri; break; + case AArch64::ADCWr: NewOpc = AArch64::ADCSWr; break; + case AArch64::ADCXr: NewOpc = AArch64::ADCSXr; break; + case AArch64::SUBWrr: NewOpc = AArch64::SUBSWrr; break; + case AArch64::SUBWri: NewOpc = AArch64::SUBSWri; break; + case AArch64::SUBXrr: NewOpc = AArch64::SUBSXrr; break; + case AArch64::SUBXri: NewOpc = AArch64::SUBSXri; break; + case AArch64::SBCWr: NewOpc = AArch64::SBCSWr; break; + case AArch64::SBCXr: NewOpc = AArch64::SBCSXr; break; + case AArch64::ANDWri: NewOpc = AArch64::ANDSWri; break; + case AArch64::ANDXri: NewOpc = AArch64::ANDSXri; break; } // Scan forward for the use of NZCV. @@ -771,11 +773,11 @@ bool ARM64InstrInfo::optimizeCompareInstr( for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO; ++IO) { const MachineOperand &MO = Instr.getOperand(IO); - if (MO.isRegMask() && MO.clobbersPhysReg(ARM64::NZCV)) { + if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) { IsSafe = true; break; } - if (!MO.isReg() || MO.getReg() != ARM64::NZCV) + if (!MO.isReg() || MO.getReg() != AArch64::NZCV) continue; if (MO.isDef()) { IsSafe = true; @@ -784,24 +786,24 @@ bool ARM64InstrInfo::optimizeCompareInstr( // Decode the condition code. 
unsigned Opc = Instr.getOpcode(); - ARM64CC::CondCode CC; + AArch64CC::CondCode CC; switch (Opc) { default: return false; - case ARM64::Bcc: - CC = (ARM64CC::CondCode)Instr.getOperand(IO - 2).getImm(); + case AArch64::Bcc: + CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm(); break; - case ARM64::CSINVWr: - case ARM64::CSINVXr: - case ARM64::CSINCWr: - case ARM64::CSINCXr: - case ARM64::CSELWr: - case ARM64::CSELXr: - case ARM64::CSNEGWr: - case ARM64::CSNEGXr: - case ARM64::FCSELSrrr: - case ARM64::FCSELDrrr: - CC = (ARM64CC::CondCode)Instr.getOperand(IO - 1).getImm(); + case AArch64::CSINVWr: + case AArch64::CSINVXr: + case AArch64::CSINCWr: + case AArch64::CSINCXr: + case AArch64::CSELWr: + case AArch64::CSELXr: + case AArch64::CSNEGWr: + case AArch64::CSNEGXr: + case AArch64::FCSELSrrr: + case AArch64::FCSELDrrr: + CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm(); break; } @@ -810,12 +812,12 @@ bool ARM64InstrInfo::optimizeCompareInstr( default: // NZCV can be used multiple times, we should continue. break; - case ARM64CC::VS: - case ARM64CC::VC: - case ARM64CC::GE: - case ARM64CC::LT: - case ARM64CC::GT: - case ARM64CC::LE: + case AArch64CC::VS: + case AArch64CC::VC: + case AArch64CC::GE: + case AArch64CC::LT: + case AArch64CC::GT: + case AArch64CC::LE: return false; } } @@ -826,7 +828,7 @@ bool ARM64InstrInfo::optimizeCompareInstr( if (!IsSafe) { MachineBasicBlock *ParentBlock = CmpInstr->getParent(); for (auto *MBB : ParentBlock->successors()) - if (MBB->isLiveIn(ARM64::NZCV)) + if (MBB->isLiveIn(AArch64::NZCV)) return false; } @@ -836,47 +838,47 @@ bool ARM64InstrInfo::optimizeCompareInstr( bool succeeded = UpdateOperandRegClass(MI); (void)succeeded; assert(succeeded && "Some operands reg class are incompatible!"); - MI->addRegisterDefined(ARM64::NZCV, TRI); + MI->addRegisterDefined(AArch64::NZCV, TRI); return true; } /// Return true if this is this instruction has a non-zero immediate -bool ARM64InstrInfo::hasShiftedReg(const MachineInstr *MI) const { +bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; - case ARM64::ADDSWrs: - case ARM64::ADDSXrs: - case ARM64::ADDWrs: - case ARM64::ADDXrs: - case ARM64::ANDSWrs: - case ARM64::ANDSXrs: - case ARM64::ANDWrs: - case ARM64::ANDXrs: - case ARM64::BICSWrs: - case ARM64::BICSXrs: - case ARM64::BICWrs: - case ARM64::BICXrs: - case ARM64::CRC32Brr: - case ARM64::CRC32CBrr: - case ARM64::CRC32CHrr: - case ARM64::CRC32CWrr: - case ARM64::CRC32CXrr: - case ARM64::CRC32Hrr: - case ARM64::CRC32Wrr: - case ARM64::CRC32Xrr: - case ARM64::EONWrs: - case ARM64::EONXrs: - case ARM64::EORWrs: - case ARM64::EORXrs: - case ARM64::ORNWrs: - case ARM64::ORNXrs: - case ARM64::ORRWrs: - case ARM64::ORRXrs: - case ARM64::SUBSWrs: - case ARM64::SUBSXrs: - case ARM64::SUBWrs: - case ARM64::SUBXrs: + case AArch64::ADDSWrs: + case AArch64::ADDSXrs: + case AArch64::ADDWrs: + case AArch64::ADDXrs: + case AArch64::ANDSWrs: + case AArch64::ANDSXrs: + case AArch64::ANDWrs: + case AArch64::ANDXrs: + case AArch64::BICSWrs: + case AArch64::BICSXrs: + case AArch64::BICWrs: + case AArch64::BICXrs: + case AArch64::CRC32Brr: + case AArch64::CRC32CBrr: + case AArch64::CRC32CHrr: + case AArch64::CRC32CWrr: + case AArch64::CRC32CXrr: + case AArch64::CRC32Hrr: + case AArch64::CRC32Wrr: + case AArch64::CRC32Xrr: + case AArch64::EONWrs: + case AArch64::EONXrs: + case AArch64::EORWrs: + case AArch64::EORXrs: + case AArch64::ORNWrs: + case AArch64::ORNXrs: + case AArch64::ORRWrs: + case 
AArch64::ORRXrs: + case AArch64::SUBSWrs: + case AArch64::SUBSXrs: + case AArch64::SUBWrs: + case AArch64::SUBXrs: if (MI->getOperand(3).isImm()) { unsigned val = MI->getOperand(3).getImm(); return (val != 0); @@ -887,22 +889,22 @@ bool ARM64InstrInfo::hasShiftedReg(const MachineInstr *MI) const { } /// Return true if this is this instruction has a non-zero immediate -bool ARM64InstrInfo::hasExtendedReg(const MachineInstr *MI) const { +bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; - case ARM64::ADDSWrx: - case ARM64::ADDSXrx: - case ARM64::ADDSXrx64: - case ARM64::ADDWrx: - case ARM64::ADDXrx: - case ARM64::ADDXrx64: - case ARM64::SUBSWrx: - case ARM64::SUBSXrx: - case ARM64::SUBSXrx64: - case ARM64::SUBWrx: - case ARM64::SUBXrx: - case ARM64::SUBXrx64: + case AArch64::ADDSWrx: + case AArch64::ADDSXrx: + case AArch64::ADDSXrx64: + case AArch64::ADDWrx: + case AArch64::ADDXrx: + case AArch64::ADDXrx64: + case AArch64::SUBSWrx: + case AArch64::SUBSXrx: + case AArch64::SUBSXrx64: + case AArch64::SUBWrx: + case AArch64::SUBXrx: + case AArch64::SUBXrx64: if (MI->getOperand(3).isImm()) { unsigned val = MI->getOperand(3).getImm(); return (val != 0); @@ -915,47 +917,47 @@ bool ARM64InstrInfo::hasExtendedReg(const MachineInstr *MI) const { // Return true if this instruction simply sets its single destination register // to zero. This is equivalent to a register rename of the zero-register. -bool ARM64InstrInfo::isGPRZero(const MachineInstr *MI) const { +bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; - case ARM64::MOVZWi: - case ARM64::MOVZXi: // movz Rd, #0 (LSL #0) + case AArch64::MOVZWi: + case AArch64::MOVZXi: // movz Rd, #0 (LSL #0) if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) { assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands"); return true; } break; - case ARM64::ANDWri: // and Rd, Rzr, #imm - return MI->getOperand(1).getReg() == ARM64::WZR; - case ARM64::ANDXri: - return MI->getOperand(1).getReg() == ARM64::XZR; + case AArch64::ANDWri: // and Rd, Rzr, #imm + return MI->getOperand(1).getReg() == AArch64::WZR; + case AArch64::ANDXri: + return MI->getOperand(1).getReg() == AArch64::XZR; case TargetOpcode::COPY: - return MI->getOperand(1).getReg() == ARM64::WZR; + return MI->getOperand(1).getReg() == AArch64::WZR; } return false; } // Return true if this instruction simply renames a general register without // modifying bits. 
-bool ARM64InstrInfo::isGPRCopy(const MachineInstr *MI) const { +bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; case TargetOpcode::COPY: { // GPR32 copies will by lowered to ORRXrs unsigned DstReg = MI->getOperand(0).getReg(); - return (ARM64::GPR32RegClass.contains(DstReg) || - ARM64::GPR64RegClass.contains(DstReg)); + return (AArch64::GPR32RegClass.contains(DstReg) || + AArch64::GPR64RegClass.contains(DstReg)); } - case ARM64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0) - if (MI->getOperand(1).getReg() == ARM64::XZR) { + case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0) + if (MI->getOperand(1).getReg() == AArch64::XZR) { assert(MI->getDesc().getNumOperands() == 4 && MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands"); return true; } - case ARM64::ADDXri: // add Xd, Xn, #0 (LSL #0) + case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0) if (MI->getOperand(2).getImm() == 0) { assert(MI->getDesc().getNumOperands() == 4 && MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands"); @@ -967,17 +969,17 @@ bool ARM64InstrInfo::isGPRCopy(const MachineInstr *MI) const { // Return true if this instruction simply renames a general register without // modifying bits. -bool ARM64InstrInfo::isFPRCopy(const MachineInstr *MI) const { +bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; case TargetOpcode::COPY: { // FPR64 copies will by lowered to ORR.16b unsigned DstReg = MI->getOperand(0).getReg(); - return (ARM64::FPR64RegClass.contains(DstReg) || - ARM64::FPR128RegClass.contains(DstReg)); + return (AArch64::FPR64RegClass.contains(DstReg) || + AArch64::FPR128RegClass.contains(DstReg)); } - case ARM64::ORRv16i8: + case AArch64::ORRv16i8: if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() && "invalid ORRv16i8 operands"); @@ -987,18 +989,18 @@ bool ARM64InstrInfo::isFPRCopy(const MachineInstr *MI) const { return false; } -unsigned ARM64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { +unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { switch (MI->getOpcode()) { default: break; - case ARM64::LDRWui: - case ARM64::LDRXui: - case ARM64::LDRBui: - case ARM64::LDRHui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: + case AArch64::LDRWui: + case AArch64::LDRXui: + case AArch64::LDRBui: + case AArch64::LDRHui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { FrameIndex = MI->getOperand(1).getIndex(); @@ -1010,18 +1012,18 @@ unsigned ARM64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return 0; } -unsigned ARM64InstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { +unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { switch (MI->getOpcode()) { default: break; - case ARM64::STRWui: - case ARM64::STRXui: - case ARM64::STRBui: - case ARM64::STRHui: - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: + case AArch64::STRWui: + case AArch64::STRXui: + case AArch64::STRBui: + case AArch64::STRHui: + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && 
MI->getOperand(2).getImm() == 0) { FrameIndex = MI->getOperand(1).getIndex(); @@ -1035,66 +1037,66 @@ unsigned ARM64InstrInfo::isStoreToStackSlot(const MachineInstr *MI, /// Return true if this is load/store scales or extends its register offset. /// This refers to scaling a dynamic index as opposed to scaled immediates. /// MI should be a memory op that allows scaled addressing. -bool ARM64InstrInfo::isScaledAddr(const MachineInstr *MI) const { +bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; - case ARM64::LDRBBroW: - case ARM64::LDRBroW: - case ARM64::LDRDroW: - case ARM64::LDRHHroW: - case ARM64::LDRHroW: - case ARM64::LDRQroW: - case ARM64::LDRSBWroW: - case ARM64::LDRSBXroW: - case ARM64::LDRSHWroW: - case ARM64::LDRSHXroW: - case ARM64::LDRSWroW: - case ARM64::LDRSroW: - case ARM64::LDRWroW: - case ARM64::LDRXroW: - case ARM64::STRBBroW: - case ARM64::STRBroW: - case ARM64::STRDroW: - case ARM64::STRHHroW: - case ARM64::STRHroW: - case ARM64::STRQroW: - case ARM64::STRSroW: - case ARM64::STRWroW: - case ARM64::STRXroW: - case ARM64::LDRBBroX: - case ARM64::LDRBroX: - case ARM64::LDRDroX: - case ARM64::LDRHHroX: - case ARM64::LDRHroX: - case ARM64::LDRQroX: - case ARM64::LDRSBWroX: - case ARM64::LDRSBXroX: - case ARM64::LDRSHWroX: - case ARM64::LDRSHXroX: - case ARM64::LDRSWroX: - case ARM64::LDRSroX: - case ARM64::LDRWroX: - case ARM64::LDRXroX: - case ARM64::STRBBroX: - case ARM64::STRBroX: - case ARM64::STRDroX: - case ARM64::STRHHroX: - case ARM64::STRHroX: - case ARM64::STRQroX: - case ARM64::STRSroX: - case ARM64::STRWroX: - case ARM64::STRXroX: + case AArch64::LDRBBroW: + case AArch64::LDRBroW: + case AArch64::LDRDroW: + case AArch64::LDRHHroW: + case AArch64::LDRHroW: + case AArch64::LDRQroW: + case AArch64::LDRSBWroW: + case AArch64::LDRSBXroW: + case AArch64::LDRSHWroW: + case AArch64::LDRSHXroW: + case AArch64::LDRSWroW: + case AArch64::LDRSroW: + case AArch64::LDRWroW: + case AArch64::LDRXroW: + case AArch64::STRBBroW: + case AArch64::STRBroW: + case AArch64::STRDroW: + case AArch64::STRHHroW: + case AArch64::STRHroW: + case AArch64::STRQroW: + case AArch64::STRSroW: + case AArch64::STRWroW: + case AArch64::STRXroW: + case AArch64::LDRBBroX: + case AArch64::LDRBroX: + case AArch64::LDRDroX: + case AArch64::LDRHHroX: + case AArch64::LDRHroX: + case AArch64::LDRQroX: + case AArch64::LDRSBWroX: + case AArch64::LDRSBXroX: + case AArch64::LDRSHWroX: + case AArch64::LDRSHXroX: + case AArch64::LDRSWroX: + case AArch64::LDRSroX: + case AArch64::LDRWroX: + case AArch64::LDRXroX: + case AArch64::STRBBroX: + case AArch64::STRBroX: + case AArch64::STRDroX: + case AArch64::STRHHroX: + case AArch64::STRHroX: + case AArch64::STRQroX: + case AArch64::STRSroX: + case AArch64::STRWroX: + case AArch64::STRXroX: unsigned Val = MI->getOperand(3).getImm(); - ARM64_AM::ShiftExtendType ExtType = ARM64_AM::getMemExtendType(Val); - return (ExtType != ARM64_AM::UXTX) || ARM64_AM::getMemDoShift(Val); + AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val); + return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val); } return false; } /// Check all MachineMemOperands for a hint to suppress pairing. 
-bool ARM64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const { +bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const { assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) && "Too many target MO flags"); for (auto *MM : MI->memoperands()) { @@ -1107,7 +1109,7 @@ bool ARM64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const { } /// Set a flag on the first MachineMemOperand to suppress pairing. -void ARM64InstrInfo::suppressLdStPair(MachineInstr *MI) const { +void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const { if (MI->memoperands_empty()) return; @@ -1117,22 +1119,23 @@ void ARM64InstrInfo::suppressLdStPair(MachineInstr *MI) const { ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit); } -bool ARM64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, - unsigned &Offset, - const TargetRegisterInfo *TRI) const { +bool +AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, + unsigned &Offset, + const TargetRegisterInfo *TRI) const { switch (LdSt->getOpcode()) { default: return false; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: + case AArch64::STRXui: + case AArch64::STRWui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: + case AArch64::LDRXui: + case AArch64::LDRWui: if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm()) return false; BaseReg = LdSt->getOperand(1).getReg(); @@ -1146,9 +1149,9 @@ bool ARM64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, /// Detect opportunities for ldp/stp formation. /// /// Only called for LdSt for which getLdStBaseRegImmOfs returns true. -bool ARM64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, - MachineInstr *SecondLdSt, - unsigned NumLoads) const { +bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, + MachineInstr *SecondLdSt, + unsigned NumLoads) const { // Only cluster up to a single pair. if (NumLoads > 1) return false; @@ -1164,33 +1167,33 @@ bool ARM64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, return Ofs1 + 1 == Ofs2; } -bool ARM64InstrInfo::shouldScheduleAdjacent(MachineInstr *First, - MachineInstr *Second) const { +bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First, + MachineInstr *Second) const { // Cyclone can fuse CMN, CMP followed by Bcc. // FIXME: B0 can also fuse: // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ. 
- if (Second->getOpcode() != ARM64::Bcc) + if (Second->getOpcode() != AArch64::Bcc) return false; switch (First->getOpcode()) { default: return false; - case ARM64::SUBSWri: - case ARM64::ADDSWri: - case ARM64::ANDSWri: - case ARM64::SUBSXri: - case ARM64::ADDSXri: - case ARM64::ANDSXri: + case AArch64::SUBSWri: + case AArch64::ADDSWri: + case AArch64::ANDSWri: + case AArch64::SUBSXri: + case AArch64::ADDSXri: + case AArch64::ANDSXri: return true; } } -MachineInstr *ARM64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM64::DBG_VALUE)) +MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE)) .addFrameIndex(FrameIx) .addImm(0) .addImm(Offset) @@ -1217,12 +1220,10 @@ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, return ((DestReg - SrcReg) & 0x1f) < NumRegs; } -void ARM64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - DebugLoc DL, unsigned DestReg, - unsigned SrcReg, bool KillSrc, - unsigned Opcode, - llvm::ArrayRef Indices) const { +void AArch64InstrInfo::copyPhysRegTuple( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, + llvm::ArrayRef Indices) const { assert(getSubTarget().hasNEON() && "Unexpected register copy without NEON"); const TargetRegisterInfo *TRI = &getRegisterInfo(); @@ -1245,258 +1246,263 @@ void ARM64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, } } -void ARM64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - if (ARM64::GPR32spRegClass.contains(DestReg) && - (ARM64::GPR32spRegClass.contains(SrcReg) || SrcReg == ARM64::WZR)) { +void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (AArch64::GPR32spRegClass.contains(DestReg) && + (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { const TargetRegisterInfo *TRI = &getRegisterInfo(); - if (DestReg == ARM64::WSP || SrcReg == ARM64::WSP) { + if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { // If either operand is WSP, expand to ADD #0. if (Subtarget.hasZeroCycleRegMove()) { // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. - unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); + unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); + unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); // This instruction is reading and writing X registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegX, but a proper // value from SrcReg. 
- BuildMI(MBB, I, DL, get(ARM64::ADDXri), DestRegX) + BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX) .addReg(SrcRegX, RegState::Undef) .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); } else { - BuildMI(MBB, I, DL, get(ARM64::ADDWri), DestReg) + BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)) .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } - } else if (SrcReg == ARM64::WZR && Subtarget.hasZeroCycleZeroing()) { - BuildMI(MBB, I, DL, get(ARM64::MOVZWi), DestReg).addImm(0).addImm( - ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); + } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) { + BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm( + AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else { if (Subtarget.hasZeroCycleRegMove()) { // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. - unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); + unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); + unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); // This instruction is reading and writing X registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegX, but a proper // value from SrcReg. - BuildMI(MBB, I, DL, get(ARM64::ORRXrr), DestRegX) - .addReg(ARM64::XZR) + BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) + .addReg(AArch64::XZR) .addReg(SrcRegX, RegState::Undef) .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); } else { // Otherwise, expand to ORR WZR. - BuildMI(MBB, I, DL, get(ARM64::ORRWrr), DestReg) - .addReg(ARM64::WZR) + BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) + .addReg(AArch64::WZR) .addReg(SrcReg, getKillRegState(KillSrc)); } } return; } - if (ARM64::GPR64spRegClass.contains(DestReg) && - (ARM64::GPR64spRegClass.contains(SrcReg) || SrcReg == ARM64::XZR)) { - if (DestReg == ARM64::SP || SrcReg == ARM64::SP) { + if (AArch64::GPR64spRegClass.contains(DestReg) && + (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { + if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { // If either operand is SP, expand to ADD #0. - BuildMI(MBB, I, DL, get(ARM64::ADDXri), DestReg) + BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)) .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - } else if (SrcReg == ARM64::XZR && Subtarget.hasZeroCycleZeroing()) { - BuildMI(MBB, I, DL, get(ARM64::MOVZXi), DestReg).addImm(0).addImm( - ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) { + BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm( + AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else { // Otherwise, expand to ORR XZR. 
- BuildMI(MBB, I, DL, get(ARM64::ORRXrr), DestReg) - .addReg(ARM64::XZR) + BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) + .addReg(AArch64::XZR) .addReg(SrcReg, getKillRegState(KillSrc)); } return; } // Copy a DDDD register quad by copying the individual sub-registers. - if (ARM64::DDDDRegClass.contains(DestReg) && - ARM64::DDDDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2, ARM64::dsub3 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, + if (AArch64::DDDDRegClass.contains(DestReg) && + AArch64::DDDDRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, + AArch64::dsub2, AArch64::dsub3 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, Indices); return; } // Copy a DDD register triple by copying the individual sub-registers. - if (ARM64::DDDRegClass.contains(DestReg) && - ARM64::DDDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, + if (AArch64::DDDRegClass.contains(DestReg) && + AArch64::DDDRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, + AArch64::dsub2 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, Indices); return; } // Copy a DD register pair by copying the individual sub-registers. - if (ARM64::DDRegClass.contains(DestReg) && - ARM64::DDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, + if (AArch64::DDRegClass.contains(DestReg) && + AArch64::DDRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, Indices); return; } // Copy a QQQQ register quad by copying the individual sub-registers. - if (ARM64::QQQQRegClass.contains(DestReg) && - ARM64::QQQQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2, ARM64::qsub3 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, + if (AArch64::QQQQRegClass.contains(DestReg) && + AArch64::QQQQRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2, AArch64::qsub3 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, Indices); return; } // Copy a QQQ register triple by copying the individual sub-registers. - if (ARM64::QQQRegClass.contains(DestReg) && - ARM64::QQQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, + if (AArch64::QQQRegClass.contains(DestReg) && + AArch64::QQQRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, Indices); return; } // Copy a QQ register pair by copying the individual sub-registers. 
- if (ARM64::QQRegClass.contains(DestReg) && - ARM64::QQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, + if (AArch64::QQRegClass.contains(DestReg) && + AArch64::QQRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, Indices); return; } - if (ARM64::FPR128RegClass.contains(DestReg) && - ARM64::FPR128RegClass.contains(SrcReg)) { + if (AArch64::FPR128RegClass.contains(DestReg) && + AArch64::FPR128RegClass.contains(SrcReg)) { if(getSubTarget().hasNEON()) { - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } else { - BuildMI(MBB, I, DL, get(ARM64::STRQpre)) - .addReg(ARM64::SP, RegState::Define) + BuildMI(MBB, I, DL, get(AArch64::STRQpre)) + .addReg(AArch64::SP, RegState::Define) .addReg(SrcReg, getKillRegState(KillSrc)) - .addReg(ARM64::SP) + .addReg(AArch64::SP) .addImm(-16); - BuildMI(MBB, I, DL, get(ARM64::LDRQpre)) - .addReg(ARM64::SP, RegState::Define) + BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) + .addReg(AArch64::SP, RegState::Define) .addReg(DestReg, RegState::Define) - .addReg(ARM64::SP) + .addReg(AArch64::SP) .addImm(16); } return; } - if (ARM64::FPR64RegClass.contains(DestReg) && - ARM64::FPR64RegClass.contains(SrcReg)) { + if (AArch64::FPR64RegClass.contains(DestReg) && + AArch64::FPR64RegClass.contains(SrcReg)) { if(getSubTarget().hasNEON()) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::dsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::dsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, + &AArch64::FPR128RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, + &AArch64::FPR128RegClass); + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } else { - BuildMI(MBB, I, DL, get(ARM64::FMOVDr), DestReg) + BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); } return; } - if (ARM64::FPR32RegClass.contains(DestReg) && - ARM64::FPR32RegClass.contains(SrcReg)) { + if (AArch64::FPR32RegClass.contains(DestReg) && + AArch64::FPR32RegClass.contains(SrcReg)) { if(getSubTarget().hasNEON()) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::ssub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::ssub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, + &AArch64::FPR128RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, + &AArch64::FPR128RegClass); + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } else { - BuildMI(MBB, I, DL, get(ARM64::FMOVSr), DestReg) + BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); } return; } - if (ARM64::FPR16RegClass.contains(DestReg) && - ARM64::FPR16RegClass.contains(SrcReg)) { + if (AArch64::FPR16RegClass.contains(DestReg) && + 
AArch64::FPR16RegClass.contains(SrcReg)) { if(getSubTarget().hasNEON()) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::hsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::hsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, + &AArch64::FPR128RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, + &AArch64::FPR128RegClass); + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } else { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::hsub, &ARM64::FPR32RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::hsub, &ARM64::FPR32RegClass); - BuildMI(MBB, I, DL, get(ARM64::FMOVSr), DestReg) + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, + &AArch64::FPR32RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, + &AArch64::FPR32RegClass); + BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); } return; } - if (ARM64::FPR8RegClass.contains(DestReg) && - ARM64::FPR8RegClass.contains(SrcReg)) { + if (AArch64::FPR8RegClass.contains(DestReg) && + AArch64::FPR8RegClass.contains(SrcReg)) { if(getSubTarget().hasNEON()) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::bsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::bsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, + &AArch64::FPR128RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, + &AArch64::FPR128RegClass); + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } else { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::bsub, &ARM64::FPR32RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::bsub, &ARM64::FPR32RegClass); - BuildMI(MBB, I, DL, get(ARM64::FMOVSr), DestReg) + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, + &AArch64::FPR32RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, + &AArch64::FPR32RegClass); + BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); } return; } // Copies between GPR64 and FPR64. - if (ARM64::FPR64RegClass.contains(DestReg) && - ARM64::GPR64RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVXDr), DestReg) + if (AArch64::FPR64RegClass.contains(DestReg) && + AArch64::GPR64RegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } - if (ARM64::GPR64RegClass.contains(DestReg) && - ARM64::FPR64RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVDXr), DestReg) + if (AArch64::GPR64RegClass.contains(DestReg) && + AArch64::FPR64RegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } // Copies between GPR32 and FPR32. 
- if (ARM64::FPR32RegClass.contains(DestReg) && - ARM64::GPR32RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVWSr), DestReg) + if (AArch64::FPR32RegClass.contains(DestReg) && + AArch64::GPR32RegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } - if (ARM64::GPR32RegClass.contains(DestReg) && - ARM64::FPR32RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVSWr), DestReg) + if (AArch64::GPR32RegClass.contains(DestReg) && + AArch64::FPR32RegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } @@ -1504,11 +1510,10 @@ void ARM64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, assert(0 && "unimplemented reg-to-reg copy"); } -void ARM64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void AArch64InstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, + bool isKill, int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); @@ -1523,72 +1528,72 @@ void ARM64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, bool Offset = true; switch (RC->getSize()) { case 1: - if (ARM64::FPR8RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRBui; + if (AArch64::FPR8RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRBui; break; case 2: - if (ARM64::FPR16RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRHui; + if (AArch64::FPR16RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRHui; break; case 4: - if (ARM64::GPR32allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::STRWui; + if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { + Opc = AArch64::STRWui; if (TargetRegisterInfo::isVirtualRegister(SrcReg)) - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR32RegClass); + MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); else - assert(SrcReg != ARM64::WSP); - } else if (ARM64::FPR32RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRSui; + assert(SrcReg != AArch64::WSP); + } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRSui; break; case 8: - if (ARM64::GPR64allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::STRXui; + if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { + Opc = AArch64::STRXui; if (TargetRegisterInfo::isVirtualRegister(SrcReg)) - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR64RegClass); + MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); else - assert(SrcReg != ARM64::SP); - } else if (ARM64::FPR64RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRDui; + assert(SrcReg != AArch64::SP); + } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRDui; break; case 16: - if (ARM64::FPR128RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRQui; - else if (ARM64::DDRegClass.hasSubClassEq(RC)) { + if (AArch64::FPR128RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRQui; + else if (AArch64::DDRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register store without NEON"); - Opc = ARM64::ST1Twov1d, Offset = false; + Opc = AArch64::ST1Twov1d, Offset = false; } break; case 24: - if (ARM64::DDDRegClass.hasSubClassEq(RC)) { + if (AArch64::DDDRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register store without NEON"); - Opc = 
ARM64::ST1Threev1d, Offset = false; + Opc = AArch64::ST1Threev1d, Offset = false; } break; case 32: - if (ARM64::DDDDRegClass.hasSubClassEq(RC)) { + if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register store without NEON"); - Opc = ARM64::ST1Fourv1d, Offset = false; - } else if (ARM64::QQRegClass.hasSubClassEq(RC)) { + Opc = AArch64::ST1Fourv1d, Offset = false; + } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register store without NEON"); - Opc = ARM64::ST1Twov2d, Offset = false; + Opc = AArch64::ST1Twov2d, Offset = false; } break; case 48: - if (ARM64::QQQRegClass.hasSubClassEq(RC)) { + if (AArch64::QQQRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register store without NEON"); - Opc = ARM64::ST1Threev2d, Offset = false; + Opc = AArch64::ST1Threev2d, Offset = false; } break; case 64: - if (ARM64::QQQQRegClass.hasSubClassEq(RC)) { + if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register store without NEON"); - Opc = ARM64::ST1Fourv2d, Offset = false; + Opc = AArch64::ST1Fourv2d, Offset = false; } break; } @@ -1603,11 +1608,10 @@ void ARM64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MI.addMemOperand(MMO); } -void ARM64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void AArch64InstrInfo::loadRegFromStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, + int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); @@ -1622,72 +1626,72 @@ void ARM64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, bool Offset = true; switch (RC->getSize()) { case 1: - if (ARM64::FPR8RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRBui; + if (AArch64::FPR8RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRBui; break; case 2: - if (ARM64::FPR16RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRHui; + if (AArch64::FPR16RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRHui; break; case 4: - if (ARM64::GPR32allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::LDRWui; + if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { + Opc = AArch64::LDRWui; if (TargetRegisterInfo::isVirtualRegister(DestReg)) - MF.getRegInfo().constrainRegClass(DestReg, &ARM64::GPR32RegClass); + MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); else - assert(DestReg != ARM64::WSP); - } else if (ARM64::FPR32RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRSui; + assert(DestReg != AArch64::WSP); + } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRSui; break; case 8: - if (ARM64::GPR64allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::LDRXui; + if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { + Opc = AArch64::LDRXui; if (TargetRegisterInfo::isVirtualRegister(DestReg)) - MF.getRegInfo().constrainRegClass(DestReg, &ARM64::GPR64RegClass); + MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); else - assert(DestReg != ARM64::SP); - } else if (ARM64::FPR64RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRDui; + assert(DestReg != AArch64::SP); + } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRDui; break; case 16: - if (ARM64::FPR128RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRQui; - else if (ARM64::DDRegClass.hasSubClassEq(RC)) { + if 
(AArch64::FPR128RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRQui; + else if (AArch64::DDRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register load without NEON"); - Opc = ARM64::LD1Twov1d, Offset = false; + Opc = AArch64::LD1Twov1d, Offset = false; } break; case 24: - if (ARM64::DDDRegClass.hasSubClassEq(RC)) { + if (AArch64::DDDRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register load without NEON"); - Opc = ARM64::LD1Threev1d, Offset = false; + Opc = AArch64::LD1Threev1d, Offset = false; } break; case 32: - if (ARM64::DDDDRegClass.hasSubClassEq(RC)) { + if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register load without NEON"); - Opc = ARM64::LD1Fourv1d, Offset = false; - } else if (ARM64::QQRegClass.hasSubClassEq(RC)) { + Opc = AArch64::LD1Fourv1d, Offset = false; + } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register load without NEON"); - Opc = ARM64::LD1Twov2d, Offset = false; + Opc = AArch64::LD1Twov2d, Offset = false; } break; case 48: - if (ARM64::QQQRegClass.hasSubClassEq(RC)) { + if (AArch64::QQQRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register load without NEON"); - Opc = ARM64::LD1Threev2d, Offset = false; + Opc = AArch64::LD1Threev2d, Offset = false; } break; case 64: - if (ARM64::QQQQRegClass.hasSubClassEq(RC)) { + if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register load without NEON"); - Opc = ARM64::LD1Fourv2d, Offset = false; + Opc = AArch64::LD1Fourv2d, Offset = false; } break; } @@ -1704,8 +1708,8 @@ void ARM64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, void llvm::emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset, - const ARM64InstrInfo *TII, MachineInstr::MIFlag Flag, - bool SetNZCV) { + const AArch64InstrInfo *TII, + MachineInstr::MIFlag Flag, bool SetNZCV) { if (DestReg == SrcReg && Offset == 0) return; @@ -1726,9 +1730,9 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, unsigned Opc; if (SetNZCV) - Opc = isSub ? ARM64::SUBSXri : ARM64::ADDSXri; + Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri; else - Opc = isSub ? ARM64::SUBXri : ARM64::ADDXri; + Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri; const unsigned MaxEncoding = 0xfff; const unsigned ShiftSize = 12; const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; @@ -1744,7 +1748,7 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) .addReg(SrcReg) .addImm(ThisVal >> ShiftSize) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftSize)) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize)) .setMIFlag(Flag); SrcReg = DestReg; @@ -1755,14 +1759,14 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) .addReg(SrcReg) .addImm(Offset) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) .setMIFlag(Flag); } MachineInstr * -ARM64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl &Ops, - int FrameIndex) const { +AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + const SmallVectorImpl &Ops, + int FrameIndex) const { // This is a bit of a hack. 
Consider this instruction: // // %vreg0 = COPY %SP; GPR64all:%vreg0 @@ -1779,12 +1783,14 @@ ARM64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, if (MI->isCopy()) { unsigned DstReg = MI->getOperand(0).getReg(); unsigned SrcReg = MI->getOperand(1).getReg(); - if (SrcReg == ARM64::SP && TargetRegisterInfo::isVirtualRegister(DstReg)) { - MF.getRegInfo().constrainRegClass(DstReg, &ARM64::GPR64RegClass); + if (SrcReg == AArch64::SP && + TargetRegisterInfo::isVirtualRegister(DstReg)) { + MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); return nullptr; } - if (DstReg == ARM64::SP && TargetRegisterInfo::isVirtualRegister(SrcReg)) { - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR64RegClass); + if (DstReg == AArch64::SP && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); return nullptr; } } @@ -1793,10 +1799,10 @@ ARM64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, return nullptr; } -int llvm::isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset, - bool *OutUseUnscaledOp, - unsigned *OutUnscaledOp, - int *EmittableOffset) { +int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, + bool *OutUseUnscaledOp, + unsigned *OutUnscaledOp, + int *EmittableOffset) { int Scale = 1; bool IsSigned = false; // The ImmIdx should be changed case by case if it is not 2. @@ -1811,162 +1817,162 @@ int llvm::isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset, *OutUnscaledOp = 0; switch (MI.getOpcode()) { default: - assert(0 && "unhandled opcode in rewriteARM64FrameIndex"); + assert(0 && "unhandled opcode in rewriteAArch64FrameIndex"); // Vector spills/fills can't take an immediate offset. - case ARM64::LD1Twov2d: - case ARM64::LD1Threev2d: - case ARM64::LD1Fourv2d: - case ARM64::LD1Twov1d: - case ARM64::LD1Threev1d: - case ARM64::LD1Fourv1d: - case ARM64::ST1Twov2d: - case ARM64::ST1Threev2d: - case ARM64::ST1Fourv2d: - case ARM64::ST1Twov1d: - case ARM64::ST1Threev1d: - case ARM64::ST1Fourv1d: - return ARM64FrameOffsetCannotUpdate; - case ARM64::PRFMui: + case AArch64::LD1Twov2d: + case AArch64::LD1Threev2d: + case AArch64::LD1Fourv2d: + case AArch64::LD1Twov1d: + case AArch64::LD1Threev1d: + case AArch64::LD1Fourv1d: + case AArch64::ST1Twov2d: + case AArch64::ST1Threev2d: + case AArch64::ST1Fourv2d: + case AArch64::ST1Twov1d: + case AArch64::ST1Threev1d: + case AArch64::ST1Fourv1d: + return AArch64FrameOffsetCannotUpdate; + case AArch64::PRFMui: Scale = 8; - UnscaledOp = ARM64::PRFUMi; + UnscaledOp = AArch64::PRFUMi; break; - case ARM64::LDRXui: + case AArch64::LDRXui: Scale = 8; - UnscaledOp = ARM64::LDURXi; + UnscaledOp = AArch64::LDURXi; break; - case ARM64::LDRWui: + case AArch64::LDRWui: Scale = 4; - UnscaledOp = ARM64::LDURWi; + UnscaledOp = AArch64::LDURWi; break; - case ARM64::LDRBui: + case AArch64::LDRBui: Scale = 1; - UnscaledOp = ARM64::LDURBi; + UnscaledOp = AArch64::LDURBi; break; - case ARM64::LDRHui: + case AArch64::LDRHui: Scale = 2; - UnscaledOp = ARM64::LDURHi; + UnscaledOp = AArch64::LDURHi; break; - case ARM64::LDRSui: + case AArch64::LDRSui: Scale = 4; - UnscaledOp = ARM64::LDURSi; + UnscaledOp = AArch64::LDURSi; break; - case ARM64::LDRDui: + case AArch64::LDRDui: Scale = 8; - UnscaledOp = ARM64::LDURDi; + UnscaledOp = AArch64::LDURDi; break; - case ARM64::LDRQui: + case AArch64::LDRQui: Scale = 16; - UnscaledOp = ARM64::LDURQi; + UnscaledOp = AArch64::LDURQi; break; - case ARM64::LDRBBui: + case AArch64::LDRBBui: 
Scale = 1; - UnscaledOp = ARM64::LDURBBi; + UnscaledOp = AArch64::LDURBBi; break; - case ARM64::LDRHHui: + case AArch64::LDRHHui: Scale = 2; - UnscaledOp = ARM64::LDURHHi; + UnscaledOp = AArch64::LDURHHi; break; - case ARM64::LDRSBXui: + case AArch64::LDRSBXui: Scale = 1; - UnscaledOp = ARM64::LDURSBXi; + UnscaledOp = AArch64::LDURSBXi; break; - case ARM64::LDRSBWui: + case AArch64::LDRSBWui: Scale = 1; - UnscaledOp = ARM64::LDURSBWi; + UnscaledOp = AArch64::LDURSBWi; break; - case ARM64::LDRSHXui: + case AArch64::LDRSHXui: Scale = 2; - UnscaledOp = ARM64::LDURSHXi; + UnscaledOp = AArch64::LDURSHXi; break; - case ARM64::LDRSHWui: + case AArch64::LDRSHWui: Scale = 2; - UnscaledOp = ARM64::LDURSHWi; + UnscaledOp = AArch64::LDURSHWi; break; - case ARM64::LDRSWui: + case AArch64::LDRSWui: Scale = 4; - UnscaledOp = ARM64::LDURSWi; + UnscaledOp = AArch64::LDURSWi; break; - case ARM64::STRXui: + case AArch64::STRXui: Scale = 8; - UnscaledOp = ARM64::STURXi; + UnscaledOp = AArch64::STURXi; break; - case ARM64::STRWui: + case AArch64::STRWui: Scale = 4; - UnscaledOp = ARM64::STURWi; + UnscaledOp = AArch64::STURWi; break; - case ARM64::STRBui: + case AArch64::STRBui: Scale = 1; - UnscaledOp = ARM64::STURBi; + UnscaledOp = AArch64::STURBi; break; - case ARM64::STRHui: + case AArch64::STRHui: Scale = 2; - UnscaledOp = ARM64::STURHi; + UnscaledOp = AArch64::STURHi; break; - case ARM64::STRSui: + case AArch64::STRSui: Scale = 4; - UnscaledOp = ARM64::STURSi; + UnscaledOp = AArch64::STURSi; break; - case ARM64::STRDui: + case AArch64::STRDui: Scale = 8; - UnscaledOp = ARM64::STURDi; + UnscaledOp = AArch64::STURDi; break; - case ARM64::STRQui: + case AArch64::STRQui: Scale = 16; - UnscaledOp = ARM64::STURQi; + UnscaledOp = AArch64::STURQi; break; - case ARM64::STRBBui: + case AArch64::STRBBui: Scale = 1; - UnscaledOp = ARM64::STURBBi; + UnscaledOp = AArch64::STURBBi; break; - case ARM64::STRHHui: + case AArch64::STRHHui: Scale = 2; - UnscaledOp = ARM64::STURHHi; + UnscaledOp = AArch64::STURHHi; break; - case ARM64::LDPXi: - case ARM64::LDPDi: - case ARM64::STPXi: - case ARM64::STPDi: + case AArch64::LDPXi: + case AArch64::LDPDi: + case AArch64::STPXi: + case AArch64::STPDi: IsSigned = true; Scale = 8; break; - case ARM64::LDPQi: - case ARM64::STPQi: + case AArch64::LDPQi: + case AArch64::STPQi: IsSigned = true; Scale = 16; break; - case ARM64::LDPWi: - case ARM64::LDPSi: - case ARM64::STPWi: - case ARM64::STPSi: + case AArch64::LDPWi: + case AArch64::LDPSi: + case AArch64::STPWi: + case AArch64::STPSi: IsSigned = true; Scale = 4; break; - case ARM64::LDURXi: - case ARM64::LDURWi: - case ARM64::LDURBi: - case ARM64::LDURHi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURHHi: - case ARM64::LDURBBi: - case ARM64::LDURSBXi: - case ARM64::LDURSBWi: - case ARM64::LDURSHXi: - case ARM64::LDURSHWi: - case ARM64::LDURSWi: - case ARM64::STURXi: - case ARM64::STURWi: - case ARM64::STURBi: - case ARM64::STURHi: - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURBBi: - case ARM64::STURHHi: + case AArch64::LDURXi: + case AArch64::LDURWi: + case AArch64::LDURBi: + case AArch64::LDURHi: + case AArch64::LDURSi: + case AArch64::LDURDi: + case AArch64::LDURQi: + case AArch64::LDURHHi: + case AArch64::LDURBBi: + case AArch64::LDURSBXi: + case AArch64::LDURSBWi: + case AArch64::LDURSHXi: + case AArch64::LDURSHWi: + case AArch64::LDURSWi: + case AArch64::STURXi: + case AArch64::STURWi: + case AArch64::STURBi: + case AArch64::STURHi: + case AArch64::STURSi: + 
case AArch64::STURDi: + case AArch64::STURQi: + case AArch64::STURBBi: + case AArch64::STURHHi: Scale = 1; break; } @@ -2014,21 +2020,21 @@ int llvm::isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset, *OutUseUnscaledOp = useUnscaledOp; if (OutUnscaledOp) *OutUnscaledOp = UnscaledOp; - return ARM64FrameOffsetCanUpdate | - (Offset == 0 ? ARM64FrameOffsetIsLegal : 0); + return AArch64FrameOffsetCanUpdate | + (Offset == 0 ? AArch64FrameOffsetIsLegal : 0); } -bool llvm::rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, - const ARM64InstrInfo *TII) { +bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const AArch64InstrInfo *TII) { unsigned Opcode = MI.getOpcode(); unsigned ImmIdx = FrameRegIdx + 1; - if (Opcode == ARM64::ADDSXri || Opcode == ARM64::ADDXri) { + if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { Offset += MI.getOperand(ImmIdx).getImm(); emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), MI.getOperand(0).getReg(), FrameReg, Offset, TII, - MachineInstr::NoFlags, (Opcode == ARM64::ADDSXri)); + MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); MI.eraseFromParent(); Offset = 0; return true; @@ -2037,10 +2043,10 @@ bool llvm::rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, int NewOffset; unsigned UnscaledOp; bool UseUnscaledOp; - int Status = isARM64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, &UnscaledOp, - &NewOffset); - if (Status & ARM64FrameOffsetCanUpdate) { - if (Status & ARM64FrameOffsetIsLegal) + int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, + &UnscaledOp, &NewOffset); + if (Status & AArch64FrameOffsetCanUpdate) { + if (Status & AArch64FrameOffsetIsLegal) // Replace the FrameIndex with FrameReg. MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); if (UseUnscaledOp) @@ -2053,7 +2059,7 @@ bool llvm::rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return false; } -void ARM64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { - NopInst.setOpcode(ARM64::HINT); +void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + NopInst.setOpcode(AArch64::HINT); NopInst.addOperand(MCOperand::CreateImm(0)); } diff --git a/lib/Target/ARM64/ARM64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h similarity index 81% rename from lib/Target/ARM64/ARM64InstrInfo.h rename to lib/Target/AArch64/AArch64InstrInfo.h index ce195e763b2b..90ce75f26d42 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -1,4 +1,4 @@ -//===- ARM64InstrInfo.h - ARM64 Instruction Information ---------*- C++ -*-===// +//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,44 +7,44 @@ // //===----------------------------------------------------------------------===// // -// This file contains the ARM64 implementation of the TargetInstrInfo class. +// This file contains the AArch64 implementation of the TargetInstrInfo class. 
// //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM64INSTRINFO_H -#define LLVM_TARGET_ARM64INSTRINFO_H +#ifndef LLVM_TARGET_AArch64INSTRINFO_H +#define LLVM_TARGET_AArch64INSTRINFO_H -#include "ARM64.h" -#include "ARM64RegisterInfo.h" +#include "AArch64.h" +#include "AArch64RegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER -#include "ARM64GenInstrInfo.inc" +#include "AArch64GenInstrInfo.inc" namespace llvm { -class ARM64Subtarget; -class ARM64TargetMachine; +class AArch64Subtarget; +class AArch64TargetMachine; -class ARM64InstrInfo : public ARM64GenInstrInfo { +class AArch64InstrInfo : public AArch64GenInstrInfo { // Reserve bits in the MachineMemOperand target hint flags, starting at 1. // They will be shifted into MOTargetHintStart when accessed. enum TargetMemOperandFlags { MOSuppressPair = 1 }; - const ARM64RegisterInfo RI; - const ARM64Subtarget &Subtarget; + const AArch64RegisterInfo RI; + const AArch64Subtarget &Subtarget; public: - explicit ARM64InstrInfo(const ARM64Subtarget &STI); + explicit AArch64InstrInfo(const AArch64Subtarget &STI); /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). - const ARM64RegisterInfo &getRegisterInfo() const { return RI; } + const AArch64RegisterInfo &getRegisterInfo() const { return RI; } - const ARM64Subtarget &getSubTarget() const { return Subtarget; } + const AArch64Subtarget &getSubTarget() const { return Subtarget; } unsigned GetInstSizeInBytes(const MachineInstr *MI) const; @@ -60,8 +60,8 @@ class ARM64InstrInfo : public ARM64GenInstrInfo { /// is non-zero. bool hasShiftedReg(const MachineInstr *MI) const; - /// Returns true if there is an extendable register and that the extending value - /// is non-zero. + /// Returns true if there is an extendable register and that the extending + /// value is non-zero. bool hasExtendedReg(const MachineInstr *MI) const; /// \brief Does this instruction set its full destination register to zero? @@ -168,63 +168,63 @@ class ARM64InstrInfo : public ARM64GenInstrInfo { /// if necessary, to be replaced by the scavenger at the end of PEI. void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset, - const ARM64InstrInfo *TII, + const AArch64InstrInfo *TII, MachineInstr::MIFlag = MachineInstr::NoFlags, bool SetNZCV = false); -/// rewriteARM64FrameIndex - Rewrite MI to access 'Offset' bytes from the +/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the /// FP. Return false if the offset could not be handled directly in MI, and /// return the left-over portion by reference. -bool rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, +bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, - const ARM64InstrInfo *TII); + const AArch64InstrInfo *TII); -/// \brief Use to report the frame offset status in isARM64FrameOffsetLegal. -enum ARM64FrameOffsetStatus { - ARM64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply. - ARM64FrameOffsetIsLegal = 0x1, ///< Offset is legal. - ARM64FrameOffsetCanUpdate = 0x2 ///< Offset can apply, at least partly. +/// \brief Use to report the frame offset status in isAArch64FrameOffsetLegal. 
+enum AArch64FrameOffsetStatus { + AArch64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply. + AArch64FrameOffsetIsLegal = 0x1, ///< Offset is legal. + AArch64FrameOffsetCanUpdate = 0x2 ///< Offset can apply, at least partly. }; /// \brief Check if the @p Offset is a valid frame offset for @p MI. /// The returned value reports the validity of the frame offset for @p MI. -/// It uses the values defined by ARM64FrameOffsetStatus for that. -/// If result == ARM64FrameOffsetCannotUpdate, @p MI cannot be updated to +/// It uses the values defined by AArch64FrameOffsetStatus for that. +/// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to /// use an offset.eq -/// If result & ARM64FrameOffsetIsLegal, @p Offset can completely be +/// If result & AArch64FrameOffsetIsLegal, @p Offset can completely be /// rewriten in @p MI. -/// If result & ARM64FrameOffsetCanUpdate, @p Offset contains the +/// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the /// amount that is off the limit of the legal offset. /// If set, @p OutUseUnscaledOp will contain the whether @p MI should be /// turned into an unscaled operator, which opcode is in @p OutUnscaledOp. /// If set, @p EmittableOffset contains the amount that can be set in @p MI /// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that /// is a legal offset. -int isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset, +int isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, bool *OutUseUnscaledOp = nullptr, unsigned *OutUnscaledOp = nullptr, int *EmittableOffset = nullptr); -static inline bool isUncondBranchOpcode(int Opc) { return Opc == ARM64::B; } +static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; } static inline bool isCondBranchOpcode(int Opc) { switch (Opc) { - case ARM64::Bcc: - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: - case ARM64::TBZW: - case ARM64::TBZX: - case ARM64::TBNZW: - case ARM64::TBNZX: + case AArch64::Bcc: + case AArch64::CBZW: + case AArch64::CBZX: + case AArch64::CBNZW: + case AArch64::CBNZX: + case AArch64::TBZW: + case AArch64::TBZX: + case AArch64::TBNZW: + case AArch64::TBNZX: return true; default: return false; } } -static inline bool isIndirectBranchOpcode(int Opc) { return Opc == ARM64::BR; } +static inline bool isIndirectBranchOpcode(int Opc) { return Opc == AArch64::BR; } } // end namespace llvm diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td similarity index 83% rename from lib/Target/ARM64/ARM64InstrInfo.td rename to lib/Target/AArch64/AArch64InstrInfo.td index e68980c83c54..9ad36e8740db 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -1,4 +1,4 @@ -//===- ARM64InstrInfo.td - Describe the ARM64 Instructions -*- tablegen -*-===// +//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// ARM64 Instruction definitions. +// AArch64 Instruction definitions. // //===----------------------------------------------------------------------===// @@ -26,7 +26,7 @@ def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; //===----------------------------------------------------------------------===// -// ARM64-specific DAG Nodes. +// AArch64-specific DAG Nodes. 
// // SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS @@ -50,196 +50,198 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, SDTCisVT<1, i32>, SDTCisVT<4, i32>]>; -def SDT_ARM64Brcond : SDTypeProfile<0, 3, +def SDT_AArch64Brcond : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; -def SDT_ARM64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; -def SDT_ARM64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, +def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; +def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, OtherVT>]>; -def SDT_ARM64CSel : SDTypeProfile<1, 4, +def SDT_AArch64CSel : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>, SDTCisVT<4, i32>]>; -def SDT_ARM64FCmp : SDTypeProfile<0, 2, +def SDT_AArch64FCmp : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>; -def SDT_ARM64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; -def SDT_ARM64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; -def SDT_ARM64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, +def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; +def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; +def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; -def SDT_ARM64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; -def SDT_ARM64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; -def SDT_ARM64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, +def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; +def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; +def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>; -def SDT_ARM64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; -def SDT_ARM64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, +def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; +def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>]>; -def SDT_ARM64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; +def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; -def SDT_ARM64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; -def SDT_ARM64fcmpz : SDTypeProfile<1, 1, []>; -def SDT_ARM64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; -def SDT_ARM64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, +def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; +def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>; +def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; +def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>; -def SDT_ARM64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, +def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>; -def SDT_ARM64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>; -def SDT_ARM64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; +def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>; +def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; -def SDT_ARM64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; +def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; -def SDT_ARM64TLSDescCall 
: SDTypeProfile<0, -2, [SDTCisPtrTy<0>, +def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; -def SDT_ARM64WrapperLarge : SDTypeProfile<1, 4, +def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, SDTCisSameAs<1, 4>]>; // Node definitions. -def ARM64adrp : SDNode<"ARM64ISD::ADRP", SDTIntUnaryOp, []>; -def ARM64addlow : SDNode<"ARM64ISD::ADDlow", SDTIntBinOp, []>; -def ARM64LOADgot : SDNode<"ARM64ISD::LOADgot", SDTIntUnaryOp>; -def ARM64callseq_start : SDNode<"ISD::CALLSEQ_START", +def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; +def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>; +def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>; +def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDCallSeqStart<[ SDTCisVT<0, i32> ]>, [SDNPHasChain, SDNPOutGlue]>; -def ARM64callseq_end : SDNode<"ISD::CALLSEQ_END", +def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def ARM64call : SDNode<"ARM64ISD::CALL", +def AArch64call : SDNode<"AArch64ISD::CALL", SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def ARM64brcond : SDNode<"ARM64ISD::BRCOND", SDT_ARM64Brcond, +def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, [SDNPHasChain]>; -def ARM64cbz : SDNode<"ARM64ISD::CBZ", SDT_ARM64cbz, +def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, [SDNPHasChain]>; -def ARM64cbnz : SDNode<"ARM64ISD::CBNZ", SDT_ARM64cbz, +def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz, [SDNPHasChain]>; -def ARM64tbz : SDNode<"ARM64ISD::TBZ", SDT_ARM64tbz, +def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz, [SDNPHasChain]>; -def ARM64tbnz : SDNode<"ARM64ISD::TBNZ", SDT_ARM64tbz, +def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz, [SDNPHasChain]>; -def ARM64csel : SDNode<"ARM64ISD::CSEL", SDT_ARM64CSel>; -def ARM64csinv : SDNode<"ARM64ISD::CSINV", SDT_ARM64CSel>; -def ARM64csneg : SDNode<"ARM64ISD::CSNEG", SDT_ARM64CSel>; -def ARM64csinc : SDNode<"ARM64ISD::CSINC", SDT_ARM64CSel>; -def ARM64retflag : SDNode<"ARM64ISD::RET_FLAG", SDTNone, +def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>; +def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>; +def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>; +def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>; +def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def ARM64adc : SDNode<"ARM64ISD::ADC", SDTBinaryArithWithFlagsIn >; -def ARM64sbc : SDNode<"ARM64ISD::SBC", SDTBinaryArithWithFlagsIn>; -def ARM64add_flag : SDNode<"ARM64ISD::ADDS", SDTBinaryArithWithFlagsOut, +def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >; +def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>; +def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, [SDNPCommutative]>; -def ARM64sub_flag : SDNode<"ARM64ISD::SUBS", SDTBinaryArithWithFlagsOut>; -def ARM64and_flag : SDNode<"ARM64ISD::ANDS", SDTBinaryArithWithFlagsOut, +def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>; +def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, [SDNPCommutative]>; -def ARM64adc_flag : SDNode<"ARM64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; -def ARM64sbc_flag : 
SDNode<"ARM64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; - -def ARM64threadpointer : SDNode<"ARM64ISD::THREAD_POINTER", SDTPtrLeaf>; - -def ARM64fcmp : SDNode<"ARM64ISD::FCMP", SDT_ARM64FCmp>; - -def ARM64fmax : SDNode<"ARM64ISD::FMAX", SDTFPBinOp>; -def ARM64fmin : SDNode<"ARM64ISD::FMIN", SDTFPBinOp>; - -def ARM64dup : SDNode<"ARM64ISD::DUP", SDT_ARM64Dup>; -def ARM64duplane8 : SDNode<"ARM64ISD::DUPLANE8", SDT_ARM64DupLane>; -def ARM64duplane16 : SDNode<"ARM64ISD::DUPLANE16", SDT_ARM64DupLane>; -def ARM64duplane32 : SDNode<"ARM64ISD::DUPLANE32", SDT_ARM64DupLane>; -def ARM64duplane64 : SDNode<"ARM64ISD::DUPLANE64", SDT_ARM64DupLane>; - -def ARM64zip1 : SDNode<"ARM64ISD::ZIP1", SDT_ARM64Zip>; -def ARM64zip2 : SDNode<"ARM64ISD::ZIP2", SDT_ARM64Zip>; -def ARM64uzp1 : SDNode<"ARM64ISD::UZP1", SDT_ARM64Zip>; -def ARM64uzp2 : SDNode<"ARM64ISD::UZP2", SDT_ARM64Zip>; -def ARM64trn1 : SDNode<"ARM64ISD::TRN1", SDT_ARM64Zip>; -def ARM64trn2 : SDNode<"ARM64ISD::TRN2", SDT_ARM64Zip>; - -def ARM64movi_edit : SDNode<"ARM64ISD::MOVIedit", SDT_ARM64MOVIedit>; -def ARM64movi_shift : SDNode<"ARM64ISD::MOVIshift", SDT_ARM64MOVIshift>; -def ARM64movi_msl : SDNode<"ARM64ISD::MOVImsl", SDT_ARM64MOVIshift>; -def ARM64mvni_shift : SDNode<"ARM64ISD::MVNIshift", SDT_ARM64MOVIshift>; -def ARM64mvni_msl : SDNode<"ARM64ISD::MVNImsl", SDT_ARM64MOVIshift>; -def ARM64movi : SDNode<"ARM64ISD::MOVI", SDT_ARM64MOVIedit>; -def ARM64fmov : SDNode<"ARM64ISD::FMOV", SDT_ARM64MOVIedit>; - -def ARM64rev16 : SDNode<"ARM64ISD::REV16", SDT_ARM64UnaryVec>; -def ARM64rev32 : SDNode<"ARM64ISD::REV32", SDT_ARM64UnaryVec>; -def ARM64rev64 : SDNode<"ARM64ISD::REV64", SDT_ARM64UnaryVec>; -def ARM64ext : SDNode<"ARM64ISD::EXT", SDT_ARM64ExtVec>; - -def ARM64vashr : SDNode<"ARM64ISD::VASHR", SDT_ARM64vshift>; -def ARM64vlshr : SDNode<"ARM64ISD::VLSHR", SDT_ARM64vshift>; -def ARM64vshl : SDNode<"ARM64ISD::VSHL", SDT_ARM64vshift>; -def ARM64sqshli : SDNode<"ARM64ISD::SQSHL_I", SDT_ARM64vshift>; -def ARM64uqshli : SDNode<"ARM64ISD::UQSHL_I", SDT_ARM64vshift>; -def ARM64sqshlui : SDNode<"ARM64ISD::SQSHLU_I", SDT_ARM64vshift>; -def ARM64srshri : SDNode<"ARM64ISD::SRSHR_I", SDT_ARM64vshift>; -def ARM64urshri : SDNode<"ARM64ISD::URSHR_I", SDT_ARM64vshift>; - -def ARM64not: SDNode<"ARM64ISD::NOT", SDT_ARM64unvec>; -def ARM64bit: SDNode<"ARM64ISD::BIT", SDT_ARM64trivec>; -def ARM64bsl: SDNode<"ARM64ISD::BSL", SDT_ARM64trivec>; - -def ARM64cmeq: SDNode<"ARM64ISD::CMEQ", SDT_ARM64binvec>; -def ARM64cmge: SDNode<"ARM64ISD::CMGE", SDT_ARM64binvec>; -def ARM64cmgt: SDNode<"ARM64ISD::CMGT", SDT_ARM64binvec>; -def ARM64cmhi: SDNode<"ARM64ISD::CMHI", SDT_ARM64binvec>; -def ARM64cmhs: SDNode<"ARM64ISD::CMHS", SDT_ARM64binvec>; - -def ARM64fcmeq: SDNode<"ARM64ISD::FCMEQ", SDT_ARM64fcmp>; -def ARM64fcmge: SDNode<"ARM64ISD::FCMGE", SDT_ARM64fcmp>; -def ARM64fcmgt: SDNode<"ARM64ISD::FCMGT", SDT_ARM64fcmp>; - -def ARM64cmeqz: SDNode<"ARM64ISD::CMEQz", SDT_ARM64unvec>; -def ARM64cmgez: SDNode<"ARM64ISD::CMGEz", SDT_ARM64unvec>; -def ARM64cmgtz: SDNode<"ARM64ISD::CMGTz", SDT_ARM64unvec>; -def ARM64cmlez: SDNode<"ARM64ISD::CMLEz", SDT_ARM64unvec>; -def ARM64cmltz: SDNode<"ARM64ISD::CMLTz", SDT_ARM64unvec>; -def ARM64cmtst : PatFrag<(ops node:$LHS, node:$RHS), - (ARM64not (ARM64cmeqz (and node:$LHS, node:$RHS)))>; - -def ARM64fcmeqz: SDNode<"ARM64ISD::FCMEQz", SDT_ARM64fcmpz>; -def ARM64fcmgez: SDNode<"ARM64ISD::FCMGEz", SDT_ARM64fcmpz>; -def ARM64fcmgtz: SDNode<"ARM64ISD::FCMGTz", SDT_ARM64fcmpz>; -def ARM64fcmlez: SDNode<"ARM64ISD::FCMLEz", SDT_ARM64fcmpz>; -def 
ARM64fcmltz: SDNode<"ARM64ISD::FCMLTz", SDT_ARM64fcmpz>; - -def ARM64bici: SDNode<"ARM64ISD::BICi", SDT_ARM64vecimm>; -def ARM64orri: SDNode<"ARM64ISD::ORRi", SDT_ARM64vecimm>; - -def ARM64neg : SDNode<"ARM64ISD::NEG", SDT_ARM64unvec>; - -def ARM64tcret: SDNode<"ARM64ISD::TC_RETURN", SDT_ARM64TCRET, +def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; +def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; + +def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; + +def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; + +def AArch64fmax : SDNode<"AArch64ISD::FMAX", SDTFPBinOp>; +def AArch64fmin : SDNode<"AArch64ISD::FMIN", SDTFPBinOp>; + +def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; +def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; +def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; +def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; +def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; + +def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; +def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; +def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; +def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>; +def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>; +def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>; + +def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>; +def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>; +def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>; +def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>; +def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>; +def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>; +def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>; + +def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>; +def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>; +def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>; +def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; + +def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; +def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; +def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; +def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; +def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>; +def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; +def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; +def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; + +def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>; +def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>; +def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>; + +def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>; +def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>; +def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>; +def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>; +def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>; + +def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>; +def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>; +def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", 
SDT_AArch64fcmp>; + +def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>; +def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>; +def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>; +def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>; +def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>; +def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS), + (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>; + +def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>; +def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>; +def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>; +def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>; +def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>; + +def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>; +def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>; + +def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>; + +def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def ARM64Prefetch : SDNode<"ARM64ISD::PREFETCH", SDT_ARM64PREFETCH, +def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, [SDNPHasChain, SDNPSideEffect]>; -def ARM64sitof: SDNode<"ARM64ISD::SITOF", SDT_ARM64ITOF>; -def ARM64uitof: SDNode<"ARM64ISD::UITOF", SDT_ARM64ITOF>; +def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; +def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; -def ARM64tlsdesc_call : SDNode<"ARM64ISD::TLSDESC_CALL", SDT_ARM64TLSDescCall, - [SDNPInGlue, SDNPOutGlue, SDNPHasChain, - SDNPVariadic]>; +def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL", + SDT_AArch64TLSDescCall, + [SDNPInGlue, SDNPOutGlue, SDNPHasChain, + SDNPVariadic]>; -def ARM64WrapperLarge : SDNode<"ARM64ISD::WrapperLarge", SDT_ARM64WrapperLarge>; +def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", + SDT_AArch64WrapperLarge>; //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// ARM64 Instruction Predicate Definitions. +// AArch64 Instruction Predicate Definitions. // def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">; def NoZCZ : Predicate<"!Subtarget->hasZeroCycleZeroing()">; @@ -248,7 +250,7 @@ def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">; def ForCodeSize : Predicate<"ForCodeSize">; def NotForCodeSize : Predicate<"!ForCodeSize">; -include "ARM64InstrFormats.td" +include "AArch64InstrFormats.td" //===----------------------------------------------------------------------===// @@ -258,63 +260,63 @@ include "ARM64InstrFormats.td" let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), - [(ARM64callseq_start timm:$amt)]>; + [(AArch64callseq_start timm:$amt)]>; def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), - [(ARM64callseq_end timm:$amt1, timm:$amt2)]>; + [(AArch64callseq_end timm:$amt1, timm:$amt2)]>; } // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 let isReMaterializable = 1, isCodeGenOnly = 1 in { // FIXME: The following pseudo instructions are only needed because remat // cannot handle multiple instructions. When that changes, they can be -// removed, along with the ARM64Wrapper node. +// removed, along with the AArch64Wrapper node. 
let AddedComplexity = 10 in def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), - [(set GPR64:$dst, (ARM64LOADgot tglobaladdr:$addr))]>, + [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, Sched<[WriteLDAdr]>; // The MOVaddr instruction should match only when the add is not folded // into a load or store address. def MOVaddr : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tglobaladdr:$hi), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), tglobaladdr:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrJT : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tjumptable:$hi), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), tjumptable:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrCP : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tconstpool:$hi), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), tconstpool:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrBA : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tblockaddress:$hi), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), tblockaddress:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrTLS : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tglobaltlsaddr:$hi), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), tglobaltlsaddr:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrEXT : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp texternalsym:$hi), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), texternalsym:$low))]>, Sched<[WriteAdrAdr]>; } // isReMaterializable, isCodeGenOnly -def : Pat<(ARM64LOADgot tglobaltlsaddr:$addr), +def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr), (LOADgot tglobaltlsaddr:$addr)>; -def : Pat<(ARM64LOADgot texternalsym:$addr), +def : Pat<(AArch64LOADgot texternalsym:$addr), (LOADgot texternalsym:$addr)>; -def : Pat<(ARM64LOADgot tconstpool:$addr), +def : Pat<(AArch64LOADgot tconstpool:$addr), (LOADgot tconstpool:$addr)>; //===----------------------------------------------------------------------===// @@ -345,7 +347,7 @@ def MSRpstate: MSRpstateI; // The thread pointer (on Linux, at least, where this has been implemented) is // TPIDR_EL0. -def : Pat<(ARM64threadpointer), (MRS 0xde82)>; +def : Pat<(AArch64threadpointer), (MRS 0xde82)>; // Generic system instructions def SYSxt : SystemXtI<0, "sys">; @@ -464,28 +466,28 @@ def : Pat<(i64 i64imm_32bit:$src), // Deal with the various forms of (ELF) large addressing with MOVZ/MOVK // sequences. 
-def : Pat<(ARM64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, +def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, tglobaladdr:$g1, tglobaladdr:$g0), (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48), tglobaladdr:$g2, 32), tglobaladdr:$g1, 16), tglobaladdr:$g0, 0)>; -def : Pat<(ARM64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, +def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, tblockaddress:$g1, tblockaddress:$g0), (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48), tblockaddress:$g2, 32), tblockaddress:$g1, 16), tblockaddress:$g0, 0)>; -def : Pat<(ARM64WrapperLarge tconstpool:$g3, tconstpool:$g2, +def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, tconstpool:$g1, tconstpool:$g0), (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48), tconstpool:$g2, 32), tconstpool:$g1, 16), tconstpool:$g0, 0)>; -def : Pat<(ARM64WrapperLarge tjumptable:$g3, tjumptable:$g2, +def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, tjumptable:$g1, tjumptable:$g0), (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g3, 48), tjumptable:$g2, 32), @@ -498,8 +500,8 @@ def : Pat<(ARM64WrapperLarge tjumptable:$g3, tjumptable:$g2, //===----------------------------------------------------------------------===// // Add/subtract with carry. -defm ADC : AddSubCarry<0, "adc", "adcs", ARM64adc, ARM64adc_flag>; -defm SBC : AddSubCarry<1, "sbc", "sbcs", ARM64sbc, ARM64sbc_flag>; +defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; +defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; @@ -519,8 +521,8 @@ def : InstAlias<"mov $dst, $src", def : InstAlias<"mov $dst, $src", (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; -defm ADDS : AddSubS<0, "adds", ARM64add_flag, "cmn">; -defm SUBS : AddSubS<1, "subs", ARM64sub_flag, "cmp">; +defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn">; +defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp">; // Use SUBS instead of SUB to enable CSE between SUBS and SUB. def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), @@ -558,13 +560,13 @@ def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). // These patterns capture that transformation. 
let AddedComplexity = 1 in { -def : Pat<(ARM64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), +def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(ARM64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), +def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -def : Pat<(ARM64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), +def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(ARM64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), +def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; } @@ -587,8 +589,8 @@ def : InstAlias<"negs $dst, $src$shift", defm UDIV : Div<0, "udiv", udiv>; defm SDIV : Div<1, "sdiv", sdiv>; let isCodeGenOnly = 1 in { -defm UDIV_Int : Div<0, "udiv", int_arm64_udiv>; -defm SDIV_Int : Div<1, "sdiv", int_arm64_sdiv>; +defm UDIV_Int : Div<0, "udiv", int_aarch64_udiv>; +defm SDIV_Int : Div<1, "sdiv", int_aarch64_sdiv>; } // Variable shift @@ -653,15 +655,15 @@ def SMULHrr : MulHi<0b010, "smulh", mulhs>; def UMULHrr : MulHi<0b110, "umulh", mulhu>; // CRC32 -def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_arm64_crc32b, "crc32b">; -def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_arm64_crc32h, "crc32h">; -def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_arm64_crc32w, "crc32w">; -def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_arm64_crc32x, "crc32x">; +def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; +def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; +def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; +def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; -def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_arm64_crc32cb, "crc32cb">; -def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_arm64_crc32ch, "crc32ch">; -def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_arm64_crc32cw, "crc32cw">; -def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_arm64_crc32cx, "crc32cx">; +def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; +def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; +def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; +def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; //===----------------------------------------------------------------------===// @@ -669,7 +671,7 @@ def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_arm64_crc32cx, "crc32cx">; //===----------------------------------------------------------------------===// // (immediate) -defm ANDS : LogicalImmS<0b11, "ands", ARM64and_flag>; +defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag>; defm AND : LogicalImm<0b00, "and", and>; defm EOR : LogicalImm<0b10, "eor", xor>; defm ORR : LogicalImm<0b01, "orr", or>; @@ -684,9 +686,9 @@ def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, // (register) -defm ANDS : LogicalRegS<0b11, 0, "ands", ARM64and_flag>; +defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; defm BICS : LogicalRegS<0b11, 1, "bics", - BinOpFrag<(ARM64and_flag node:$LHS, (not node:$RHS))>>; + BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; defm AND : LogicalReg<0b00, 0, "and", and>; defm BIC : LogicalReg<0b00, 1, "bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; @@ -900,26 +902,26 @@ 
defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>; defm CSINV : CondSelectOp<1, 0b00, "csinv", not>; defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>; -def : Pat<(ARM64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), +def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), +def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), +def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), +def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), +def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), +def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV), +def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV), (CSINCWr WZR, WZR, (i32 imm:$cc))>; -def : Pat<(ARM64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV), +def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV), (CSINCXr XZR, XZR, (i32 imm:$cc))>; -def : Pat<(ARM64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV), +def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV), (CSINVWr WZR, WZR, (i32 imm:$cc))>; -def : Pat<(ARM64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV), +def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV), (CSINVXr XZR, XZR, (i32 imm:$cc))>; // The inverse of the condition code from the alias instruction is what is used @@ -959,12 +961,12 @@ def ADR : ADRI<0, "adr", adrlabel, []>; } // neverHasSideEffects = 1 def ADRP : ADRI<1, "adrp", adrplabel, - [(set GPR64:$Xd, (ARM64adrp tglobaladdr:$label))]>; + [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>; } // isReMaterializable = 1 // page address of a constant pool entry, block address -def : Pat<(ARM64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>; -def : Pat<(ARM64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>; +def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>; +def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>; //===----------------------------------------------------------------------===// // Unconditional branch (register) instructions. @@ -980,7 +982,7 @@ def ERET : SpecialReturn<0b0100, "eret">; def : InstAlias<"ret", (RET LR)>; let isCall = 1, Defs = [LR], Uses = [SP] in { -def BLR : BranchReg<0b0001, "blr", [(ARM64call GPR64:$Rn)]>; +def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>; } // isCall let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { @@ -990,7 +992,7 @@ def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; // Create a separate pseudo-instruction for codegen to use so that we don't // flag lr as used in every function. It'll be restored before the RET by the // epilogue if it's legitimately used. 
-def RET_ReallyLR : Pseudo<(outs), (ins), [(ARM64retflag)]> { +def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]> { let isTerminator = 1; let isBarrier = 1; let isReturn = 1; @@ -1009,9 +1011,9 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> { let isCall = 1, Defs = [LR] in def TLSDESC_BLR : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym), - [(ARM64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>; + [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>; -def : Pat<(ARM64tlsdesc_call GPR64:$dest, texternalsym:$sym), +def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym), (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>; //===----------------------------------------------------------------------===// // Conditional branch (immediate) instruction. @@ -1021,14 +1023,14 @@ def Bcc : BranchCond; //===----------------------------------------------------------------------===// // Compare-and-branch instructions. //===----------------------------------------------------------------------===// -defm CBZ : CmpBranch<0, "cbz", ARM64cbz>; -defm CBNZ : CmpBranch<1, "cbnz", ARM64cbnz>; +defm CBZ : CmpBranch<0, "cbz", AArch64cbz>; +defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>; //===----------------------------------------------------------------------===// // Test-bit-and-branch instructions. //===----------------------------------------------------------------------===// -defm TBZ : TestBranch<0, "tbz", ARM64tbz>; -defm TBNZ : TestBranch<1, "tbnz", ARM64tbnz>; +defm TBZ : TestBranch<0, "tbz", AArch64tbz>; +defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>; //===----------------------------------------------------------------------===// // Unconditional branch (immediate) instructions. @@ -1038,9 +1040,9 @@ def B : BranchImm<0, "b", [(br bb:$addr)]>; } // isBranch, isTerminator, isBarrier let isCall = 1, Defs = [LR], Uses = [SP] in { -def BL : CallImm<1, "bl", [(ARM64call tglobaladdr:$addr)]>; +def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>; } // isCall -def : Pat<(ARM64call texternalsym:$func), (BL texternalsym:$func)>; +def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>; //===----------------------------------------------------------------------===// // Exception generation instructions. @@ -1432,7 +1434,7 @@ def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), // Pre-fetch. def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", - [(ARM64Prefetch imm:$Rt, + [(AArch64Prefetch imm:$Rt, (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; @@ -1451,7 +1453,7 @@ def LDRSWl : LoadLiteral<0b10, 0, GPR64, "ldrsw">; // prefetch def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>; -// [(ARM64Prefetch imm:$Rt, tglobaladdr:$label)]>; +// [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>; //--- // (unscaled immediate) @@ -1650,7 +1652,7 @@ def : InstAlias<"ldrsw $Rt, [$Rn, $offset]", // Pre-fetch. defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", - [(ARM64Prefetch imm:$Rt, + [(AArch64Prefetch imm:$Rt, (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; //--- @@ -2187,23 +2189,23 @@ def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; // Scaled floating point to integer conversion instructions. 
//===----------------------------------------------------------------------===// -defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_arm64_neon_fcvtas>; -defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_arm64_neon_fcvtau>; -defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_arm64_neon_fcvtms>; -defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_arm64_neon_fcvtmu>; -defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_arm64_neon_fcvtns>; -defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_arm64_neon_fcvtnu>; -defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_arm64_neon_fcvtps>; -defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_arm64_neon_fcvtpu>; +defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>; +defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>; +defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>; +defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>; +defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>; +defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>; +defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>; +defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>; defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>; defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>; defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>; defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>; let isCodeGenOnly = 1 in { -defm FCVTZS_Int : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", int_arm64_neon_fcvtzs>; -defm FCVTZU_Int : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", int_arm64_neon_fcvtzu>; -defm FCVTZS_Int : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_arm64_neon_fcvtzs>; -defm FCVTZU_Int : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_arm64_neon_fcvtzu>; +defm FCVTZS_Int : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>; +defm FCVTZU_Int : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>; +defm FCVTZS_Int : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>; +defm FCVTZU_Int : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>; } //===----------------------------------------------------------------------===// @@ -2246,10 +2248,10 @@ defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>; defm FRINTA : SingleOperandFPData<0b1100, "frinta", frnd>; defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>; -defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_arm64_neon_frintn>; +defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>; defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>; -def : Pat<(v1f64 (int_arm64_neon_frintn (v1f64 FPR64:$Rn))), +def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))), (FRINTNDr FPR64:$Rn)>; // FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior @@ -2274,23 +2276,23 @@ defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; let SchedRW = [WriteFDiv] in { defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; } -defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_arm64_neon_fmaxnm>; -defm FMAX : TwoOperandFPData<0b0100, "fmax", ARM64fmax>; -defm FMINNM : TwoOperandFPData<0b0111, "fminnm", 
int_arm64_neon_fminnm>; -defm FMIN : TwoOperandFPData<0b0101, "fmin", ARM64fmin>; +defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_aarch64_neon_fmaxnm>; +defm FMAX : TwoOperandFPData<0b0100, "fmax", AArch64fmax>; +defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_aarch64_neon_fminnm>; +defm FMIN : TwoOperandFPData<0b0101, "fmin", AArch64fmin>; let SchedRW = [WriteFMul] in { defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; } defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; -def : Pat<(v1f64 (ARM64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (AArch64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (ARM64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (AArch64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMINDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (int_arm64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (int_aarch64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (int_arm64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (int_aarch64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; //===----------------------------------------------------------------------===// @@ -2335,7 +2337,7 @@ def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))), //===----------------------------------------------------------------------===// defm FCMPE : FPComparison<1, "fcmpe">; -defm FCMP : FPComparison<0, "fcmp", ARM64fcmp>; +defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>; //===----------------------------------------------------------------------===// // Floating point conditional comparison instructions. @@ -2356,7 +2358,7 @@ defm FCSEL : FPCondSelect<"fcsel">; def F128CSEL : Pseudo<(outs FPR128:$Rd), (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond), [(set (f128 FPR128:$Rd), - (ARM64csel FPR128:$Rn, FPR128:$Rm, + (AArch64csel FPR128:$Rn, FPR128:$Rm, (i32 imm:$cond), NZCV))]> { let Uses = [NZCV]; let usesCustomInserter = 1; @@ -2375,28 +2377,28 @@ defm FMOV : FPMoveImmediate<"fmov">; // Advanced SIMD two vector instructions. 
//===----------------------------------------------------------------------===// -defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_arm64_neon_abs>; -defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_arm64_neon_cls>; +defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>; +defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>; defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; -defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", ARM64cmeqz>; -defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", ARM64cmgez>; -defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", ARM64cmgtz>; -defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", ARM64cmlez>; -defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", ARM64cmltz>; +defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>; +defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>; +defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>; +defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>; +defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>; defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>; -defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", ARM64fcmeqz>; -defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", ARM64fcmgez>; -defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", ARM64fcmgtz>; -defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", ARM64fcmlez>; -defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", ARM64fcmltz>; -defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_arm64_neon_fcvtas>; -defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_arm64_neon_fcvtau>; +defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; +defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>; +defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; +defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>; +defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; +defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>; +defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>; defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">; -def : Pat<(v4f32 (int_arm64_neon_vcvthf2fp (v4i16 V64:$Rn))), +def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; -def : Pat<(v4f32 (int_arm64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), +def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), (i64 4)))), (FCVTLv8i16 V128:$Rn)>; def : Pat<(v2f64 (fextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; @@ -2404,41 +2406,41 @@ def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn), (i64 2))))), (FCVTLv4i32 V128:$Rn)>; -defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_arm64_neon_fcvtms>; -defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_arm64_neon_fcvtmu>; -defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_arm64_neon_fcvtns>; -defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_arm64_neon_fcvtnu>; +defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; +defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; +defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>; +defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>; defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, 
"fcvtn">; -def : Pat<(v4i16 (int_arm64_neon_vcvtfp2hf (v4f32 V128:$Rn))), +def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>; def : Pat<(concat_vectors V64:$Rd, - (v4i16 (int_arm64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), + (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))), (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_arm64_neon_fcvtps>; -defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_arm64_neon_fcvtpu>; +defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; +defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", - int_arm64_neon_fcvtxn>; + int_aarch64_neon_fcvtxn>; defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>; defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>; let isCodeGenOnly = 1 in { defm FCVTZS_Int : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", - int_arm64_neon_fcvtzs>; + int_aarch64_neon_fcvtzs>; defm FCVTZU_Int : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", - int_arm64_neon_fcvtzu>; + int_aarch64_neon_fcvtzu>; } defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>; -defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_arm64_neon_frecpe>; +defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", frnd>; defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>; defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>; -defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_arm64_neon_frintn>; +defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>; defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>; defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>; defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>; -defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_arm64_neon_frsqrte>; +defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>; defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>; defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; @@ -2449,22 +2451,22 @@ def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}", def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}", (NOTv16i8 V128:$Vd, V128:$Vn)>; -def : Pat<(ARM64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>; -def : Pat<(ARM64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>; -def : Pat<(ARM64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>; -def : Pat<(ARM64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>; -def : Pat<(ARM64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>; -def : Pat<(ARM64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>; -def : Pat<(ARM64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>; - -def : Pat<(ARM64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(ARM64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(ARM64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(ARM64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(ARM64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(ARM64not (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(ARM64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 
-def : Pat<(ARM64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(AArch64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>; +def : Pat<(AArch64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>; +def : Pat<(AArch64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>; +def : Pat<(AArch64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>; +def : Pat<(AArch64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>; +def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>; +def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>; + +def : Pat<(AArch64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(AArch64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(AArch64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(AArch64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; @@ -2472,49 +2474,49 @@ def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_arm64_neon_rbit>; -defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", ARM64rev16>; -defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", ARM64rev32>; -defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", ARM64rev64>; +defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>; +defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>; +defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>; +defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>; defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", - BinOpFrag<(add node:$LHS, (int_arm64_neon_saddlp node:$RHS))> >; -defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_arm64_neon_saddlp>; + BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >; +defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>; defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>; defm SHLL : SIMDVectorLShiftLongBySizeBHS; -defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_arm64_neon_sqabs>; -defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_arm64_neon_sqneg>; -defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_arm64_neon_sqxtn>; -defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_arm64_neon_sqxtun>; -defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_arm64_neon_suqadd>; +defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; +defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; +defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>; +defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>; +defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>; defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", - BinOpFrag<(add node:$LHS, (int_arm64_neon_uaddlp node:$RHS))> >; + BinOpFrag<(add node:$LHS, (int_aarch64_neon_uaddlp node:$RHS))> >; defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", - int_arm64_neon_uaddlp>; + int_aarch64_neon_uaddlp>; defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>; -defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", 
int_arm64_neon_uqxtn>; -defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_arm64_neon_urecpe>; -defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_arm64_neon_ursqrte>; -defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_arm64_neon_usqadd>; +defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>; +defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>; +defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>; +defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>; defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>; -def : Pat<(v2f32 (ARM64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>; -def : Pat<(v4f32 (ARM64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>; +def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>; +def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>; // Patterns for vector long shift (by element width). These need to match all // three of zext, sext and anyext so it's easier to pull the patterns out of the // definition. multiclass SIMDVectorLShiftLongBySizeBHSPats { - def : Pat<(ARM64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)), + def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)), (SHLLv8i8 V64:$Rn)>; - def : Pat<(ARM64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)), + def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)), (SHLLv16i8 V128:$Rn)>; - def : Pat<(ARM64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)), + def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)), (SHLLv4i16 V64:$Rn)>; - def : Pat<(ARM64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)), + def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)), (SHLLv8i16 V128:$Rn)>; - def : Pat<(ARM64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)), + def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)), (SHLLv2i32 V64:$Rn)>; - def : Pat<(ARM64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)), + def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)), (SHLLv4i32 V128:$Rn)>; } @@ -2527,30 +2529,30 @@ defm : SIMDVectorLShiftLongBySizeBHSPats; //===----------------------------------------------------------------------===// defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>; -defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_arm64_neon_addp>; -defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", ARM64cmeq>; -defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", ARM64cmge>; -defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", ARM64cmgt>; -defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", ARM64cmhi>; -defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", ARM64cmhs>; -defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", ARM64cmtst>; -defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_arm64_neon_fabd>; -defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_arm64_neon_facge>; -defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_arm64_neon_facgt>; -defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_arm64_neon_addp>; +defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>; +defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>; +defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>; +defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>; +defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>; +defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>; +defm CMTST 
: SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>; +defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_aarch64_neon_fabd>; +defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>; +defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>; +defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>; defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>; -defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", ARM64fcmeq>; -defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", ARM64fcmge>; -defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", ARM64fcmgt>; +defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; +defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>; -defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_arm64_neon_fmaxnmp>; -defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_arm64_neon_fmaxnm>; -defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_arm64_neon_fmaxp>; -defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", ARM64fmax>; -defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_arm64_neon_fminnmp>; -defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_arm64_neon_fminnm>; -defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_arm64_neon_fminp>; -defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", ARM64fmin>; +defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; +defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_aarch64_neon_fmaxnm>; +defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>; +defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", AArch64fmax>; +defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>; +defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_aarch64_neon_fminnm>; +defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>; +defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", AArch64fmin>; // NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the // instruction expects the addend first, while the fma intrinsic puts it last. 
@@ -2570,58 +2572,58 @@ def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>; -defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_arm64_neon_fmulx>; +defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>; defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>; -defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_arm64_neon_frecps>; -defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_arm64_neon_frsqrts>; +defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>; defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>; defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >; defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >; defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; -defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_arm64_neon_pmul>; +defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>; defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", - TriOpFrag<(add node:$LHS, (int_arm64_neon_sabd node:$MHS, node:$RHS))> >; -defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_arm64_neon_sabd>; -defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_arm64_neon_shadd>; -defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_arm64_neon_shsub>; -defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_arm64_neon_smaxp>; -defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_arm64_neon_smax>; -defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_arm64_neon_sminp>; -defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_arm64_neon_smin>; -defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_arm64_neon_sqadd>; -defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_arm64_neon_sqdmulh>; -defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_arm64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_arm64_neon_sqrshl>; -defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_arm64_neon_sqshl>; -defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_arm64_neon_sqsub>; -defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_arm64_neon_srhadd>; -defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_arm64_neon_srshl>; -defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_arm64_neon_sshl>; + TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >; +defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>; +defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>; +defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>; +defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>; +defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_aarch64_neon_smax>; +defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>; +defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_aarch64_neon_smin>; +defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>; +defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>; +defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>; +defm SQRSHL : 
SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>; +defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>; +defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>; +defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>; +defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>; +defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>; defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", - TriOpFrag<(add node:$LHS, (int_arm64_neon_uabd node:$MHS, node:$RHS))> >; -defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_arm64_neon_uabd>; -defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_arm64_neon_uhadd>; -defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_arm64_neon_uhsub>; -defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_arm64_neon_umaxp>; -defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_arm64_neon_umax>; -defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_arm64_neon_uminp>; -defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", int_arm64_neon_umin>; -defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_arm64_neon_uqadd>; -defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_arm64_neon_uqrshl>; -defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_arm64_neon_uqshl>; -defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_arm64_neon_uqsub>; -defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_arm64_neon_urhadd>; -defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_arm64_neon_urshl>; -defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_arm64_neon_ushl>; + TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >; +defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>; +defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>; +defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>; +defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>; +defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_aarch64_neon_umax>; +defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>; +defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", int_aarch64_neon_umin>; +defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>; +defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>; +defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>; +defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; +defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>; +defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; +defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >; defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">; -defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", ARM64bit>; +defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>; defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl", TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>; defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>; @@ -2629,22 +2631,22 @@ defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn", BinOpFrag<(or node:$LHS, (vnot 
node:$RHS))> >; defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>; -def : Pat<(ARM64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), +def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; -def : Pat<(ARM64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm), +def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm), (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; -def : Pat<(ARM64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm), +def : Pat<(AArch64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm), (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; -def : Pat<(ARM64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm), +def : Pat<(AArch64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm), (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; -def : Pat<(ARM64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm), +def : Pat<(AArch64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm), (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; -def : Pat<(ARM64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm), +def : Pat<(AArch64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm), (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; -def : Pat<(ARM64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm), +def : Pat<(AArch64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm), (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; -def : Pat<(ARM64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm), +def : Pat<(AArch64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm), (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}", @@ -2798,40 +2800,40 @@ def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" # //===----------------------------------------------------------------------===// defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; -defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", ARM64cmeq>; -defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", ARM64cmge>; -defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", ARM64cmgt>; -defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", ARM64cmhi>; -defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", ARM64cmhs>; -defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", ARM64cmtst>; -defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_arm64_sisd_fabd>; -def : Pat<(v1f64 (int_arm64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>; +defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>; +defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; +defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; +defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; +defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; +defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_aarch64_sisd_fabd>; +def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FABD64 FPR64:$Rn, FPR64:$Rm)>; defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge", - int_arm64_neon_facge>; + int_aarch64_neon_facge>; defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt", - int_arm64_neon_facgt>; -defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", ARM64fcmeq>; -defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", ARM64fcmge>; -defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", ARM64fcmgt>; -defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_arm64_neon_fmulx>; -defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_arm64_neon_frecps>; -defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_arm64_neon_frsqrts>; -defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_arm64_neon_sqadd>; -defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_arm64_neon_sqdmulh>; -defm 
SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_arm64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_arm64_neon_sqrshl>; -defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_arm64_neon_sqshl>; -defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_arm64_neon_sqsub>; -defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_arm64_neon_srshl>; -defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_arm64_neon_sshl>; + int_aarch64_neon_facgt>; +defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; +defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; +defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_aarch64_neon_fmulx>; +defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_aarch64_neon_frsqrts>; +defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; +defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; +defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; +defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>; +defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>; +defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>; +defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>; +defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; -defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_arm64_neon_uqadd>; -defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_arm64_neon_uqrshl>; -defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_arm64_neon_uqshl>; -defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_arm64_neon_uqsub>; -defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_arm64_neon_urshl>; -defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_arm64_neon_ushl>; +defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>; +defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>; +defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>; +defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; +defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; +defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; def : InstAlias<"cmls $dst, $src1, $src2", (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; @@ -2862,16 +2864,16 @@ def : InstAlias<"faclt $dst, $src1, $src2", // Advanced SIMD three scalar instructions (mixed operands). 
//===----------------------------------------------------------------------===// defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull", - int_arm64_neon_sqdmulls_scalar>; + int_aarch64_neon_sqdmulls_scalar>; defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">; defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">; -def : Pat<(i64 (int_arm64_neon_sqadd (i64 FPR64:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), +def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd), + (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), (i32 FPR32:$Rm))))), (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; -def : Pat<(i64 (int_arm64_neon_sqsub (i64 FPR64:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), +def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd), + (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), (i32 FPR32:$Rm))))), (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; @@ -2879,17 +2881,17 @@ def : Pat<(i64 (int_arm64_neon_sqsub (i64 FPR64:$Rd), // Advanced SIMD two scalar instructions. //===----------------------------------------------------------------------===// -defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", int_arm64_neon_abs>; -defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", ARM64cmeqz>; -defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", ARM64cmgez>; -defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", ARM64cmgtz>; -defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", ARM64cmlez>; -defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", ARM64cmltz>; -defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", ARM64fcmeqz>; -defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", ARM64fcmgez>; -defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", ARM64fcmgtz>; -defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", ARM64fcmlez>; -defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", ARM64fcmltz>; +defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", int_aarch64_neon_abs>; +defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>; +defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>; +defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>; +defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>; +defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>; +defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; +defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", AArch64fcmgez>; +defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; +defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", AArch64fcmlez>; +defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; defm FCVTAS : SIMDTwoScalarSD< 0, 0, 0b11100, "fcvtas">; defm FCVTAU : SIMDTwoScalarSD< 1, 0, 0b11100, "fcvtau">; defm FCVTMS : SIMDTwoScalarSD< 0, 0, 0b11011, "fcvtms">; @@ -2906,54 +2908,54 @@ defm FRECPX : SIMDTwoScalarSD< 0, 1, 0b11111, "frecpx">; defm FRSQRTE : SIMDTwoScalarSD< 1, 1, 0b11101, "frsqrte">; defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; -defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", ARM64sitof>; -defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_arm64_neon_sqabs>; -defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_arm64_neon_sqneg>; -defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_arm64_neon_scalar_sqxtn>; -defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_arm64_neon_scalar_sqxtun>; +defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", AArch64sitof>; +defm SQABS : 
SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; +defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; +defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>; +defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>; defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", - int_arm64_neon_suqadd>; -defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", ARM64uitof>; -defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_arm64_neon_scalar_uqxtn>; + int_aarch64_neon_suqadd>; +defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", AArch64uitof>; +defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>; defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", - int_arm64_neon_usqadd>; + int_aarch64_neon_usqadd>; -def : Pat<(ARM64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>; +def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtas (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))), (FCVTASv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtau (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))), (FCVTAUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtms (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))), (FCVTMSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtmu (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))), (FCVTMUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtns (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))), (FCVTNSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtnu (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))), (FCVTNUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtps (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))), (FCVTPSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtpu (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))), (FCVTPUv1i64 FPR64:$Rn)>; -def : Pat<(f32 (int_arm64_neon_frecpe (f32 FPR32:$Rn))), +def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))), (FRECPEv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_arm64_neon_frecpe (f64 FPR64:$Rn))), +def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))), (FRECPEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (int_arm64_neon_frecpe (v1f64 FPR64:$Rn))), +def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))), (FRECPEv1i64 FPR64:$Rn)>; -def : Pat<(f32 (int_arm64_neon_frecpx (f32 FPR32:$Rn))), +def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))), (FRECPXv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_arm64_neon_frecpx (f64 FPR64:$Rn))), +def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))), (FRECPXv1i64 FPR64:$Rn)>; -def : Pat<(f32 (int_arm64_neon_frsqrte (f32 FPR32:$Rn))), +def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))), (FRSQRTEv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_arm64_neon_frsqrte (f64 FPR64:$Rn))), +def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))), (FRSQRTEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (int_arm64_neon_frsqrte (v1f64 FPR64:$Rn))), +def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))), (FRSQRTEv1i64 FPR64:$Rn)>; // If an integer is about to be converted to a floating point value, @@ -3047,56 +3049,56 @@ def : Pat <(f64 (uint_to_fp (i32 // Advanced SIMD three different-sized 
vector instructions. //===----------------------------------------------------------------------===// -defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_arm64_neon_addhn>; -defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_arm64_neon_subhn>; -defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_arm64_neon_raddhn>; -defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_arm64_neon_rsubhn>; -defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_arm64_neon_pmull>; +defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>; +defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>; +defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>; +defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>; +defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>; defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", - int_arm64_neon_sabd>; + int_aarch64_neon_sabd>; defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", - int_arm64_neon_sabd>; + int_aarch64_neon_sabd>; defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", BinOpFrag<(add node:$LHS, (sext node:$RHS))>>; defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; + TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; -defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_arm64_neon_smull>; + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; +defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>; defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", - int_arm64_neon_sqadd>; + int_aarch64_neon_sqadd>; defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", - int_arm64_neon_sqsub>; + int_aarch64_neon_sqsub>; defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull", - int_arm64_neon_sqdmull>; + int_aarch64_neon_sqdmull>; defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>; defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", - int_arm64_neon_uabd>; + int_aarch64_neon_uabd>; defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", - int_arm64_neon_uabd>; + int_aarch64_neon_uabd>; defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>; defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", BinOpFrag<(add node:$LHS, (zext node:$RHS))>>; defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; + TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; -defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_arm64_neon_umull>; + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; +defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>; defm 
USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>; defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>; // Patterns for 64-bit pmull -def : Pat<(int_arm64_neon_pmull64 V64:$Rn, V64:$Rm), +def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm), (PMULLv1i64 V64:$Rn, V64:$Rm)>; -def : Pat<(int_arm64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)), +def : Pat<(int_aarch64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)), (vector_extract (v2i64 V128:$Rm), (i64 1))), (PMULLv2i64 V128:$Rn, V128:$Rm)>; @@ -3104,51 +3106,51 @@ def : Pat<(int_arm64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)), // written in LLVM IR without too much difficulty. // ADDHN -def : Pat<(v8i8 (trunc (v8i16 (ARM64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), +def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i16 (trunc (v4i32 (ARM64vlshr (add V128:$Rn, V128:$Rm), +def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 16))))), (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v2i32 (trunc (v2i64 (ARM64vlshr (add V128:$Rn, V128:$Rm), +def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 32))))), (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v8i8 V64:$Rd), - (trunc (v8i16 (ARM64vlshr (add V128:$Rn, V128:$Rm), + (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v4i16 V64:$Rd), - (trunc (v4i32 (ARM64vlshr (add V128:$Rn, V128:$Rm), + (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 16))))), (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v2i32 V64:$Rd), - (trunc (v2i64 (ARM64vlshr (add V128:$Rn, V128:$Rm), + (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 32))))), (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; // SUBHN -def : Pat<(v8i8 (trunc (v8i16 (ARM64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), +def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i16 (trunc (v4i32 (ARM64vlshr (sub V128:$Rn, V128:$Rm), +def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 16))))), (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v2i32 (trunc (v2i64 (ARM64vlshr (sub V128:$Rn, V128:$Rm), +def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 32))))), (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v8i8 V64:$Rd), - (trunc (v8i16 (ARM64vlshr (sub V128:$Rn, V128:$Rm), + (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v4i16 V64:$Rd), - (trunc (v4i32 (ARM64vlshr (sub V128:$Rn, V128:$Rm), + (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 16))))), (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v2i32 V64:$Rd), - (trunc (v2i64 (ARM64vlshr (sub V128:$Rn, V128:$Rm), + (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 32))))), (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; @@ -3159,21 +3161,21 @@ def : Pat<(concat_vectors (v2i32 V64:$Rd), defm EXT : 
SIMDBitwiseExtract<"ext">; -def : Pat<(v4i16 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), +def : Pat<(v4i16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v8i16 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), +def : Pat<(v8i16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2i32 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), +def : Pat<(v2i32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v2f32 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), +def : Pat<(v2f32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v4i32 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), +def : Pat<(v4i32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v4f32 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), +def : Pat<(v4f32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2i64 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), +def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2f64 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), +def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; // We use EXT to handle extract_subvector to copy the upper 64-bits of a @@ -3196,12 +3198,12 @@ def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))), // AdvSIMD zip vector //---------------------------------------------------------------------------- -defm TRN1 : SIMDZipVector<0b010, "trn1", ARM64trn1>; -defm TRN2 : SIMDZipVector<0b110, "trn2", ARM64trn2>; -defm UZP1 : SIMDZipVector<0b001, "uzp1", ARM64uzp1>; -defm UZP2 : SIMDZipVector<0b101, "uzp2", ARM64uzp2>; -defm ZIP1 : SIMDZipVector<0b011, "zip1", ARM64zip1>; -defm ZIP2 : SIMDZipVector<0b111, "zip2", ARM64zip2>; +defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>; +defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>; +defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>; +defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>; +defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>; +defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>; //---------------------------------------------------------------------------- // AdvSIMD TBL/TBX instructions @@ -3210,15 +3212,15 @@ defm ZIP2 : SIMDZipVector<0b111, "zip2", ARM64zip2>; defm TBL : SIMDTableLookup< 0, "tbl">; defm TBX : SIMDTableLookupTied<1, "tbx">; -def : Pat<(v8i8 (int_arm64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), +def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>; -def : Pat<(v16i8 (int_arm64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), +def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), (TBLv16i8One V128:$Ri, V128:$Rn)>; -def : Pat<(v8i8 (int_arm64_neon_tbx1 (v8i8 V64:$Rd), +def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd), (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>; -def : Pat<(v16i8 (int_arm64_neon_tbx1 (v16i8 V128:$Rd), +def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd), (v16i8 V128:$Ri), (v16i8 V128:$Rn))), (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>; @@ -3239,31 +3241,31 @@ defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">; defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">; defm 
FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">; defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">; -def : Pat<(i64 (int_arm64_neon_saddv (v2i64 V128:$Rn))), +def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))), (ADDPv2i64p V128:$Rn)>; -def : Pat<(i64 (int_arm64_neon_uaddv (v2i64 V128:$Rn))), +def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))), (ADDPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_faddv (v2f32 V64:$Rn))), +def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))), (FADDPv2i32p V64:$Rn)>; -def : Pat<(f32 (int_arm64_neon_faddv (v4f32 V128:$Rn))), +def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; -def : Pat<(f64 (int_arm64_neon_faddv (v2f64 V128:$Rn))), +def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))), (FADDPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fmaxnmv (v2f32 V64:$Rn))), +def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))), (FMAXNMPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fmaxnmv (v2f64 V128:$Rn))), +def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))), (FMAXNMPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fmaxv (v2f32 V64:$Rn))), +def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))), (FMAXPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fmaxv (v2f64 V128:$Rn))), +def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))), (FMAXPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fminnmv (v2f32 V64:$Rn))), +def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))), (FMINNMPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fminnmv (v2f64 V128:$Rn))), +def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))), (FMINNMPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fminv (v2f32 V64:$Rn))), +def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))), (FMINPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fminv (v2f64 V128:$Rn))), +def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))), (FMINPv2i64p V128:$Rn)>; //---------------------------------------------------------------------------- @@ -3286,27 +3288,27 @@ def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>; def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>; def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>; -def : Pat<(v2f32 (ARM64dup (f32 FPR32:$Rn))), +def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))), (v2f32 (DUPv2i32lane (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), (i64 0)))>; -def : Pat<(v4f32 (ARM64dup (f32 FPR32:$Rn))), +def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))), (v4f32 (DUPv4i32lane (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), (i64 0)))>; -def : Pat<(v2f64 (ARM64dup (f64 FPR64:$Rn))), +def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))), (v2f64 (DUPv2i64lane (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub), (i64 0)))>; -def : Pat<(v2f32 (ARM64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), +def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>; -def : Pat<(v4f32 (ARM64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), +def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>; -def : Pat<(v2f64 (ARM64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), +def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>; -// If there's an (ARM64dup (vector_extract ...) 
...), we can use a duplane +// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane // instruction even if the types don't match: we just have to remap the lane // carefully. N.b. this trick only applies to truncations. def VecIndex_x2 : SDNodeXForm { - def : Pat<(ResVT (ARM64dup (ScalVT (vector_extract (Src128VT V128:$Rn), + def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn), imm:$idx)))), (DUP V128:$Rn, (IdxXFORM imm:$idx))>; - def : Pat<(ResVT (ARM64dup (ScalVT (vector_extract (Src64VT V64:$Rn), + def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn), imm:$idx)))), (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; } @@ -3341,11 +3343,11 @@ defm : DUPWithTruncPats; multiclass DUPWithTrunci64Pats { - def : Pat<(ResVT (ARM64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn), + def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn), imm:$idx))))), (DUP V128:$Rn, (IdxXFORM imm:$idx))>; - def : Pat<(ResVT (ARM64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn), + def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn), imm:$idx))))), (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; } @@ -3377,7 +3379,7 @@ def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), // Extracting i8 or i16 elements will have the zero-extend transformed to // an 'and' mask by type legalization since neither i8 nor i16 are legal types -// for ARM64. Match these patterns here since UMOV already zeroes out the high +// for AArch64. Match these patterns here since UMOV already zeroes out the high // bits of the destination register. def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), (i32 0xff)), @@ -3445,25 +3447,25 @@ def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn), // element of another. // FIXME refactor to a shared class/dev parameterized on vector type, vector // index type and INS extension -def : Pat<(v16i8 (int_arm64_neon_vcopy_lane +def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs), VectorIndexB:$idx2)), (v16i8 (INSvi8lane V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2) )>; -def : Pat<(v8i16 (int_arm64_neon_vcopy_lane +def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs), VectorIndexH:$idx2)), (v8i16 (INSvi16lane V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2) )>; -def : Pat<(v4i32 (int_arm64_neon_vcopy_lane +def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs), VectorIndexS:$idx2)), (v4i32 (INSvi32lane V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2) )>; -def : Pat<(v2i64 (int_arm64_neon_vcopy_lane +def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs), VectorIndexD:$idx2)), (v2i64 (INSvi64lane @@ -3526,7 +3528,7 @@ def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), ssub))>; // All concat_vectors operations are canonicalised to act on i64 vectors for -// ARM64. In the general case we need an instruction, which had just as well be +// AArch64. In the general case we need an instruction, which had just as well be // INS. 
class ConcatPat : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)), @@ -3563,10 +3565,10 @@ defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; -defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_arm64_neon_fmaxnmv>; -defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_arm64_neon_fmaxv>; -defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_arm64_neon_fminnmv>; -defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_arm64_neon_fminv>; +defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; +defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; +defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; +defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>; multiclass SIMDAcrossLanesSignedIntrinsic { // If there is a sign extension after this intrinsic, consume it as smov already @@ -3745,43 +3747,43 @@ def : Pat<(i64 (intOp (v4i32 V128:$Rn))), dsub))>; } -defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_arm64_neon_saddv>; +defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>; // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_arm64_neon_saddv (v2i32 V64:$Rn))), +def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))), (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; -defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_arm64_neon_uaddv>; +defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>; // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_arm64_neon_uaddv (v2i32 V64:$Rn))), +def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))), (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; -defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_arm64_neon_smaxv>; -def : Pat<(i32 (int_arm64_neon_smaxv (v2i32 V64:$Rn))), +defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>; +def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))), (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; -defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_arm64_neon_sminv>; -def : Pat<(i32 (int_arm64_neon_sminv (v2i32 V64:$Rn))), +defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>; +def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))), (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_arm64_neon_umaxv>; -def : Pat<(i32 (int_arm64_neon_umaxv (v2i32 V64:$Rn))), +defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>; +def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))), (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_arm64_neon_uminv>; -def : Pat<(i32 (int_arm64_neon_uminv (v2i32 V64:$Rn))), +defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>; +def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))), (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; -defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_arm64_neon_saddlv>; -defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_arm64_neon_uaddlv>; +defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>; +defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>; // The vaddlv_s32 intrinsic gets 
mapped to SADDLP. -def : Pat<(i64 (int_arm64_neon_saddlv (v2i32 V64:$Rn))), +def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))), (i64 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (SADDLPv2i32_v1i64 V64:$Rn), dsub), dsub))>; // The vaddlv_u32 intrinsic gets mapped to UADDLP. -def : Pat<(i64 (int_arm64_neon_uaddlv (v2i32 V64:$Rn))), +def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))), (i64 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (UADDLPv2i32_v1i64 V64:$Rn), dsub), @@ -3792,9 +3794,9 @@ def : Pat<(i64 (int_arm64_neon_uaddlv (v2i32 V64:$Rn))), //------------------------------------------------------------------------------ // AdvSIMD BIC -defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", ARM64bici>; +defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>; // AdvSIMD ORR -defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", ARM64orri>; +defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>; def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; @@ -3819,13 +3821,13 @@ def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; // AdvSIMD FMOV def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8, "fmov", ".2d", - [(set (v2f64 V128:$Rd), (ARM64fmov imm0_255:$imm8))]>; + [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8, "fmov", ".2s", - [(set (v2f32 V64:$Rd), (ARM64fmov imm0_255:$imm8))]>; + [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8, "fmov", ".4s", - [(set (v4f32 V128:$Rd), (ARM64fmov imm0_255:$imm8))]>; + [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; // AdvSIMD MOVI @@ -3835,7 +3837,7 @@ def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", [(set FPR64:$Rd, simdimmtype10:$imm8)]>; // The movi_edit node has the immediate value already encoded, so we use // a plain imm0_255 here. -def : Pat<(f64 (ARM64movi_edit imm0_255:$shift)), +def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)), (MOVID imm0_255:$shift)>; def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>; @@ -3856,7 +3858,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128, simdimmtype10, "movi", ".2d", - [(set (v2i64 V128:$Rd), (ARM64movi_edit imm0_255:$imm8))]>; + [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; // Use movi.2d to materialize 0.0 if the HW does zero-cycle zeroing. 
@@ -3880,8 +3882,8 @@ def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v2f64 (ARM64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>; -def : Pat<(v4f32 (ARM64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>; +def : Pat<(v2f64 (AArch64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>; +def : Pat<(v4f32 (AArch64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>; // EDIT per word & halfword: 2s, 4h, 4s, & 8h defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; @@ -3896,30 +3898,30 @@ def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; -def : Pat<(v2i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i16 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v8i16 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; // EDIT per word: 2s & 4s with MSL shifter def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", [(set (v2i32 V64:$Rd), - (ARM64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; + (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", [(set (v4i32 V128:$Rd), - (ARM64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; + (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; // Per byte: 8b & 16b def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255, "movi", ".8b", - [(set (v8i8 V64:$Rd), (ARM64movi imm0_255:$imm8))]>; + [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255, "movi", ".16b", - [(set (v16i8 V128:$Rd), (ARM64movi imm0_255:$imm8))]>; + [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; // AdvSIMD MVNI @@ -3936,22 +3938,22 @@ def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; -def : Pat<(v2i32 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i32 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i16 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v8i16 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; // EDIT per word: 2s & 4s with MSL shifter def 
MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", [(set (v2i32 V64:$Rd), - (ARM64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; + (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", [(set (v4i32 V128:$Rd), - (ARM64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; + (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; //---------------------------------------------------------------------------- // AdvSIMD indexed element @@ -3985,11 +3987,11 @@ multiclass FMLSIndexedAfterNegPatterns { // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit // and DUP scalar. def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64duplane32 (v4f32 (fneg V128:$Rm)), + (AArch64duplane32 (v4f32 (fneg V128:$Rm)), VectorIndexS:$idx))), (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (v2f32 (ARM64duplane32 + (v2f32 (AArch64duplane32 (v4f32 (insert_subvector undef, (v2f32 (fneg V64:$Rm)), (i32 0))), @@ -3998,19 +4000,19 @@ multiclass FMLSIndexedAfterNegPatterns { (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64dup (f32 (fneg FPR32Op:$Rm))))), + (AArch64dup (f32 (fneg FPR32Op:$Rm))))), (FMLSv2i32_indexed V64:$Rd, V64:$Rn, (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit // and DUP scalar. def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64duplane32 (v4f32 (fneg V128:$Rm)), + (AArch64duplane32 (v4f32 (fneg V128:$Rm)), VectorIndexS:$idx))), (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (v4f32 (ARM64duplane32 + (v4f32 (AArch64duplane32 (v4f32 (insert_subvector undef, (v2f32 (fneg V64:$Rm)), (i32 0))), @@ -4019,19 +4021,19 @@ multiclass FMLSIndexedAfterNegPatterns { (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64dup (f32 (fneg FPR32Op:$Rm))))), + (AArch64dup (f32 (fneg FPR32Op:$Rm))))), (FMLSv4i32_indexed V128:$Rd, V128:$Rn, (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar // (DUPLANE from 64-bit would be trivial). 
def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64duplane64 (v2f64 (fneg V128:$Rm)), + (AArch64duplane64 (v2f64 (fneg V128:$Rm)), VectorIndexD:$idx))), (FMLSv2i64_indexed V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64dup (f64 (fneg FPR64Op:$Rm))))), + (AArch64dup (f64 (fneg FPR64Op:$Rm))))), (FMLSv2i64_indexed V128:$Rd, V128:$Rn, (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; @@ -4060,50 +4062,50 @@ defm : FMLSIndexedAfterNegPatterns< defm : FMLSIndexedAfterNegPatterns< TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >; -defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_arm64_neon_fmulx>; +defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; defm FMUL : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>; -def : Pat<(v2f32 (fmul V64:$Rn, (ARM64dup (f32 FPR32:$Rm)))), +def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), (FMULv2i32_indexed V64:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), (i64 0))>; -def : Pat<(v4f32 (fmul V128:$Rn, (ARM64dup (f32 FPR32:$Rm)))), +def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), (FMULv4i32_indexed V128:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), (i64 0))>; -def : Pat<(v2f64 (fmul V128:$Rn, (ARM64dup (f64 FPR64:$Rm)))), +def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), (FMULv2i64_indexed V128:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), (i64 0))>; -defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_arm64_neon_sqdmulh>; -defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_arm64_neon_sqrdmulh>; +defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>; +defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>; defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>; defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>; defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; + TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", - int_arm64_neon_smull>; + int_aarch64_neon_smull>; defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", - int_arm64_neon_sqadd>; + int_aarch64_neon_sqadd>; defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", - int_arm64_neon_sqsub>; -defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_arm64_neon_sqdmull>; + int_aarch64_neon_sqsub>; +defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; + TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", - int_arm64_neon_umull>; + 
int_aarch64_neon_umull>; // A scalar sqdmull with the second operand being a vector lane can be // handled directly with the indexed instruction encoding. -def : Pat<(int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), +def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), (vector_extract (v4i32 V128:$Vm), VectorIndexS:$idx)), (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; @@ -4118,149 +4120,149 @@ defm UCVTF : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">; // Codegen patterns for the above. We don't put these directly on the // instructions because TableGen's type inference can't handle the truth. // Having the same base pattern for fp <--> int totally freaks it out. -def : Pat<(int_arm64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), +def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(int_arm64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), +def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(i64 (int_arm64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), +def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(i64 (int_arm64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), +def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1i64 (int_arm64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), +def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), vecshiftR64:$imm)), (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1i64 (int_arm64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), +def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), vecshiftR64:$imm)), (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(int_arm64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), +def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(int_arm64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), +def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(f64 (int_arm64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), +def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(f64 (int_arm64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), +def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1f64 (int_arm64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), +def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), vecshiftR64:$imm)), (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1f64 (int_arm64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), +def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), vecshiftR64:$imm)), (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", ARM64vshl>; +defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>; defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">; defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", - int_arm64_neon_sqrshrn>; + int_aarch64_neon_sqrshrn>; defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", - int_arm64_neon_sqrshrun>; -defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", ARM64sqshlui>; -defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", ARM64sqshli>; + int_aarch64_neon_sqrshrun>; +defm SQSHLU : 
SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; +defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", - int_arm64_neon_sqshrn>; + int_aarch64_neon_sqshrn>; defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", - int_arm64_neon_sqshrun>; + int_aarch64_neon_sqshrun>; defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; -defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", ARM64srshri>; +defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>; defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", TriOpFrag<(add node:$LHS, - (ARM64srshri node:$MHS, node:$RHS))>>; -defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", ARM64vashr>; + (AArch64srshri node:$MHS, node:$RHS))>>; +defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>; defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", TriOpFrag<(add node:$LHS, - (ARM64vashr node:$MHS, node:$RHS))>>; + (AArch64vashr node:$MHS, node:$RHS))>>; defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", - int_arm64_neon_uqrshrn>; -defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", ARM64uqshli>; + int_aarch64_neon_uqrshrn>; +defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", - int_arm64_neon_uqshrn>; -defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", ARM64urshri>; + int_aarch64_neon_uqshrn>; +defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>; defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", TriOpFrag<(add node:$LHS, - (ARM64urshri node:$MHS, node:$RHS))>>; -defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", ARM64vlshr>; + (AArch64urshri node:$MHS, node:$RHS))>>; +defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>; defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", TriOpFrag<(add node:$LHS, - (ARM64vlshr node:$MHS, node:$RHS))>>; + (AArch64vlshr node:$MHS, node:$RHS))>>; //---------------------------------------------------------------------------- // AdvSIMD vector shift instructions //---------------------------------------------------------------------------- -defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_arm64_neon_vcvtfp2fxs>; -defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_arm64_neon_vcvtfp2fxu>; +defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; +defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf", - int_arm64_neon_vcvtfxs2fp>; + int_aarch64_neon_vcvtfxs2fp>; defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", - int_arm64_neon_rshrn>; -defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", ARM64vshl>; + int_aarch64_neon_rshrn>; +defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>; defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", - BinOpFrag<(trunc (ARM64vashr node:$LHS, node:$RHS))>>; -defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_arm64_neon_vsli>; -def : Pat<(v1i64 (int_arm64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), + BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>; +defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>; +def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm))), (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", - int_arm64_neon_sqrshrn>; + int_aarch64_neon_sqrshrn>; defm 
SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", - int_arm64_neon_sqrshrun>; -defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", ARM64sqshlui>; -defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", ARM64sqshli>; + int_aarch64_neon_sqrshrun>; +defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; +defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", - int_arm64_neon_sqshrn>; + int_aarch64_neon_sqshrn>; defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", - int_arm64_neon_sqshrun>; -defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_arm64_neon_vsri>; -def : Pat<(v1i64 (int_arm64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), + int_aarch64_neon_sqshrun>; +defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>; +def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), (i32 vecshiftR64:$imm))), (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; -defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", ARM64srshri>; +defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>; defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", TriOpFrag<(add node:$LHS, - (ARM64srshri node:$MHS, node:$RHS))> >; + (AArch64srshri node:$MHS, node:$RHS))> >; defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", - BinOpFrag<(ARM64vshl (sext node:$LHS), node:$RHS)>>; + BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>; -defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", ARM64vashr>; +defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", - TriOpFrag<(add node:$LHS, (ARM64vashr node:$MHS, node:$RHS))>>; + TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; defm UCVTF : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf", - int_arm64_neon_vcvtfxu2fp>; + int_aarch64_neon_vcvtfxu2fp>; defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", - int_arm64_neon_uqrshrn>; -defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", ARM64uqshli>; + int_aarch64_neon_uqrshrn>; +defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn", - int_arm64_neon_uqshrn>; -defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", ARM64urshri>; + int_aarch64_neon_uqshrn>; +defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>; defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra", TriOpFrag<(add node:$LHS, - (ARM64urshri node:$MHS, node:$RHS))> >; + (AArch64urshri node:$MHS, node:$RHS))> >; defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll", - BinOpFrag<(ARM64vshl (zext node:$LHS), node:$RHS)>>; -defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", ARM64vlshr>; + BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>; +defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>; defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", - TriOpFrag<(add node:$LHS, (ARM64vlshr node:$MHS, node:$RHS))> >; + TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; // SHRN patterns for when a logical right shift was used instead of arithmetic // (the immediate guarantees no sign bits actually end up in the result so it // doesn't matter). 
-def : Pat<(v8i8 (trunc (ARM64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))), +def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))), (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>; -def : Pat<(v4i16 (trunc (ARM64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))), +def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))), (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>; -def : Pat<(v2i32 (trunc (ARM64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))), +def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))), (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>; def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd), - (trunc (ARM64vlshr (v8i16 V128:$Rn), + (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm)))), (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn, vecshiftR16Narrow:$imm)>; def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd), - (trunc (ARM64vlshr (v4i32 V128:$Rn), + (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm)))), (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn, vecshiftR32Narrow:$imm)>; def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd), - (trunc (ARM64vlshr (v2i64 V128:$Rn), + (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm)))), (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn, vecshiftR32Narrow:$imm)>; @@ -4530,30 +4532,30 @@ defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>; defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; } -def : Pat<(v8i8 (ARM64dup (i32 (extloadi8 GPR64sp:$Rn)))), +def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), (LD1Rv8b GPR64sp:$Rn)>; -def : Pat<(v16i8 (ARM64dup (i32 (extloadi8 GPR64sp:$Rn)))), +def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), (LD1Rv16b GPR64sp:$Rn)>; -def : Pat<(v4i16 (ARM64dup (i32 (extloadi16 GPR64sp:$Rn)))), +def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), (LD1Rv4h GPR64sp:$Rn)>; -def : Pat<(v8i16 (ARM64dup (i32 (extloadi16 GPR64sp:$Rn)))), +def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), (LD1Rv8h GPR64sp:$Rn)>; -def : Pat<(v2i32 (ARM64dup (i32 (load GPR64sp:$Rn)))), +def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), (LD1Rv2s GPR64sp:$Rn)>; -def : Pat<(v4i32 (ARM64dup (i32 (load GPR64sp:$Rn)))), +def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), (LD1Rv4s GPR64sp:$Rn)>; -def : Pat<(v2i64 (ARM64dup (i64 (load GPR64sp:$Rn)))), +def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), (LD1Rv2d GPR64sp:$Rn)>; -def : Pat<(v1i64 (ARM64dup (i64 (load GPR64sp:$Rn)))), +def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), (LD1Rv1d GPR64sp:$Rn)>; // Grab the floating point version too -def : Pat<(v2f32 (ARM64dup (f32 (load GPR64sp:$Rn)))), +def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), (LD1Rv2s GPR64sp:$Rn)>; -def : Pat<(v4f32 (ARM64dup (f32 (load GPR64sp:$Rn)))), +def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), (LD1Rv4s GPR64sp:$Rn)>; -def : Pat<(v2f64 (ARM64dup (f64 (load GPR64sp:$Rn)))), +def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), (LD1Rv2d GPR64sp:$Rn)>; -def : Pat<(v1f64 (ARM64dup (f64 (load GPR64sp:$Rn)))), +def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), (LD1Rv1d GPR64sp:$Rn)>; class Ld1Lane128Pat; // Crypto extensions //---------------------------------------------------------------------------- -def AESErr : AESTiedInst<0b0100, "aese", int_arm64_crypto_aese>; -def AESDrr : 
AESTiedInst<0b0101, "aesd", int_arm64_crypto_aesd>; -def AESMCrr : AESInst< 0b0110, "aesmc", int_arm64_crypto_aesmc>; -def AESIMCrr : AESInst< 0b0111, "aesimc", int_arm64_crypto_aesimc>; +def AESErr : AESTiedInst<0b0100, "aese", int_aarch64_crypto_aese>; +def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>; +def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>; +def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>; -def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_arm64_crypto_sha1c>; -def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_arm64_crypto_sha1p>; -def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_arm64_crypto_sha1m>; -def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_arm64_crypto_sha1su0>; -def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_arm64_crypto_sha256h>; -def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_arm64_crypto_sha256h2>; -def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_arm64_crypto_sha256su1>; +def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>; +def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>; +def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>; +def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>; +def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>; +def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>; +def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>; -def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_arm64_crypto_sha1h>; -def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_arm64_crypto_sha1su1>; -def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_arm64_crypto_sha256su0>; +def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_aarch64_crypto_sha1h>; +def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_aarch64_crypto_sha1su1>; +def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>; //---------------------------------------------------------------------------- // Compiler-pseudos @@ -4799,7 +4801,7 @@ def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), def : Pat<(i32 (trunc GPR64sp:$src)), (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; -// __builtin_trap() uses the BRK instruction on ARM64. +// __builtin_trap() uses the BRK instruction on AArch64. def : Pat<(trap), (BRK 1)>; // Conversions within AdvSIMD types in the same register size are free. @@ -5256,13 +5258,13 @@ def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; // Scalar 64-bit shifts in FPR64 registers. -def : Pat<(i64 (int_arm64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), +def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), +def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), +def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), +def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; // Tail call return handling. 
These are all compiler pseudo-instructions, @@ -5272,11 +5274,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>; } -def : Pat<(ARM64tcret tcGPR64:$dst, (i32 timm:$FPDiff)), +def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)), (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>; -def : Pat<(ARM64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)), +def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)), (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; -def : Pat<(ARM64tcret texternalsym:$dst, (i32 timm:$FPDiff)), +def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; -include "ARM64InstrAtomics.td" +include "AArch64InstrAtomics.td" diff --git a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp similarity index 80% rename from lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp rename to lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index e2c4b13f0369..e7454be125bc 100644 --- a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1,4 +1,4 @@ -//===-- ARM64LoadStoreOptimizer.cpp - ARM64 load/store opt. pass --*- C++ -*-=// +//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "ARM64InstrInfo.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64InstrInfo.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -29,9 +29,9 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; -#define DEBUG_TYPE "arm64-ldst-opt" +#define DEBUG_TYPE "aarch64-ldst-opt" -/// ARM64AllocLoadStoreOpt - Post-register allocation pass to combine +/// AArch64AllocLoadStoreOpt - Post-register allocation pass to combine /// load / store instructions to form ldp / stp instructions. 
STATISTIC(NumPairCreated, "Number of load/store pair instructions generated"); @@ -40,21 +40,21 @@ STATISTIC(NumPreFolded, "Number of pre-index updates folded"); STATISTIC(NumUnscaledPairCreated, "Number of load/store from unscaled generated"); -static cl::opt ScanLimit("arm64-load-store-scan-limit", cl::init(20), +static cl::opt ScanLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden); // Place holder while testing unscaled load/store combining static cl::opt -EnableARM64UnscaledMemOp("arm64-unscaled-mem-op", cl::Hidden, - cl::desc("Allow ARM64 unscaled load/store combining"), +EnableAArch64UnscaledMemOp("aarch64-unscaled-mem-op", cl::Hidden, + cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true)); namespace { -struct ARM64LoadStoreOpt : public MachineFunctionPass { +struct AArch64LoadStoreOpt : public MachineFunctionPass { static char ID; - ARM64LoadStoreOpt() : MachineFunctionPass(ID) {} + AArch64LoadStoreOpt() : MachineFunctionPass(ID) {} - const ARM64InstrInfo *TII; + const AArch64InstrInfo *TII; const TargetRegisterInfo *TRI; // Scan the instructions looking for a load/store that can be combined @@ -102,76 +102,76 @@ struct ARM64LoadStoreOpt : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &Fn) override; const char *getPassName() const override { - return "ARM64 load / store optimization pass"; + return "AArch64 load / store optimization pass"; } private: int getMemSize(MachineInstr *MemMI); }; -char ARM64LoadStoreOpt::ID = 0; +char AArch64LoadStoreOpt::ID = 0; } static bool isUnscaledLdst(unsigned Opc) { switch (Opc) { default: return false; - case ARM64::STURSi: + case AArch64::STURSi: return true; - case ARM64::STURDi: + case AArch64::STURDi: return true; - case ARM64::STURQi: + case AArch64::STURQi: return true; - case ARM64::STURWi: + case AArch64::STURWi: return true; - case ARM64::STURXi: + case AArch64::STURXi: return true; - case ARM64::LDURSi: + case AArch64::LDURSi: return true; - case ARM64::LDURDi: + case AArch64::LDURDi: return true; - case ARM64::LDURQi: + case AArch64::LDURQi: return true; - case ARM64::LDURWi: + case AArch64::LDURWi: return true; - case ARM64::LDURXi: + case AArch64::LDURXi: return true; } } // Size in bytes of the data moved by an unscaled load or store -int ARM64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { +int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { switch (MemMI->getOpcode()) { default: llvm_unreachable("Opcode has has unknown size!"); - case ARM64::STRSui: - case ARM64::STURSi: + case AArch64::STRSui: + case AArch64::STURSi: return 4; - case ARM64::STRDui: - case ARM64::STURDi: + case AArch64::STRDui: + case AArch64::STURDi: return 8; - case ARM64::STRQui: - case ARM64::STURQi: + case AArch64::STRQui: + case AArch64::STURQi: return 16; - case ARM64::STRWui: - case ARM64::STURWi: + case AArch64::STRWui: + case AArch64::STURWi: return 4; - case ARM64::STRXui: - case ARM64::STURXi: + case AArch64::STRXui: + case AArch64::STURXi: return 8; - case ARM64::LDRSui: - case ARM64::LDURSi: + case AArch64::LDRSui: + case AArch64::LDURSi: return 4; - case ARM64::LDRDui: - case ARM64::LDURDi: + case AArch64::LDRDui: + case AArch64::LDURDi: return 8; - case ARM64::LDRQui: - case ARM64::LDURQi: + case AArch64::LDRQui: + case AArch64::LDURQi: return 16; - case ARM64::LDRWui: - case ARM64::LDURWi: + case AArch64::LDRWui: + case AArch64::LDURWi: return 4; - case ARM64::LDRXui: - case ARM64::LDURXi: + case AArch64::LDRXui: + case AArch64::LDURXi: return 8; } } @@ -180,36 +180,36 @@ static 
unsigned getMatchingPairOpcode(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Opcode has no pairwise equivalent!"); - case ARM64::STRSui: - case ARM64::STURSi: - return ARM64::STPSi; - case ARM64::STRDui: - case ARM64::STURDi: - return ARM64::STPDi; - case ARM64::STRQui: - case ARM64::STURQi: - return ARM64::STPQi; - case ARM64::STRWui: - case ARM64::STURWi: - return ARM64::STPWi; - case ARM64::STRXui: - case ARM64::STURXi: - return ARM64::STPXi; - case ARM64::LDRSui: - case ARM64::LDURSi: - return ARM64::LDPSi; - case ARM64::LDRDui: - case ARM64::LDURDi: - return ARM64::LDPDi; - case ARM64::LDRQui: - case ARM64::LDURQi: - return ARM64::LDPQi; - case ARM64::LDRWui: - case ARM64::LDURWi: - return ARM64::LDPWi; - case ARM64::LDRXui: - case ARM64::LDURXi: - return ARM64::LDPXi; + case AArch64::STRSui: + case AArch64::STURSi: + return AArch64::STPSi; + case AArch64::STRDui: + case AArch64::STURDi: + return AArch64::STPDi; + case AArch64::STRQui: + case AArch64::STURQi: + return AArch64::STPQi; + case AArch64::STRWui: + case AArch64::STURWi: + return AArch64::STPWi; + case AArch64::STRXui: + case AArch64::STURXi: + return AArch64::STPXi; + case AArch64::LDRSui: + case AArch64::LDURSi: + return AArch64::LDPSi; + case AArch64::LDRDui: + case AArch64::LDURDi: + return AArch64::LDPDi; + case AArch64::LDRQui: + case AArch64::LDURQi: + return AArch64::LDPQi; + case AArch64::LDRWui: + case AArch64::LDURWi: + return AArch64::LDPWi; + case AArch64::LDRXui: + case AArch64::LDURXi: + return AArch64::LDPXi; } } @@ -217,16 +217,16 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Opcode has no pre-indexed equivalent!"); - case ARM64::STRSui: return ARM64::STRSpre; - case ARM64::STRDui: return ARM64::STRDpre; - case ARM64::STRQui: return ARM64::STRQpre; - case ARM64::STRWui: return ARM64::STRWpre; - case ARM64::STRXui: return ARM64::STRXpre; - case ARM64::LDRSui: return ARM64::LDRSpre; - case ARM64::LDRDui: return ARM64::LDRDpre; - case ARM64::LDRQui: return ARM64::LDRQpre; - case ARM64::LDRWui: return ARM64::LDRWpre; - case ARM64::LDRXui: return ARM64::LDRXpre; + case AArch64::STRSui: return AArch64::STRSpre; + case AArch64::STRDui: return AArch64::STRDpre; + case AArch64::STRQui: return AArch64::STRQpre; + case AArch64::STRWui: return AArch64::STRWpre; + case AArch64::STRXui: return AArch64::STRXpre; + case AArch64::LDRSui: return AArch64::LDRSpre; + case AArch64::LDRDui: return AArch64::LDRDpre; + case AArch64::LDRQui: return AArch64::LDRQpre; + case AArch64::LDRWui: return AArch64::LDRWpre; + case AArch64::LDRXui: return AArch64::LDRXpre; } } @@ -234,33 +234,33 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Opcode has no post-indexed wise equivalent!"); - case ARM64::STRSui: - return ARM64::STRSpost; - case ARM64::STRDui: - return ARM64::STRDpost; - case ARM64::STRQui: - return ARM64::STRQpost; - case ARM64::STRWui: - return ARM64::STRWpost; - case ARM64::STRXui: - return ARM64::STRXpost; - case ARM64::LDRSui: - return ARM64::LDRSpost; - case ARM64::LDRDui: - return ARM64::LDRDpost; - case ARM64::LDRQui: - return ARM64::LDRQpost; - case ARM64::LDRWui: - return ARM64::LDRWpost; - case ARM64::LDRXui: - return ARM64::LDRXpost; + case AArch64::STRSui: + return AArch64::STRSpost; + case AArch64::STRDui: + return AArch64::STRDpost; + case AArch64::STRQui: + return AArch64::STRQpost; + case AArch64::STRWui: + return AArch64::STRWpost; + case AArch64::STRXui: + return AArch64::STRXpost; + case 
AArch64::LDRSui: + return AArch64::LDRSpost; + case AArch64::LDRDui: + return AArch64::LDRDpost; + case AArch64::LDRQui: + return AArch64::LDRQpost; + case AArch64::LDRWui: + return AArch64::LDRWpost; + case AArch64::LDRXui: + return AArch64::LDRXpost; } } MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, - bool mergeForward) { +AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Paired, + bool mergeForward) { MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need @@ -271,7 +271,8 @@ ARM64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, ++NextI; bool IsUnscaled = isUnscaledLdst(I->getOpcode()); - int OffsetStride = IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(I) : 1; + int OffsetStride = + IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1; unsigned NewOpc = getMatchingPairOpcode(I->getOpcode()); // Insert our new paired instruction after whichever of the paired @@ -294,7 +295,7 @@ ARM64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, } // Handle Unscaled int OffsetImm = RtMI->getOperand(2).getImm(); - if (IsUnscaled && EnableARM64UnscaledMemOp) + if (IsUnscaled && EnableAArch64UnscaledMemOp) OffsetImm /= OffsetStride; // Construct the new instruction. @@ -372,8 +373,8 @@ static int alignTo(int Num, int PowOf2) { /// findMatchingInsn - Scan the instructions looking for a load/store that can /// be combined with the current instruction into a load/store pair. MachineBasicBlock::iterator -ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, - bool &mergeForward, unsigned Limit) { +AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, + bool &mergeForward, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr *FirstMI = I; @@ -394,7 +395,7 @@ ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, if (FirstMI->modifiesRegister(BaseReg, TRI)) return E; int OffsetStride = - IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(FirstMI) : 1; + IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1; if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) return E; @@ -444,7 +445,7 @@ ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // If the alignment requirements of the paired (scaled) instruction // can't express the offset of the unscaled input, bail and keep // looking. 
- if (IsUnscaled && EnableARM64UnscaledMemOp && + if (IsUnscaled && EnableAArch64UnscaledMemOp && (alignTo(MinOffset, OffsetStride) != MinOffset)) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); continue; @@ -507,10 +508,10 @@ ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, } MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update) { - assert((Update->getOpcode() == ARM64::ADDXri || - Update->getOpcode() == ARM64::SUBXri) && +AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Update) { + assert((Update->getOpcode() == AArch64::ADDXri || + Update->getOpcode() == AArch64::SUBXri) && "Unexpected base register update instruction to merge!"); MachineBasicBlock::iterator NextI = I; // Return the instruction following the merged instruction, which is @@ -520,9 +521,9 @@ ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, ++NextI; int Value = Update->getOperand(2).getImm(); - assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && + assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && "Can't merge 1 << 12 offset into pre-indexed load / store"); - if (Update->getOpcode() == ARM64::SUBXri) + if (Update->getOpcode() == AArch64::SUBXri) Value = -Value; unsigned NewOpc = getPreIndexedOpcode(I->getOpcode()); @@ -550,11 +551,10 @@ ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, return NextI; } -MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update) { - assert((Update->getOpcode() == ARM64::ADDXri || - Update->getOpcode() == ARM64::SUBXri) && +MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn( + MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update) { + assert((Update->getOpcode() == AArch64::ADDXri || + Update->getOpcode() == AArch64::SUBXri) && "Unexpected base register update instruction to merge!"); MachineBasicBlock::iterator NextI = I; // Return the instruction following the merged instruction, which is @@ -564,9 +564,9 @@ ARM64LoadStoreOpt::mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, ++NextI; int Value = Update->getOperand(2).getImm(); - assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && + assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && "Can't merge 1 << 12 offset into post-indexed load / store"); - if (Update->getOpcode() == ARM64::SUBXri) + if (Update->getOpcode() == AArch64::SUBXri) Value = -Value; unsigned NewOpc = getPostIndexedOpcode(I->getOpcode()); @@ -599,17 +599,17 @@ static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, switch (MI->getOpcode()) { default: break; - case ARM64::SUBXri: + case AArch64::SUBXri: // Negate the offset for a SUB instruction. Offset *= -1; // FALLTHROUGH - case ARM64::ADDXri: + case AArch64::ADDXri: // Make sure it's a vanilla immediate operand, not a relocation or // anything else we can't handle. if (!MI->getOperand(2).isImm()) break; // Watch out for 1 << 12 shifted value. - if (ARM64_AM::getShiftValue(MI->getOperand(3).getImm())) + if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm())) break; // If the instruction has the base register as source and dest and the // immediate will fit in a signed 9-bit integer, then we have a match. 
@@ -627,9 +627,8 @@ static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, return false; } -MachineBasicBlock::iterator -ARM64LoadStoreOpt::findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, - unsigned Limit, int Value) { +MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( + MachineBasicBlock::iterator I, unsigned Limit, int Value) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineInstr *MemMI = I; MachineBasicBlock::iterator MBBI = I; @@ -682,9 +681,8 @@ ARM64LoadStoreOpt::findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, return E; } -MachineBasicBlock::iterator -ARM64LoadStoreOpt::findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, - unsigned Limit) { +MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( + MachineBasicBlock::iterator I, unsigned Limit) { MachineBasicBlock::iterator B = I->getParent()->begin(); MachineBasicBlock::iterator E = I->getParent()->end(); MachineInstr *MemMI = I; @@ -736,7 +734,7 @@ ARM64LoadStoreOpt::findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, return E; } -bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { +bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { bool Modified = false; // Two tranformations to do here: // 1) Find loads and stores that can be merged into a single load or store @@ -762,27 +760,27 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { // Just move on to the next instruction. ++MBBI; break; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: + case AArch64::STRXui: + case AArch64::STRWui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: + case AArch64::LDRXui: + case AArch64::LDRWui: // do the unscaled versions as well - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURWi: - case ARM64::STURXi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURWi: - case ARM64::LDURXi: { + case AArch64::STURSi: + case AArch64::STURDi: + case AArch64::STURQi: + case AArch64::STURWi: + case AArch64::STURXi: + case AArch64::LDURSi: + case AArch64::LDURDi: + case AArch64::LDURQi: + case AArch64::LDURWi: + case AArch64::LDURXi: { // If this is a volatile load/store, don't mess with it. if (MI->hasOrderedMemoryRef()) { ++MBBI; @@ -794,7 +792,7 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { break; } // Check if this load/store has a hint to avoid pair formation. - // MachineMemOperands hints are set by the ARM64StorePairSuppress pass. + // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. if (TII->isLdStPairSuppressed(MI)) { ++MBBI; break; @@ -833,27 +831,27 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { // Just move on to the next instruction. 
++MBBI; break; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: + case AArch64::STRXui: + case AArch64::STRWui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: + case AArch64::LDRXui: + case AArch64::LDRWui: // do the unscaled versions as well - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURWi: - case ARM64::STURXi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURWi: - case ARM64::LDURXi: { + case AArch64::STURSi: + case AArch64::STURDi: + case AArch64::STURQi: + case AArch64::STURWi: + case AArch64::STURXi: + case AArch64::LDURSi: + case AArch64::LDURDi: + case AArch64::LDURQi: + case AArch64::LDURWi: + case AArch64::LDURXi: { // Make sure this is a reg+imm (as opposed to an address reloc). if (!MI->getOperand(2).isImm()) { ++MBBI; @@ -922,9 +920,9 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { return Modified; } -bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { +bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); - TII = static_cast(TM.getInstrInfo()); + TII = static_cast(TM.getInstrInfo()); TRI = TM.getRegisterInfo(); bool Modified = false; @@ -939,6 +937,6 @@ bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { /// createARMLoadStoreOptimizationPass - returns an instance of the load / store /// optimization pass. -FunctionPass *llvm::createARM64LoadStoreOptimizationPass() { - return new ARM64LoadStoreOpt(); +FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() { + return new AArch64LoadStoreOpt(); } diff --git a/lib/Target/ARM64/ARM64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp similarity index 58% rename from lib/Target/ARM64/ARM64MCInstLower.cpp rename to lib/Target/AArch64/AArch64MCInstLower.cpp index 525f484ca4c5..ab6d37532a70 100644 --- a/lib/Target/ARM64/ARM64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -1,4 +1,4 @@ -//===-- ARM64MCInstLower.cpp - Convert ARM64 MachineInstr to an MCInst---===// +//==-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst --==// // // The LLVM Compiler Infrastructure // @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// // -// This file contains code to lower ARM64 MachineInstrs to their corresponding +// This file contains code to lower AArch64 MachineInstrs to their corresponding // MCInst records. 
// //===----------------------------------------------------------------------===// -#include "ARM64MCInstLower.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "Utils/ARM64BaseInfo.h" +#include "AArch64MCInstLower.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" @@ -25,46 +25,46 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; -ARM64MCInstLower::ARM64MCInstLower(MCContext &ctx, Mangler &mang, - AsmPrinter &printer) +AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, Mangler &mang, + AsmPrinter &printer) : Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {} MCSymbol * -ARM64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { +AArch64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { return Printer.getSymbol(MO.getGlobal()); } MCSymbol * -ARM64MCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const { +AArch64MCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const { return Printer.GetExternalSymbolSymbol(MO.getSymbolName()); } -MCOperand ARM64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, - MCSymbol *Sym) const { +MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, + MCSymbol *Sym) const { // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; - if ((MO.getTargetFlags() & ARM64II::MO_GOT) != 0) { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) + if ((MO.getTargetFlags() & AArch64II::MO_GOT) != 0) { + if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) RefKind = MCSymbolRefExpr::VK_GOTPAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == + AArch64II::MO_PAGEOFF) RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF; else assert(0 && "Unexpected target flags with MO_GOT on GV operand"); - } else if ((MO.getTargetFlags() & ARM64II::MO_TLS) != 0) { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) + } else if ((MO.getTargetFlags() & AArch64II::MO_TLS) != 0) { + if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) RefKind = MCSymbolRefExpr::VK_TLVPPAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == + AArch64II::MO_PAGEOFF) RefKind = MCSymbolRefExpr::VK_TLVPPAGEOFF; else llvm_unreachable("Unexpected target flags with MO_TLS on GV operand"); } else { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) + if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) RefKind = MCSymbolRefExpr::VK_PAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == + AArch64II::MO_PAGEOFF) RefKind = MCSymbolRefExpr::VK_PAGEOFF; } const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); @@ -74,13 +74,13 @@ MCOperand ARM64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, return MCOperand::CreateExpr(Expr); } -MCOperand ARM64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, - MCSymbol *Sym) const { +MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, + MCSymbol *Sym) 
const { uint32_t RefFlags = 0; - if (MO.getTargetFlags() & ARM64II::MO_GOT) - RefFlags |= ARM64MCExpr::VK_GOT; - else if (MO.getTargetFlags() & ARM64II::MO_TLS) { + if (MO.getTargetFlags() & AArch64II::MO_GOT) + RefFlags |= AArch64MCExpr::VK_GOT; + else if (MO.getTargetFlags() & AArch64II::MO_TLS) { TLSModel::Model Model; if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); @@ -93,39 +93,40 @@ MCOperand ARM64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, } switch (Model) { case TLSModel::InitialExec: - RefFlags |= ARM64MCExpr::VK_GOTTPREL; + RefFlags |= AArch64MCExpr::VK_GOTTPREL; break; case TLSModel::LocalExec: - RefFlags |= ARM64MCExpr::VK_TPREL; + RefFlags |= AArch64MCExpr::VK_TPREL; break; case TLSModel::LocalDynamic: - RefFlags |= ARM64MCExpr::VK_DTPREL; + RefFlags |= AArch64MCExpr::VK_DTPREL; break; case TLSModel::GeneralDynamic: - RefFlags |= ARM64MCExpr::VK_TLSDESC; + RefFlags |= AArch64MCExpr::VK_TLSDESC; break; } } else { // No modifier means this is a generic reference, classified as absolute for // the cases where it matters (:abs_g0: etc). - RefFlags |= ARM64MCExpr::VK_ABS; + RefFlags |= AArch64MCExpr::VK_ABS; } - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) - RefFlags |= ARM64MCExpr::VK_PAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGEOFF) - RefFlags |= ARM64MCExpr::VK_PAGEOFF; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G3) - RefFlags |= ARM64MCExpr::VK_G3; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G2) - RefFlags |= ARM64MCExpr::VK_G2; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G1) - RefFlags |= ARM64MCExpr::VK_G1; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G0) - RefFlags |= ARM64MCExpr::VK_G0; - - if (MO.getTargetFlags() & ARM64II::MO_NC) - RefFlags |= ARM64MCExpr::VK_NC; + if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) + RefFlags |= AArch64MCExpr::VK_PAGE; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == + AArch64II::MO_PAGEOFF) + RefFlags |= AArch64MCExpr::VK_PAGEOFF; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G3) + RefFlags |= AArch64MCExpr::VK_G3; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G2) + RefFlags |= AArch64MCExpr::VK_G2; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G1) + RefFlags |= AArch64MCExpr::VK_G1; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0) + RefFlags |= AArch64MCExpr::VK_G0; + + if (MO.getTargetFlags() & AArch64II::MO_NC) + RefFlags |= AArch64MCExpr::VK_NC; const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx); @@ -133,15 +134,15 @@ MCOperand ARM64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, Expr = MCBinaryExpr::CreateAdd( Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); - ARM64MCExpr::VariantKind RefKind; - RefKind = static_cast(RefFlags); - Expr = ARM64MCExpr::Create(Expr, RefKind, Ctx); + AArch64MCExpr::VariantKind RefKind; + RefKind = static_cast(RefFlags); + Expr = AArch64MCExpr::Create(Expr, RefKind, Ctx); return MCOperand::CreateExpr(Expr); } -MCOperand ARM64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, - MCSymbol *Sym) const { +MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, + MCSymbol *Sym) const { if (TargetTriple.isOSDarwin()) return lowerSymbolOperandDarwin(MO, Sym); @@ -149,8 +150,8 @@ MCOperand 
ARM64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, return lowerSymbolOperandELF(MO, Sym); } -bool ARM64MCInstLower::lowerOperand(const MachineOperand &MO, - MCOperand &MCOp) const { +bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, + MCOperand &MCOp) const { switch (MO.getType()) { default: assert(0 && "unknown operand type"); @@ -190,7 +191,7 @@ bool ARM64MCInstLower::lowerOperand(const MachineOperand &MO, return true; } -void ARM64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { +void AArch64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { diff --git a/lib/Target/ARM64/ARM64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h similarity index 78% rename from lib/Target/ARM64/ARM64MCInstLower.h rename to lib/Target/AArch64/AArch64MCInstLower.h index 7e3a2c8e54f2..ba50ba9e2fe5 100644 --- a/lib/Target/ARM64/ARM64MCInstLower.h +++ b/lib/Target/AArch64/AArch64MCInstLower.h @@ -1,4 +1,4 @@ -//===-- ARM64MCInstLower.h - Lower MachineInstr to MCInst ----------------===// +//===-- AArch64MCInstLower.h - Lower MachineInstr to MCInst ---------------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARM64_MCINSTLOWER_H -#define ARM64_MCINSTLOWER_H +#ifndef AArch64_MCINSTLOWER_H +#define AArch64_MCINSTLOWER_H #include "llvm/ADT/Triple.h" #include "llvm/Support/Compiler.h" @@ -25,15 +25,15 @@ class MachineModuleInfoMachO; class MachineOperand; class Mangler; -/// ARM64MCInstLower - This class is used to lower an MachineInstr +/// AArch64MCInstLower - This class is used to lower an MachineInstr /// into an MCInst. -class LLVM_LIBRARY_VISIBILITY ARM64MCInstLower { +class LLVM_LIBRARY_VISIBILITY AArch64MCInstLower { MCContext &Ctx; AsmPrinter &Printer; Triple TargetTriple; public: - ARM64MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer); + AArch64MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer); bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const; void Lower(const MachineInstr *MI, MCInst &OutMI) const; diff --git a/lib/Target/ARM64/ARM64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h similarity index 90% rename from lib/Target/ARM64/ARM64MachineFunctionInfo.h rename to lib/Target/AArch64/AArch64MachineFunctionInfo.h index 0b6f4f1ec646..7c257ba9116f 100644 --- a/lib/Target/ARM64/ARM64MachineFunctionInfo.h +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===- ARM64MachineFuctionInfo.h - ARM64 machine function info --*- C++ -*-===// +//=- AArch64MachineFuctionInfo.h - AArch64 machine function info --*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This file declares ARM64-specific per-machine-function information. +// This file declares AArch64-specific per-machine-function information. 
// //===----------------------------------------------------------------------===// -#ifndef ARM64MACHINEFUNCTIONINFO_H -#define ARM64MACHINEFUNCTIONINFO_H +#ifndef AArch64MACHINEFUNCTIONINFO_H +#define AArch64MACHINEFUNCTIONINFO_H #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -21,9 +21,9 @@ namespace llvm { -/// ARM64FunctionInfo - This class is derived from MachineFunctionInfo and -/// contains private ARM64-specific information for each MachineFunction. -class ARM64FunctionInfo : public MachineFunctionInfo { +/// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and +/// contains private AArch64-specific information for each MachineFunction. +class AArch64FunctionInfo : public MachineFunctionInfo { /// Number of bytes of arguments this function has on the stack. If the callee /// is expected to restore the argument stack this should be a multiple of 16, @@ -73,12 +73,12 @@ class ARM64FunctionInfo : public MachineFunctionInfo { unsigned VarArgsFPRSize; public: - ARM64FunctionInfo() + AArch64FunctionInfo() : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {} - explicit ARM64FunctionInfo(MachineFunction &MF) + explicit AArch64FunctionInfo(MachineFunction &MF) : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) { @@ -160,4 +160,4 @@ class ARM64FunctionInfo : public MachineFunctionInfo { }; } // End llvm namespace -#endif // ARM64MACHINEFUNCTIONINFO_H +#endif // AArch64MACHINEFUNCTIONINFO_H diff --git a/lib/Target/ARM64/ARM64PerfectShuffle.h b/lib/Target/AArch64/AArch64PerfectShuffle.h similarity index 99% rename from lib/Target/ARM64/ARM64PerfectShuffle.h rename to lib/Target/AArch64/AArch64PerfectShuffle.h index 6759236fd143..b22fa2424d5c 100644 --- a/lib/Target/ARM64/ARM64PerfectShuffle.h +++ b/lib/Target/AArch64/AArch64PerfectShuffle.h @@ -1,4 +1,4 @@ -//===-- ARM64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -------------===// +//===-- AArch64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -----------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM64/ARM64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp similarity index 92% rename from lib/Target/ARM64/ARM64PromoteConstant.cpp rename to lib/Target/AArch64/AArch64PromoteConstant.cpp index e61a62262d39..4723cc4978e5 100644 --- a/lib/Target/ARM64/ARM64PromoteConstant.cpp +++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp @@ -1,5 +1,4 @@ - -//===-- ARM64PromoteConstant.cpp --- Promote constant to global for ARM64 -===// +//=- AArch64PromoteConstant.cpp --- Promote constant to global for AArch64 -==// // // The LLVM Compiler Infrastructure // @@ -8,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements the ARM64PromoteConstant pass which promotes constants +// This file implements the AArch64PromoteConstant pass which promotes constants // to global variables when this is likely to be more efficient. Currently only // types related to constant vector (i.e., constant vector, array of constant // vectors, constant structure with a constant vector field, etc.) are promoted @@ -21,7 +20,7 @@ // FIXME: This pass may be useful for other targets too. 
//===----------------------------------------------------------------------===// -#include "ARM64.h" +#include "AArch64.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" @@ -41,17 +40,17 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-promote-const" +#define DEBUG_TYPE "aarch64-promote-const" // Stress testing mode - disable heuristics. -static cl::opt Stress("arm64-stress-promote-const", cl::Hidden, +static cl::opt Stress("aarch64-stress-promote-const", cl::Hidden, cl::desc("Promote all vector constants")); STATISTIC(NumPromoted, "Number of promoted constants"); STATISTIC(NumPromotedUses, "Number of promoted constants uses"); //===----------------------------------------------------------------------===// -// ARM64PromoteConstant +// AArch64PromoteConstant //===----------------------------------------------------------------------===// namespace { @@ -81,13 +80,13 @@ namespace { /// /// Therefore the final assembly final has 4 different loads. With this pass /// enabled, only one load is issued for the constants. -class ARM64PromoteConstant : public ModulePass { +class AArch64PromoteConstant : public ModulePass { public: static char ID; - ARM64PromoteConstant() : ModulePass(ID) {} + AArch64PromoteConstant() : ModulePass(ID) {} - const char *getPassName() const override { return "ARM64 Promote Constant"; } + const char *getPassName() const override { return "AArch64 Promote Constant"; } /// Iterate over the functions and promote the interesting constants into /// global variables with module scope. @@ -202,20 +201,20 @@ class ARM64PromoteConstant : public ModulePass { }; } // end anonymous namespace -char ARM64PromoteConstant::ID = 0; +char AArch64PromoteConstant::ID = 0; namespace llvm { -void initializeARM64PromoteConstantPass(PassRegistry &); +void initializeAArch64PromoteConstantPass(PassRegistry &); } -INITIALIZE_PASS_BEGIN(ARM64PromoteConstant, "arm64-promote-const", - "ARM64 Promote Constant Pass", false, false) +INITIALIZE_PASS_BEGIN(AArch64PromoteConstant, "aarch64-promote-const", + "AArch64 Promote Constant Pass", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(ARM64PromoteConstant, "arm64-promote-const", - "ARM64 Promote Constant Pass", false, false) +INITIALIZE_PASS_END(AArch64PromoteConstant, "aarch64-promote-const", + "AArch64 Promote Constant Pass", false, false) -ModulePass *llvm::createARM64PromoteConstantPass() { - return new ARM64PromoteConstant(); +ModulePass *llvm::createAArch64PromoteConstantPass() { + return new AArch64PromoteConstant(); } /// Check if the given type uses a vector type. @@ -330,7 +329,7 @@ static bool shouldConvert(const Constant *Cst) { } Instruction * -ARM64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) { +AArch64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) { // If this user is a phi, the insertion point is in the related // incoming basic block. 
PHINode *PhiInst = dyn_cast(*Use); @@ -344,9 +343,9 @@ ARM64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) { return InsertionPoint; } -bool ARM64PromoteConstant::isDominated(Instruction *NewPt, - Value::user_iterator &UseIt, - InsertionPoints &InsertPts) { +bool AArch64PromoteConstant::isDominated(Instruction *NewPt, + Value::user_iterator &UseIt, + InsertionPoints &InsertPts) { DominatorTree &DT = getAnalysis( *NewPt->getParent()->getParent()).getDomTree(); @@ -371,9 +370,9 @@ bool ARM64PromoteConstant::isDominated(Instruction *NewPt, return false; } -bool ARM64PromoteConstant::tryAndMerge(Instruction *NewPt, - Value::user_iterator &UseIt, - InsertionPoints &InsertPts) { +bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, + Value::user_iterator &UseIt, + InsertionPoints &InsertPts) { DominatorTree &DT = getAnalysis( *NewPt->getParent()->getParent()).getDomTree(); BasicBlock *NewBB = NewPt->getParent(); @@ -422,7 +421,7 @@ bool ARM64PromoteConstant::tryAndMerge(Instruction *NewPt, return false; } -void ARM64PromoteConstant::computeInsertionPoints( +void AArch64PromoteConstant::computeInsertionPoints( Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) { DEBUG(dbgs() << "** Compute insertion points **\n"); for (Value::user_iterator UseIt = Val->user_begin(), @@ -464,9 +463,8 @@ void ARM64PromoteConstant::computeInsertionPoints( } } -bool -ARM64PromoteConstant::insertDefinitions(Constant *Cst, - InsertionPointsPerFunc &InsPtsPerFunc) { +bool AArch64PromoteConstant::insertDefinitions( + Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc) { // We will create one global variable per Module. DenseMap ModuleToMergedGV; bool HasChanged = false; @@ -533,13 +531,13 @@ ARM64PromoteConstant::insertDefinitions(Constant *Cst, return HasChanged; } -bool ARM64PromoteConstant::computeAndInsertDefinitions(Constant *Val) { +bool AArch64PromoteConstant::computeAndInsertDefinitions(Constant *Val) { InsertionPointsPerFunc InsertPtsPerFunc; computeInsertionPoints(Val, InsertPtsPerFunc); return insertDefinitions(Val, InsertPtsPerFunc); } -bool ARM64PromoteConstant::promoteConstant(Constant *Cst) { +bool AArch64PromoteConstant::promoteConstant(Constant *Cst) { assert(Cst && "Given variable is not a valid constant."); if (!shouldConvert(Cst)) @@ -553,7 +551,7 @@ bool ARM64PromoteConstant::promoteConstant(Constant *Cst) { return computeAndInsertDefinitions(Cst); } -bool ARM64PromoteConstant::runOnFunction(Function &F) { +bool AArch64PromoteConstant::runOnFunction(Function &F) { // Look for instructions using constant vector. Promote that constant to a // global variable. Create as few loads of this variable as possible and // update the uses accordingly. diff --git a/lib/Target/ARM64/ARM64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp similarity index 66% rename from lib/Target/ARM64/ARM64RegisterInfo.cpp rename to lib/Target/AArch64/AArch64RegisterInfo.cpp index d3c647bd90b2..48a361d50e55 100644 --- a/lib/Target/ARM64/ARM64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- ARM64RegisterInfo.cpp - ARM64 Register Information -----------------===// +//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===// // // The LLVM Compiler Infrastructure // @@ -7,15 +7,16 @@ // //===----------------------------------------------------------------------===// // -// This file contains the ARM64 implementation of the TargetRegisterInfo class. 
+// This file contains the AArch64 implementation of the TargetRegisterInfo +// class. // //===----------------------------------------------------------------------===// -#include "ARM64RegisterInfo.h" -#include "ARM64FrameLowering.h" -#include "ARM64InstrInfo.h" -#include "ARM64Subtarget.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64RegisterInfo.h" +#include "AArch64FrameLowering.h" +#include "AArch64InstrInfo.h" +#include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -30,39 +31,39 @@ using namespace llvm; #define GET_REGINFO_TARGET_DESC -#include "ARM64GenRegisterInfo.inc" +#include "AArch64GenRegisterInfo.inc" -ARM64RegisterInfo::ARM64RegisterInfo(const ARM64InstrInfo *tii, - const ARM64Subtarget *sti) - : ARM64GenRegisterInfo(ARM64::LR), TII(tii), STI(sti) {} +AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii, + const AArch64Subtarget *sti) + : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {} const MCPhysReg * -ARM64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { +AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) - return CSR_ARM64_AllRegs_SaveList; + return CSR_AArch64_AllRegs_SaveList; else - return CSR_ARM64_AAPCS_SaveList; + return CSR_AArch64_AAPCS_SaveList; } const uint32_t * -ARM64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { +AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (CC == CallingConv::AnyReg) - return CSR_ARM64_AllRegs_RegMask; + return CSR_AArch64_AllRegs_RegMask; else - return CSR_ARM64_AAPCS_RegMask; + return CSR_AArch64_AAPCS_RegMask; } -const uint32_t *ARM64RegisterInfo::getTLSCallPreservedMask() const { +const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const { if (STI->isTargetDarwin()) - return CSR_ARM64_TLS_Darwin_RegMask; + return CSR_AArch64_TLS_Darwin_RegMask; assert(STI->isTargetELF() && "only expect Darwin or ELF TLS"); - return CSR_ARM64_TLS_ELF_RegMask; + return CSR_AArch64_TLS_ELF_RegMask; } const uint32_t * -ARM64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { +AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { // This should return a register mask that is the same as that returned by // getCallPreservedMask but that additionally preserves the register used for // the first i64 argument (which must also be the register used to return a @@ -70,57 +71,58 @@ ARM64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { // // In case that the calling convention does not use the same register for // both, the function should return NULL (does not currently apply) - return CSR_ARM64_AAPCS_ThisReturn_RegMask; + return CSR_AArch64_AAPCS_ThisReturn_RegMask; } -BitVector ARM64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { +BitVector +AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); // FIXME: avoid re-calculating this every time. 
BitVector Reserved(getNumRegs()); - Reserved.set(ARM64::SP); - Reserved.set(ARM64::XZR); - Reserved.set(ARM64::WSP); - Reserved.set(ARM64::WZR); + Reserved.set(AArch64::SP); + Reserved.set(AArch64::XZR); + Reserved.set(AArch64::WSP); + Reserved.set(AArch64::WZR); if (TFI->hasFP(MF) || STI->isTargetDarwin()) { - Reserved.set(ARM64::FP); - Reserved.set(ARM64::W29); + Reserved.set(AArch64::FP); + Reserved.set(AArch64::W29); } if (STI->isTargetDarwin()) { - Reserved.set(ARM64::X18); // Platform register - Reserved.set(ARM64::W18); + Reserved.set(AArch64::X18); // Platform register + Reserved.set(AArch64::W18); } if (hasBasePointer(MF)) { - Reserved.set(ARM64::X19); - Reserved.set(ARM64::W19); + Reserved.set(AArch64::X19); + Reserved.set(AArch64::W19); } return Reserved; } -bool ARM64RegisterInfo::isReservedReg(const MachineFunction &MF, +bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); switch (Reg) { default: break; - case ARM64::SP: - case ARM64::XZR: - case ARM64::WSP: - case ARM64::WZR: + case AArch64::SP: + case AArch64::XZR: + case AArch64::WSP: + case AArch64::WZR: return true; - case ARM64::X18: - case ARM64::W18: + case AArch64::X18: + case AArch64::W18: return STI->isTargetDarwin(); - case ARM64::FP: - case ARM64::W29: + case AArch64::FP: + case AArch64::W29: return TFI->hasFP(MF) || STI->isTargetDarwin(); - case ARM64::W19: - case ARM64::X19: + case AArch64::W19: + case AArch64::X19: return hasBasePointer(MF); } @@ -128,21 +130,21 @@ bool ARM64RegisterInfo::isReservedReg(const MachineFunction &MF, } const TargetRegisterClass * -ARM64RegisterInfo::getPointerRegClass(const MachineFunction &MF, +AArch64RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { - return &ARM64::GPR64RegClass; + return &AArch64::GPR64RegClass; } const TargetRegisterClass * -ARM64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { - if (RC == &ARM64::CCRRegClass) +AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { + if (RC == &AArch64::CCRRegClass) return nullptr; // Can't copy NZCV. return RC; } -unsigned ARM64RegisterInfo::getBaseRegister() const { return ARM64::X19; } +unsigned AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; } -bool ARM64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { +bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); // In the presence of variable sized objects, if the fixed stack size is @@ -165,37 +167,39 @@ bool ARM64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return false; } -unsigned ARM64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { +unsigned +AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - return TFI->hasFP(MF) ? ARM64::FP : ARM64::SP; + return TFI->hasFP(MF) ? 
AArch64::FP : AArch64::SP; } -bool -ARM64RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { +bool AArch64RegisterInfo::requiresRegisterScavenging( + const MachineFunction &MF) const { return true; } -bool ARM64RegisterInfo::requiresVirtualBaseRegisters(const MachineFunction &MF) - const { +bool AArch64RegisterInfo::requiresVirtualBaseRegisters( + const MachineFunction &MF) const { return true; } bool -ARM64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { +AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - // ARM64FrameLowering::resolveFrameIndexReference() can always fall back + // AArch64FrameLowering::resolveFrameIndexReference() can always fall back // to the stack pointer, so only put the emergency spill slot next to the // FP when there's no better way to access it (SP or base pointer). return MFI->hasVarSizedObjects() && !hasBasePointer(MF); } -bool ARM64RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) - const { +bool AArch64RegisterInfo::requiresFrameIndexScavenging( + const MachineFunction &MF) const { return true; } -bool ARM64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const { +bool +AArch64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); // Only consider eliminating leaf frames. if (MFI->hasCalls() || (MF.getTarget().Options.DisableFramePointerElim(MF) && @@ -208,8 +212,8 @@ bool ARM64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const { /// reference would be better served by a base register other than FP /// or SP. Used by LocalStackFrameAllocation to determine which frame index /// references it should create new base registers for. -bool ARM64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, - int64_t Offset) const { +bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, + int64_t Offset) const { for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) assert(i < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!"); @@ -268,30 +272,30 @@ bool ARM64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, return true; } -bool ARM64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, - int64_t Offset) const { +bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, + int64_t Offset) const { assert(Offset <= INT_MAX && "Offset too big to fit in int."); assert(MI && "Unable to get the legal offset for nil instruction."); int SaveOffset = Offset; - return isARM64FrameOffsetLegal(*MI, SaveOffset) & ARM64FrameOffsetIsLegal; + return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal; } /// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx /// at the beginning of the basic block. 
-void ARM64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, - unsigned BaseReg, - int FrameIdx, - int64_t Offset) const { +void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, + unsigned BaseReg, + int FrameIdx, + int64_t Offset) const { MachineBasicBlock::iterator Ins = MBB->begin(); DebugLoc DL; // Defaults to "unknown" if (Ins != MBB->end()) DL = Ins->getDebugLoc(); - const MCInstrDesc &MCID = TII->get(ARM64::ADDXri); + const MCInstrDesc &MCID = TII->get(AArch64::ADDXri); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); const MachineFunction &MF = *MBB->getParent(); MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF)); - unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0); + unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); BuildMI(*MBB, Ins, DL, MCID, BaseReg) .addFrameIndex(FrameIdx) @@ -299,8 +303,8 @@ void ARM64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, .addImm(Shifter); } -void ARM64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, - int64_t Offset) const { +void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, + int64_t Offset) const { int Off = Offset; // ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -308,20 +312,20 @@ void ARM64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, ++i; assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } - bool Done = rewriteARM64FrameIndex(MI, i, BaseReg, Off, TII); + bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII); assert(Done && "Unable to resolve frame index!"); (void)Done; } -void ARM64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, unsigned FIOperandNum, - RegScavenger *RS) const { +void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const ARM64FrameLowering *TFI = static_cast( + const AArch64FrameLowering *TFI = static_cast( MF.getTarget().getFrameLowering()); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); @@ -341,7 +345,7 @@ void ARM64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Modify MI as necessary to handle as much of 'Offset' as possible Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg); - if (rewriteARM64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) + if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) return; assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) && @@ -351,48 +355,48 @@ void ARM64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // as much as possible above. Handle the rest, providing a register that is // SP+LargeImm. 
unsigned ScratchReg = - MF.getRegInfo().createVirtualRegister(&ARM64::GPR64RegClass); + MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII); MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true); } namespace llvm { -unsigned ARM64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { +unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); switch (RC->getID()) { default: return 0; - case ARM64::GPR32RegClassID: - case ARM64::GPR32spRegClassID: - case ARM64::GPR32allRegClassID: - case ARM64::GPR64spRegClassID: - case ARM64::GPR64allRegClassID: - case ARM64::GPR64RegClassID: - case ARM64::GPR32commonRegClassID: - case ARM64::GPR64commonRegClassID: + case AArch64::GPR32RegClassID: + case AArch64::GPR32spRegClassID: + case AArch64::GPR32allRegClassID: + case AArch64::GPR64spRegClassID: + case AArch64::GPR64allRegClassID: + case AArch64::GPR64RegClassID: + case AArch64::GPR32commonRegClassID: + case AArch64::GPR64commonRegClassID: return 32 - 1 // XZR/SP - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP - STI->isTargetDarwin() // X18 reserved as platform register - hasBasePointer(MF); // X19 - case ARM64::FPR8RegClassID: - case ARM64::FPR16RegClassID: - case ARM64::FPR32RegClassID: - case ARM64::FPR64RegClassID: - case ARM64::FPR128RegClassID: + case AArch64::FPR8RegClassID: + case AArch64::FPR16RegClassID: + case AArch64::FPR32RegClassID: + case AArch64::FPR64RegClassID: + case AArch64::FPR128RegClassID: return 32; - case ARM64::DDRegClassID: - case ARM64::DDDRegClassID: - case ARM64::DDDDRegClassID: - case ARM64::QQRegClassID: - case ARM64::QQQRegClassID: - case ARM64::QQQQRegClassID: + case AArch64::DDRegClassID: + case AArch64::DDDRegClassID: + case AArch64::DDDDRegClassID: + case AArch64::QQRegClassID: + case AArch64::QQQRegClassID: + case AArch64::QQQQRegClassID: return 32; - case ARM64::FPR128_loRegClassID: + case AArch64::FPR128_loRegClassID: return 16; } } diff --git a/lib/Target/ARM64/ARM64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h similarity index 85% rename from lib/Target/ARM64/ARM64RegisterInfo.h rename to lib/Target/AArch64/AArch64RegisterInfo.h index 7691fadbcc8a..76af1edce723 100644 --- a/lib/Target/ARM64/ARM64RegisterInfo.h +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -1,4 +1,4 @@ -//===- ARM64RegisterInfo.h - ARM64 Register Information Impl ----*- C++ -*-===// +//==- AArch64RegisterInfo.h - AArch64 Register Information Impl --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -7,31 +7,31 @@ // //===----------------------------------------------------------------------===// // -// This file contains the ARM64 implementation of the MRegisterInfo class. +// This file contains the AArch64 implementation of the MRegisterInfo class. 
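
The GPR pressure limits computed in getRegPressureLimit above follow a simple subtraction: start from the 32 architectural registers and drop one slot for each register the subtarget reserves. A minimal sketch of that arithmetic, using an invented helper (illustrative only, not code from the patch):

  // Mirrors the pressure-limit arithmetic from getRegPressureLimit above.
  #include <cassert>

  static unsigned gprPressureLimit(bool HasFP, bool IsDarwin, bool HasBasePtr) {
    return 32 - 1                          // XZR/SP never allocatable
           - ((HasFP || IsDarwin) ? 1 : 0) // FP (X29/W29) reserved
           - (IsDarwin ? 1 : 0)            // X18 reserved as platform register
           - (HasBasePtr ? 1 : 0);         // X19 reserved as base pointer
  }

  int main() {
    assert(gprPressureLimit(true, true, false) == 29);   // Darwin, frame pointer
    assert(gprPressureLimit(false, false, false) == 31); // ELF leaf function
    return 0;
  }
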
// //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM64REGISTERINFO_H -#define LLVM_TARGET_ARM64REGISTERINFO_H +#ifndef LLVM_TARGET_AArch64REGISTERINFO_H +#define LLVM_TARGET_AArch64REGISTERINFO_H #define GET_REGINFO_HEADER -#include "ARM64GenRegisterInfo.inc" +#include "AArch64GenRegisterInfo.inc" namespace llvm { -class ARM64InstrInfo; -class ARM64Subtarget; +class AArch64InstrInfo; +class AArch64Subtarget; class MachineFunction; class RegScavenger; class TargetRegisterClass; -struct ARM64RegisterInfo : public ARM64GenRegisterInfo { +struct AArch64RegisterInfo : public AArch64GenRegisterInfo { private: - const ARM64InstrInfo *TII; - const ARM64Subtarget *STI; + const AArch64InstrInfo *TII; + const AArch64Subtarget *STI; public: - ARM64RegisterInfo(const ARM64InstrInfo *tii, const ARM64Subtarget *sti); + AArch64RegisterInfo(const AArch64InstrInfo *tii, const AArch64Subtarget *sti); bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; @@ -98,4 +98,4 @@ struct ARM64RegisterInfo : public ARM64GenRegisterInfo { } // end namespace llvm -#endif // LLVM_TARGET_ARM64REGISTERINFO_H +#endif // LLVM_TARGET_AArch64REGISTERINFO_H diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td new file mode 100644 index 000000000000..21c927f2385b --- /dev/null +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -0,0 +1,593 @@ +//=- AArch64RegisterInfo.td - Describe the AArch64 Regisers --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + + +class AArch64Reg enc, string n, list subregs = [], + list altNames = []> + : Register { + let HWEncoding = enc; + let Namespace = "AArch64"; + let SubRegs = subregs; +} + +let Namespace = "AArch64" in { + def sub_32 : SubRegIndex<32>; + + def bsub : SubRegIndex<8>; + def hsub : SubRegIndex<16>; + def ssub : SubRegIndex<32>; + def dsub : SubRegIndex<32>; + def qhisub : SubRegIndex<64>; + def qsub : SubRegIndex<64>; + // Note: Code depends on these having consecutive numbers + def dsub0 : SubRegIndex<64>; + def dsub1 : SubRegIndex<64>; + def dsub2 : SubRegIndex<64>; + def dsub3 : SubRegIndex<64>; + // Note: Code depends on these having consecutive numbers + def qsub0 : SubRegIndex<128>; + def qsub1 : SubRegIndex<128>; + def qsub2 : SubRegIndex<128>; + def qsub3 : SubRegIndex<128>; +} + +let Namespace = "AArch64" in { + def vreg : RegAltNameIndex; + def vlist1 : RegAltNameIndex; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// +def W0 : AArch64Reg<0, "w0" >, DwarfRegNum<[0]>; +def W1 : AArch64Reg<1, "w1" >, DwarfRegNum<[1]>; +def W2 : AArch64Reg<2, "w2" >, DwarfRegNum<[2]>; +def W3 : AArch64Reg<3, "w3" >, DwarfRegNum<[3]>; +def W4 : AArch64Reg<4, "w4" >, DwarfRegNum<[4]>; +def W5 : AArch64Reg<5, "w5" >, DwarfRegNum<[5]>; +def W6 : AArch64Reg<6, "w6" >, DwarfRegNum<[6]>; +def W7 : AArch64Reg<7, "w7" >, DwarfRegNum<[7]>; +def W8 : AArch64Reg<8, "w8" >, DwarfRegNum<[8]>; +def W9 : AArch64Reg<9, "w9" >, DwarfRegNum<[9]>; +def W10 : AArch64Reg<10, "w10">, DwarfRegNum<[10]>; +def W11 : AArch64Reg<11, "w11">, DwarfRegNum<[11]>; +def W12 : 
AArch64Reg<12, "w12">, DwarfRegNum<[12]>; +def W13 : AArch64Reg<13, "w13">, DwarfRegNum<[13]>; +def W14 : AArch64Reg<14, "w14">, DwarfRegNum<[14]>; +def W15 : AArch64Reg<15, "w15">, DwarfRegNum<[15]>; +def W16 : AArch64Reg<16, "w16">, DwarfRegNum<[16]>; +def W17 : AArch64Reg<17, "w17">, DwarfRegNum<[17]>; +def W18 : AArch64Reg<18, "w18">, DwarfRegNum<[18]>; +def W19 : AArch64Reg<19, "w19">, DwarfRegNum<[19]>; +def W20 : AArch64Reg<20, "w20">, DwarfRegNum<[20]>; +def W21 : AArch64Reg<21, "w21">, DwarfRegNum<[21]>; +def W22 : AArch64Reg<22, "w22">, DwarfRegNum<[22]>; +def W23 : AArch64Reg<23, "w23">, DwarfRegNum<[23]>; +def W24 : AArch64Reg<24, "w24">, DwarfRegNum<[24]>; +def W25 : AArch64Reg<25, "w25">, DwarfRegNum<[25]>; +def W26 : AArch64Reg<26, "w26">, DwarfRegNum<[26]>; +def W27 : AArch64Reg<27, "w27">, DwarfRegNum<[27]>; +def W28 : AArch64Reg<28, "w28">, DwarfRegNum<[28]>; +def W29 : AArch64Reg<29, "w29">, DwarfRegNum<[29]>; +def W30 : AArch64Reg<30, "w30">, DwarfRegNum<[30]>; +def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>; +def WZR : AArch64Reg<31, "wzr">, DwarfRegAlias; + +let SubRegIndices = [sub_32] in { +def X0 : AArch64Reg<0, "x0", [W0]>, DwarfRegAlias; +def X1 : AArch64Reg<1, "x1", [W1]>, DwarfRegAlias; +def X2 : AArch64Reg<2, "x2", [W2]>, DwarfRegAlias; +def X3 : AArch64Reg<3, "x3", [W3]>, DwarfRegAlias; +def X4 : AArch64Reg<4, "x4", [W4]>, DwarfRegAlias; +def X5 : AArch64Reg<5, "x5", [W5]>, DwarfRegAlias; +def X6 : AArch64Reg<6, "x6", [W6]>, DwarfRegAlias; +def X7 : AArch64Reg<7, "x7", [W7]>, DwarfRegAlias; +def X8 : AArch64Reg<8, "x8", [W8]>, DwarfRegAlias; +def X9 : AArch64Reg<9, "x9", [W9]>, DwarfRegAlias; +def X10 : AArch64Reg<10, "x10", [W10]>, DwarfRegAlias; +def X11 : AArch64Reg<11, "x11", [W11]>, DwarfRegAlias; +def X12 : AArch64Reg<12, "x12", [W12]>, DwarfRegAlias; +def X13 : AArch64Reg<13, "x13", [W13]>, DwarfRegAlias; +def X14 : AArch64Reg<14, "x14", [W14]>, DwarfRegAlias; +def X15 : AArch64Reg<15, "x15", [W15]>, DwarfRegAlias; +def X16 : AArch64Reg<16, "x16", [W16]>, DwarfRegAlias; +def X17 : AArch64Reg<17, "x17", [W17]>, DwarfRegAlias; +def X18 : AArch64Reg<18, "x18", [W18]>, DwarfRegAlias; +def X19 : AArch64Reg<19, "x19", [W19]>, DwarfRegAlias; +def X20 : AArch64Reg<20, "x20", [W20]>, DwarfRegAlias; +def X21 : AArch64Reg<21, "x21", [W21]>, DwarfRegAlias; +def X22 : AArch64Reg<22, "x22", [W22]>, DwarfRegAlias; +def X23 : AArch64Reg<23, "x23", [W23]>, DwarfRegAlias; +def X24 : AArch64Reg<24, "x24", [W24]>, DwarfRegAlias; +def X25 : AArch64Reg<25, "x25", [W25]>, DwarfRegAlias; +def X26 : AArch64Reg<26, "x26", [W26]>, DwarfRegAlias; +def X27 : AArch64Reg<27, "x27", [W27]>, DwarfRegAlias; +def X28 : AArch64Reg<28, "x28", [W28]>, DwarfRegAlias; +def FP : AArch64Reg<29, "x29", [W29]>, DwarfRegAlias; +def LR : AArch64Reg<30, "x30", [W30]>, DwarfRegAlias; +def SP : AArch64Reg<31, "sp", [WSP]>, DwarfRegAlias; +def XZR : AArch64Reg<31, "xzr", [WZR]>, DwarfRegAlias; +} + +// Condition code register. +def NZCV : AArch64Reg<0, "nzcv">; + +// GPR register classes with the intersections of GPR32/GPR32sp and +// GPR64/GPR64sp for use by the coalescer. +def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> { + let AltOrders = [(rotl GPR32common, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64common : RegisterClass<"AArch64", [i64], 64, + (add (sequence "X%u", 0, 28), FP, LR)> { + let AltOrders = [(rotl GPR64common, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +// GPR register classes which exclude SP/WSP. 
+def GPR32 : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR)> { + let AltOrders = [(rotl GPR32, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64 : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR)> { + let AltOrders = [(rotl GPR64, 8)]; + let AltOrderSelect = [{ return 1; }]; +} + +// GPR register classes which include SP/WSP. +def GPR32sp : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WSP)> { + let AltOrders = [(rotl GPR32sp, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64sp : RegisterClass<"AArch64", [i64], 64, (add GPR64common, SP)> { + let AltOrders = [(rotl GPR64sp, 8)]; + let AltOrderSelect = [{ return 1; }]; +} + +def GPR32sponly : RegisterClass<"AArch64", [i32], 32, (add WSP)>; +def GPR64sponly : RegisterClass<"AArch64", [i64], 64, (add SP)>; + +def GPR64spPlus0Operand : AsmOperandClass { + let Name = "GPR64sp0"; + let RenderMethod = "addRegOperands"; + let ParserMethod = "tryParseGPR64sp0Operand"; +} + +def GPR64sp0 : RegisterOperand { + let ParserMatchClass = GPR64spPlus0Operand; +} + +// GPR register classes which include WZR/XZR AND SP/WSP. This is not a +// constraint used by any instructions, it is used as a common super-class. +def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>; +def GPR64all : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR, SP)>; + +// For tail calls, we can't use callee-saved registers, as they are restored +// to the saved value before the tail call, which would clobber a call address. +// This is for indirect tail calls to store the address of the destination. +def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X21, + X22, X23, X24, X25, X26, + X27, X28)>; + +// GPR register classes for post increment amount of vector load/store that +// has alternate printing when Rm=31 and prints a constant immediate value +// equal to the total number of bytes transferred. + +// FIXME: TableGen *should* be able to do these itself now. There appears to be +// a bug in counting how many operands a Post-indexed MCInst should have which +// means the aliases don't trigger. +def GPR64pi1 : RegisterOperand">; +def GPR64pi2 : RegisterOperand">; +def GPR64pi3 : RegisterOperand">; +def GPR64pi4 : RegisterOperand">; +def GPR64pi6 : RegisterOperand">; +def GPR64pi8 : RegisterOperand">; +def GPR64pi12 : RegisterOperand">; +def GPR64pi16 : RegisterOperand">; +def GPR64pi24 : RegisterOperand">; +def GPR64pi32 : RegisterOperand">; +def GPR64pi48 : RegisterOperand">; +def GPR64pi64 : RegisterOperand">; + +// Condition code regclass. +def CCR : RegisterClass<"AArch64", [i32], 32, (add NZCV)> { + let CopyCost = -1; // Don't allow copying of status registers. + + // CCR is not allocatable. 
+ let isAllocatable = 0; +} + +//===----------------------------------------------------------------------===// +// Floating Point Scalar Registers +//===----------------------------------------------------------------------===// + +def B0 : AArch64Reg<0, "b0">, DwarfRegNum<[64]>; +def B1 : AArch64Reg<1, "b1">, DwarfRegNum<[65]>; +def B2 : AArch64Reg<2, "b2">, DwarfRegNum<[66]>; +def B3 : AArch64Reg<3, "b3">, DwarfRegNum<[67]>; +def B4 : AArch64Reg<4, "b4">, DwarfRegNum<[68]>; +def B5 : AArch64Reg<5, "b5">, DwarfRegNum<[69]>; +def B6 : AArch64Reg<6, "b6">, DwarfRegNum<[70]>; +def B7 : AArch64Reg<7, "b7">, DwarfRegNum<[71]>; +def B8 : AArch64Reg<8, "b8">, DwarfRegNum<[72]>; +def B9 : AArch64Reg<9, "b9">, DwarfRegNum<[73]>; +def B10 : AArch64Reg<10, "b10">, DwarfRegNum<[74]>; +def B11 : AArch64Reg<11, "b11">, DwarfRegNum<[75]>; +def B12 : AArch64Reg<12, "b12">, DwarfRegNum<[76]>; +def B13 : AArch64Reg<13, "b13">, DwarfRegNum<[77]>; +def B14 : AArch64Reg<14, "b14">, DwarfRegNum<[78]>; +def B15 : AArch64Reg<15, "b15">, DwarfRegNum<[79]>; +def B16 : AArch64Reg<16, "b16">, DwarfRegNum<[80]>; +def B17 : AArch64Reg<17, "b17">, DwarfRegNum<[81]>; +def B18 : AArch64Reg<18, "b18">, DwarfRegNum<[82]>; +def B19 : AArch64Reg<19, "b19">, DwarfRegNum<[83]>; +def B20 : AArch64Reg<20, "b20">, DwarfRegNum<[84]>; +def B21 : AArch64Reg<21, "b21">, DwarfRegNum<[85]>; +def B22 : AArch64Reg<22, "b22">, DwarfRegNum<[86]>; +def B23 : AArch64Reg<23, "b23">, DwarfRegNum<[87]>; +def B24 : AArch64Reg<24, "b24">, DwarfRegNum<[88]>; +def B25 : AArch64Reg<25, "b25">, DwarfRegNum<[89]>; +def B26 : AArch64Reg<26, "b26">, DwarfRegNum<[90]>; +def B27 : AArch64Reg<27, "b27">, DwarfRegNum<[91]>; +def B28 : AArch64Reg<28, "b28">, DwarfRegNum<[92]>; +def B29 : AArch64Reg<29, "b29">, DwarfRegNum<[93]>; +def B30 : AArch64Reg<30, "b30">, DwarfRegNum<[94]>; +def B31 : AArch64Reg<31, "b31">, DwarfRegNum<[95]>; + +let SubRegIndices = [bsub] in { +def H0 : AArch64Reg<0, "h0", [B0]>, DwarfRegAlias; +def H1 : AArch64Reg<1, "h1", [B1]>, DwarfRegAlias; +def H2 : AArch64Reg<2, "h2", [B2]>, DwarfRegAlias; +def H3 : AArch64Reg<3, "h3", [B3]>, DwarfRegAlias; +def H4 : AArch64Reg<4, "h4", [B4]>, DwarfRegAlias; +def H5 : AArch64Reg<5, "h5", [B5]>, DwarfRegAlias; +def H6 : AArch64Reg<6, "h6", [B6]>, DwarfRegAlias; +def H7 : AArch64Reg<7, "h7", [B7]>, DwarfRegAlias; +def H8 : AArch64Reg<8, "h8", [B8]>, DwarfRegAlias; +def H9 : AArch64Reg<9, "h9", [B9]>, DwarfRegAlias; +def H10 : AArch64Reg<10, "h10", [B10]>, DwarfRegAlias; +def H11 : AArch64Reg<11, "h11", [B11]>, DwarfRegAlias; +def H12 : AArch64Reg<12, "h12", [B12]>, DwarfRegAlias; +def H13 : AArch64Reg<13, "h13", [B13]>, DwarfRegAlias; +def H14 : AArch64Reg<14, "h14", [B14]>, DwarfRegAlias; +def H15 : AArch64Reg<15, "h15", [B15]>, DwarfRegAlias; +def H16 : AArch64Reg<16, "h16", [B16]>, DwarfRegAlias; +def H17 : AArch64Reg<17, "h17", [B17]>, DwarfRegAlias; +def H18 : AArch64Reg<18, "h18", [B18]>, DwarfRegAlias; +def H19 : AArch64Reg<19, "h19", [B19]>, DwarfRegAlias; +def H20 : AArch64Reg<20, "h20", [B20]>, DwarfRegAlias; +def H21 : AArch64Reg<21, "h21", [B21]>, DwarfRegAlias; +def H22 : AArch64Reg<22, "h22", [B22]>, DwarfRegAlias; +def H23 : AArch64Reg<23, "h23", [B23]>, DwarfRegAlias; +def H24 : AArch64Reg<24, "h24", [B24]>, DwarfRegAlias; +def H25 : AArch64Reg<25, "h25", [B25]>, DwarfRegAlias; +def H26 : AArch64Reg<26, "h26", [B26]>, DwarfRegAlias; +def H27 : AArch64Reg<27, "h27", [B27]>, DwarfRegAlias; +def H28 : AArch64Reg<28, "h28", [B28]>, DwarfRegAlias; +def H29 : AArch64Reg<29, "h29", 
[B29]>, DwarfRegAlias; +def H30 : AArch64Reg<30, "h30", [B30]>, DwarfRegAlias; +def H31 : AArch64Reg<31, "h31", [B31]>, DwarfRegAlias; +} + +let SubRegIndices = [hsub] in { +def S0 : AArch64Reg<0, "s0", [H0]>, DwarfRegAlias; +def S1 : AArch64Reg<1, "s1", [H1]>, DwarfRegAlias; +def S2 : AArch64Reg<2, "s2", [H2]>, DwarfRegAlias; +def S3 : AArch64Reg<3, "s3", [H3]>, DwarfRegAlias; +def S4 : AArch64Reg<4, "s4", [H4]>, DwarfRegAlias; +def S5 : AArch64Reg<5, "s5", [H5]>, DwarfRegAlias; +def S6 : AArch64Reg<6, "s6", [H6]>, DwarfRegAlias; +def S7 : AArch64Reg<7, "s7", [H7]>, DwarfRegAlias; +def S8 : AArch64Reg<8, "s8", [H8]>, DwarfRegAlias; +def S9 : AArch64Reg<9, "s9", [H9]>, DwarfRegAlias; +def S10 : AArch64Reg<10, "s10", [H10]>, DwarfRegAlias; +def S11 : AArch64Reg<11, "s11", [H11]>, DwarfRegAlias; +def S12 : AArch64Reg<12, "s12", [H12]>, DwarfRegAlias; +def S13 : AArch64Reg<13, "s13", [H13]>, DwarfRegAlias; +def S14 : AArch64Reg<14, "s14", [H14]>, DwarfRegAlias; +def S15 : AArch64Reg<15, "s15", [H15]>, DwarfRegAlias; +def S16 : AArch64Reg<16, "s16", [H16]>, DwarfRegAlias; +def S17 : AArch64Reg<17, "s17", [H17]>, DwarfRegAlias; +def S18 : AArch64Reg<18, "s18", [H18]>, DwarfRegAlias; +def S19 : AArch64Reg<19, "s19", [H19]>, DwarfRegAlias; +def S20 : AArch64Reg<20, "s20", [H20]>, DwarfRegAlias; +def S21 : AArch64Reg<21, "s21", [H21]>, DwarfRegAlias; +def S22 : AArch64Reg<22, "s22", [H22]>, DwarfRegAlias; +def S23 : AArch64Reg<23, "s23", [H23]>, DwarfRegAlias; +def S24 : AArch64Reg<24, "s24", [H24]>, DwarfRegAlias; +def S25 : AArch64Reg<25, "s25", [H25]>, DwarfRegAlias; +def S26 : AArch64Reg<26, "s26", [H26]>, DwarfRegAlias; +def S27 : AArch64Reg<27, "s27", [H27]>, DwarfRegAlias; +def S28 : AArch64Reg<28, "s28", [H28]>, DwarfRegAlias; +def S29 : AArch64Reg<29, "s29", [H29]>, DwarfRegAlias; +def S30 : AArch64Reg<30, "s30", [H30]>, DwarfRegAlias; +def S31 : AArch64Reg<31, "s31", [H31]>, DwarfRegAlias; +} + +let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in { +def D0 : AArch64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias; +def D1 : AArch64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias; +def D2 : AArch64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias; +def D3 : AArch64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias; +def D4 : AArch64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias; +def D5 : AArch64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias; +def D6 : AArch64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias; +def D7 : AArch64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias; +def D8 : AArch64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias; +def D9 : AArch64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias; +def D10 : AArch64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias; +def D11 : AArch64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias; +def D12 : AArch64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias; +def D13 : AArch64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias; +def D14 : AArch64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias; +def D15 : AArch64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias; +def D16 : AArch64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias; +def D17 : AArch64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias; +def D18 : AArch64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias; +def D19 : AArch64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias; +def D20 : AArch64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias; +def D21 : AArch64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias; +def D22 : AArch64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias; +def D23 : 
AArch64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias; +def D24 : AArch64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias; +def D25 : AArch64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias; +def D26 : AArch64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias; +def D27 : AArch64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias; +def D28 : AArch64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias; +def D29 : AArch64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias; +def D30 : AArch64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias; +def D31 : AArch64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias; +} + +let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in { +def Q0 : AArch64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias; +def Q1 : AArch64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias; +def Q2 : AArch64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias; +def Q3 : AArch64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias; +def Q4 : AArch64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias; +def Q5 : AArch64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias; +def Q6 : AArch64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias; +def Q7 : AArch64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias; +def Q8 : AArch64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias; +def Q9 : AArch64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias; +def Q10 : AArch64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias; +def Q11 : AArch64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias; +def Q12 : AArch64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias; +def Q13 : AArch64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias; +def Q14 : AArch64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias; +def Q15 : AArch64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias; +def Q16 : AArch64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias; +def Q17 : AArch64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias; +def Q18 : AArch64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias; +def Q19 : AArch64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias; +def Q20 : AArch64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias; +def Q21 : AArch64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias; +def Q22 : AArch64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias; +def Q23 : AArch64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias; +def Q24 : AArch64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias; +def Q25 : AArch64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias; +def Q26 : AArch64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias; +def Q27 : AArch64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias; +def Q28 : AArch64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias; +def Q29 : AArch64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias; +def Q30 : AArch64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias; +def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias; +} + +def FPR8 : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> { + let Size = 8; +} +def FPR16 : RegisterClass<"AArch64", [f16], 16, (sequence "H%u", 0, 31)> { + let Size = 16; +} +def FPR32 : RegisterClass<"AArch64", [f32, i32], 32,(sequence "S%u", 0, 31)>; +def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32, + v1i64], + 64, (sequence "D%u", 0, 31)>; +// We don't (yet) have an f128 legal type, so don't use that here. We +// normalize 128-bit vectors to v2f64 for arg passing and such, so use +// that here. 
+def FPR128 : RegisterClass<"AArch64", + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128], + 128, (sequence "Q%u", 0, 31)>; + +// The lower 16 vector registers. Some instructions can only take registers +// in this range. +def FPR128_lo : RegisterClass<"AArch64", + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + 128, (trunc FPR128, 16)>; + +// Pairs, triples, and quads of 64-bit vector registers. +def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>; +def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2], + [(rotl FPR64, 0), (rotl FPR64, 1), + (rotl FPR64, 2)]>; +def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3], + [(rotl FPR64, 0), (rotl FPR64, 1), + (rotl FPR64, 2), (rotl FPR64, 3)]>; +def DD : RegisterClass<"AArch64", [untyped], 64, (add DSeqPairs)> { + let Size = 128; +} +def DDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqTriples)> { + let Size = 196; +} +def DDDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqQuads)> { + let Size = 256; +} + +// Pairs, triples, and quads of 128-bit vector registers. +def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>; +def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2], + [(rotl FPR128, 0), (rotl FPR128, 1), + (rotl FPR128, 2)]>; +def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3], + [(rotl FPR128, 0), (rotl FPR128, 1), + (rotl FPR128, 2), (rotl FPR128, 3)]>; +def QQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqPairs)> { + let Size = 256; +} +def QQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqTriples)> { + let Size = 384; +} +def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> { + let Size = 512; +} + + +// Vector operand versions of the FP registers. Alternate name printing and +// assmebler matching. +def VectorReg64AsmOperand : AsmOperandClass { + let Name = "VectorReg64"; + let PredicateMethod = "isVectorReg"; +} +def VectorReg128AsmOperand : AsmOperandClass { + let Name = "VectorReg128"; + let PredicateMethod = "isVectorReg"; +} + +def V64 : RegisterOperand { + let ParserMatchClass = VectorReg64AsmOperand; +} + +def V128 : RegisterOperand { + let ParserMatchClass = VectorReg128AsmOperand; +} + +def VectorRegLoAsmOperand : AsmOperandClass { let Name = "VectorRegLo"; } +def V128_lo : RegisterOperand { + let ParserMatchClass = VectorRegLoAsmOperand; +} + +class TypedVecListAsmOperand + : AsmOperandClass { + let Name = "TypedVectorList" # count # "_" # lanes # kind; + + let PredicateMethod + = "isTypedVectorList<" # count # ", " # lanes # ", '" # kind # "'>"; + let RenderMethod = "addVectorList" # regsize # "Operands<" # count # ">"; +} + +class TypedVecListRegOperand + : RegisterOperand">; + +multiclass VectorList { + // With implicit types (probably on instruction instead). E.g. { v0, v1 } + def _64AsmOperand : AsmOperandClass { + let Name = NAME # "64"; + let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; + let RenderMethod = "addVectorList64Operands<" # count # ">"; + } + + def "64" : RegisterOperand { + let ParserMatchClass = !cast(NAME # "_64AsmOperand"); + } + + def _128AsmOperand : AsmOperandClass { + let Name = NAME # "128"; + let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; + let RenderMethod = "addVectorList128Operands<" # count # ">"; + } + + def "128" : RegisterOperand { + let ParserMatchClass = !cast(NAME # "_128AsmOperand"); + } + + // 64-bit register lists with explicit type. 
+ + // { v0.8b, v1.8b } + def _8bAsmOperand : TypedVecListAsmOperand; + def "8b" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_8bAsmOperand"); + } + + // { v0.4h, v1.4h } + def _4hAsmOperand : TypedVecListAsmOperand; + def "4h" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_4hAsmOperand"); + } + + // { v0.2s, v1.2s } + def _2sAsmOperand : TypedVecListAsmOperand; + def "2s" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_2sAsmOperand"); + } + + // { v0.1d, v1.1d } + def _1dAsmOperand : TypedVecListAsmOperand; + def "1d" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_1dAsmOperand"); + } + + // 128-bit register lists with explicit type + + // { v0.16b, v1.16b } + def _16bAsmOperand : TypedVecListAsmOperand; + def "16b" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_16bAsmOperand"); + } + + // { v0.8h, v1.8h } + def _8hAsmOperand : TypedVecListAsmOperand; + def "8h" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_8hAsmOperand"); + } + + // { v0.4s, v1.4s } + def _4sAsmOperand : TypedVecListAsmOperand; + def "4s" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_4sAsmOperand"); + } + + // { v0.2d, v1.2d } + def _2dAsmOperand : TypedVecListAsmOperand; + def "2d" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_2dAsmOperand"); + } + + // { v0.b, v1.b } + def _bAsmOperand : TypedVecListAsmOperand; + def "b" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_bAsmOperand"); + } + + // { v0.h, v1.h } + def _hAsmOperand : TypedVecListAsmOperand; + def "h" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_hAsmOperand"); + } + + // { v0.s, v1.s } + def _sAsmOperand : TypedVecListAsmOperand; + def "s" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_sAsmOperand"); + } + + // { v0.d, v1.d } + def _dAsmOperand : TypedVecListAsmOperand; + def "d" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_dAsmOperand"); + } + + +} + +defm VecListOne : VectorList<1, FPR64, FPR128>; +defm VecListTwo : VectorList<2, DD, QQ>; +defm VecListThree : VectorList<3, DDD, QQQ>; +defm VecListFour : VectorList<4, DDDD, QQQQ>; + + +// Register operand versions of the scalar FP registers. 
+def FPR16Op : RegisterOperand; +def FPR32Op : RegisterOperand; +def FPR64Op : RegisterOperand; +def FPR128Op : RegisterOperand; diff --git a/lib/Target/ARM64/ARM64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td similarity index 99% rename from lib/Target/ARM64/ARM64SchedA53.td rename to lib/Target/AArch64/AArch64SchedA53.td index cf1a82027642..0c3949ecfc17 100644 --- a/lib/Target/ARM64/ARM64SchedA53.td +++ b/lib/Target/AArch64/AArch64SchedA53.td @@ -1,4 +1,4 @@ -//=- ARM64SchedA53.td - ARM Cortex-A53 Scheduling Definitions -*- tablegen -*-=// +//==- AArch64SchedA53.td - Cortex-A53 Scheduling Definitions -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM64/ARM64SchedCyclone.td b/lib/Target/AArch64/AArch64SchedCyclone.td similarity index 98% rename from lib/Target/ARM64/ARM64SchedCyclone.td rename to lib/Target/AArch64/AArch64SchedCyclone.td index c04a7bb8bafa..a2a180237789 100644 --- a/lib/Target/ARM64/ARM64SchedCyclone.td +++ b/lib/Target/AArch64/AArch64SchedCyclone.td @@ -1,4 +1,4 @@ -//=- ARMSchedCyclone.td - ARM64 Cyclone Scheduling Defs ------*- tablegen -*-=// +//=- ARMSchedCyclone.td - AArch64 Cyclone Scheduling Defs ----*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines the machine model for ARM64 Cyclone to support +// This file defines the machine model for AArch64 Cyclone to support // instruction scheduling and other instruction cost heuristics. // //===----------------------------------------------------------------------===// @@ -239,13 +239,13 @@ def : WriteRes { def CyWriteLDIdx : SchedWriteVariant<[ SchedVar, // Load from scaled register. SchedVar]>; // Load from register offset. -def : SchedAlias; // Map ARM64->Cyclone type. +def : SchedAlias; // Map AArch64->Cyclone type. // EXAMPLE: STR Xn, Xm [, lsl 3] def CyWriteSTIdx : SchedWriteVariant<[ SchedVar, // Store to scaled register. SchedVar]>; // Store to register offset. -def : SchedAlias; // Map ARM64->Cyclone type. +def : SchedAlias; // Map AArch64->Cyclone type. // Read the (unshifted) base register Xn in the second micro-op one cycle later. // EXAMPLE: LDR Xn, Xm [, lsl 3] @@ -253,7 +253,7 @@ def ReadBaseRS : SchedReadAdvance<1>; def CyReadAdrBase : SchedReadVariant<[ SchedVar, // Read base reg after shifting offset. SchedVar]>; // Read base reg with no shift. -def : SchedAlias; // Map ARM64->Cyclone type. +def : SchedAlias; // Map AArch64->Cyclone type. //--- // 7.8.9,7.8.11. Load/Store, paired diff --git a/lib/Target/ARM64/ARM64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td similarity index 95% rename from lib/Target/ARM64/ARM64Schedule.td rename to lib/Target/AArch64/AArch64Schedule.td index 3a4194173a8e..eaa9110ab1bc 100644 --- a/lib/Target/ARM64/ARM64Schedule.td +++ b/lib/Target/AArch64/AArch64Schedule.td @@ -1,4 +1,4 @@ -//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===// +//==-- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -11,12 +11,12 @@ // const MachineInstr *MI and const TargetSchedModel *SchedModel // are defined by default. 
def : PredicateProlog<[{ - const ARM64InstrInfo *TII = - static_cast(SchedModel->getInstrInfo()); + const AArch64InstrInfo *TII = + static_cast(SchedModel->getInstrInfo()); (void)TII; }]>; -// ARM64 Scheduler Definitions +// AArch64 Scheduler Definitions def WriteImm : SchedWrite; // MOVN, MOVZ // TODO: Provide variants for MOV32/64imm Pseudos that dynamically diff --git a/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp similarity index 74% rename from lib/Target/ARM64/ARM64SelectionDAGInfo.cpp rename to lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index f8a2527616c0..5c65b750ee55 100644 --- a/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARM64SelectionDAGInfo.cpp - ARM64 SelectionDAG Info ---------------===// +//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===// // // The LLVM Compiler Infrastructure // @@ -7,22 +7,22 @@ // //===----------------------------------------------------------------------===// // -// This file implements the ARM64SelectionDAGInfo class. +// This file implements the AArch64SelectionDAGInfo class. // //===----------------------------------------------------------------------===// -#include "ARM64TargetMachine.h" +#include "AArch64TargetMachine.h" using namespace llvm; -#define DEBUG_TYPE "arm64-selectiondag-info" +#define DEBUG_TYPE "aarch64-selectiondag-info" -ARM64SelectionDAGInfo::ARM64SelectionDAGInfo(const TargetMachine &TM) +AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const TargetMachine &TM) : TargetSelectionDAGInfo(TM), - Subtarget(&TM.getSubtarget()) {} + Subtarget(&TM.getSubtarget()) {} -ARM64SelectionDAGInfo::~ARM64SelectionDAGInfo() {} +AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {} -SDValue ARM64SelectionDAGInfo::EmitTargetCodeForMemset( +SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { @@ -34,8 +34,9 @@ SDValue ARM64SelectionDAGInfo::EmitTargetCodeForMemset( // For small size (< 256), it is not beneficial to use bzero // instead of memset. if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) { - const ARM64TargetLowering &TLI = *static_cast( - DAG.getTarget().getTargetLowering()); + const AArch64TargetLowering &TLI = + *static_cast( + DAG.getTarget().getTargetLowering()); EVT IntPtr = TLI.getPointerTy(); Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); diff --git a/lib/Target/ARM64/ARM64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h similarity index 69% rename from lib/Target/ARM64/ARM64SelectionDAGInfo.h rename to lib/Target/AArch64/AArch64SelectionDAGInfo.h index 770775fc02dc..8381f9916a8c 100644 --- a/lib/Target/ARM64/ARM64SelectionDAGInfo.h +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -1,4 +1,4 @@ -//===-- ARM64SelectionDAGInfo.h - ARM64 SelectionDAG Info -------*- C++ -*-===// +//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,25 +7,25 @@ // //===----------------------------------------------------------------------===// // -// This file defines the ARM64 subclass for TargetSelectionDAGInfo. +// This file defines the AArch64 subclass for TargetSelectionDAGInfo. 
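
EmitTargetCodeForMemset above only diverts a memset to bzero when the subtarget names a bzero entry point and the length is either not a compile-time constant or exceeds 256 bytes; for small known sizes the inline expansion wins. A hedged sketch of just that guard, with invented names:

  // Standalone guard mirroring the bzero-vs-memset choice above; the 256-byte
  // threshold is the one from the patch.
  #include <cassert>
  #include <cstdint>
  #include <optional>

  static bool shouldCallBZero(bool HasBZeroEntry,
                              std::optional<uint64_t> KnownSize) {
    // Small constant-size memsets are better expanded inline, so bzero is
    // only used when the size is unknown or larger than 256 bytes.
    return HasBZeroEntry && (!KnownSize || *KnownSize > 256);
  }

  int main() {
    assert(shouldCallBZero(true, std::nullopt)); // unknown size: call bzero
    assert(!shouldCallBZero(true, 64));          // 64 bytes: inline memset
    return 0;
  }
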
// //===----------------------------------------------------------------------===// -#ifndef ARM64SELECTIONDAGINFO_H -#define ARM64SELECTIONDAGINFO_H +#ifndef AArch64SELECTIONDAGINFO_H +#define AArch64SELECTIONDAGINFO_H #include "llvm/Target/TargetSelectionDAGInfo.h" namespace llvm { -class ARM64SelectionDAGInfo : public TargetSelectionDAGInfo { +class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo { /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; + const AArch64Subtarget *Subtarget; public: - explicit ARM64SelectionDAGInfo(const TargetMachine &TM); - ~ARM64SelectionDAGInfo(); + explicit AArch64SelectionDAGInfo(const TargetMachine &TM); + ~AArch64SelectionDAGInfo(); SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, diff --git a/lib/Target/ARM64/ARM64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp similarity index 82% rename from lib/Target/ARM64/ARM64StorePairSuppress.cpp rename to lib/Target/AArch64/AArch64StorePairSuppress.cpp index a9501ed92177..45f8ddbd2d85 100644 --- a/lib/Target/ARM64/ARM64StorePairSuppress.cpp +++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -1,4 +1,4 @@ -//===---- ARM64StorePairSuppress.cpp --- Suppress store pair formation ----===// +//===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===// // // The LLVM Compiler Infrastructure // @@ -11,7 +11,7 @@ // store pairs. Later we may do the same for floating point loads. // ===---------------------------------------------------------------------===// -#include "ARM64InstrInfo.h" +#include "AArch64InstrInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -23,11 +23,11 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-stp-suppress" +#define DEBUG_TYPE "aarch64-stp-suppress" namespace { -class ARM64StorePairSuppress : public MachineFunctionPass { - const ARM64InstrInfo *TII; +class AArch64StorePairSuppress : public MachineFunctionPass { + const AArch64InstrInfo *TII; const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; MachineFunction *MF; @@ -37,10 +37,10 @@ class ARM64StorePairSuppress : public MachineFunctionPass { public: static char ID; - ARM64StorePairSuppress() : MachineFunctionPass(ID) {} + AArch64StorePairSuppress() : MachineFunctionPass(ID) {} virtual const char *getPassName() const override { - return "ARM64 Store Pair Suppression"; + return "AArch64 Store Pair Suppression"; } bool runOnMachineFunction(MachineFunction &F) override; @@ -57,11 +57,11 @@ class ARM64StorePairSuppress : public MachineFunctionPass { MachineFunctionPass::getAnalysisUsage(AU); } }; -char ARM64StorePairSuppress::ID = 0; +char AArch64StorePairSuppress::ID = 0; } // anonymous -FunctionPass *llvm::createARM64StorePairSuppressPass() { - return new ARM64StorePairSuppress(); +FunctionPass *llvm::createAArch64StorePairSuppressPass() { + return new AArch64StorePairSuppress(); } /// Return true if an STP can be added to this block without increasing the @@ -70,7 +70,7 @@ FunctionPass *llvm::createARM64StorePairSuppressPass() { /// critical path. If the critical path is longer than the resource height, the /// extra vector ops can limit physreg renaming. Otherwise, it could simply /// oversaturate the vector units. 
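
To make the trade-off described in that comment concrete, here is a deliberately simplified model rather than the pass's actual computation (the real code consults MachineTraceMetrics and the scheduling model; every name below is invented):

  // A block's height is limited either by its data-dependence critical path
  // or by its busiest resource; an STP adds FP/vector micro-ops, so pairing
  // is only "free" while the vector pipes are not already the limiting factor.
  #include <cstdio>

  struct BlockModel {
    unsigned CriticalPathCycles;   // height dictated by data dependencies
    unsigned VectorResourceCycles; // cycles the FP/vector pipes are busy
  };

  static bool shouldFormStorePair(const BlockModel &B, unsigned PairExtraCycles) {
    // Suppress the pair if the extra micro-ops would make the vector pipes
    // the bottleneck; otherwise the pair costs nothing on the critical path.
    return B.VectorResourceCycles + PairExtraCycles <= B.CriticalPathCycles;
  }

  int main() {
    BlockModel LatencyBound = {20, 8};  // long dependence chain: pair freely
    BlockModel VectorBound  = {10, 10}; // vector pipes already saturated
    std::printf("%d %d\n", shouldFormStorePair(LatencyBound, 1),
                shouldFormStorePair(VectorBound, 1));
    return 0;
  }
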
-bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { +bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { if (!MinInstr) MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); @@ -79,7 +79,7 @@ bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { // Get the machine model's scheduling class for STPQi. // Bypass TargetSchedule's SchedClass resolution since we only have an opcode. - unsigned SCIdx = TII->get(ARM64::STPDi).getSchedClass(); + unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass(); const MCSchedClassDesc *SCDesc = SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx); @@ -103,22 +103,22 @@ bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { /// tell us if it's profitable with no cpu knowledge here. /// /// FIXME: We plan to develop a decent Target abstraction for simple loads and -/// stores. Until then use a nasty switch similar to ARM64LoadStoreOptimizer. -bool ARM64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { +/// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer. +bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { switch (MI.getOpcode()) { default: return false; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STURSi: - case ARM64::STURDi: + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STURSi: + case AArch64::STURDi: return true; } } -bool ARM64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) { +bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) { MF = &mf; - TII = static_cast(MF->getTarget().getInstrInfo()); + TII = static_cast(MF->getTarget().getInstrInfo()); TRI = MF->getTarget().getRegisterInfo(); MRI = &MF->getRegInfo(); const TargetSubtargetInfo &ST = diff --git a/lib/Target/ARM64/ARM64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp similarity index 71% rename from lib/Target/ARM64/ARM64Subtarget.cpp rename to lib/Target/AArch64/AArch64Subtarget.cpp index 624e47483ffa..cd69994620da 100644 --- a/lib/Target/ARM64/ARM64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -1,4 +1,4 @@ -//===-- ARM64Subtarget.cpp - ARM64 Subtarget Information --------*- C++ -*-===// +//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This file implements the ARM64 specific subclass of TargetSubtarget. +// This file implements the AArch64 specific subclass of TargetSubtarget. 
// //===----------------------------------------------------------------------===// -#include "ARM64InstrInfo.h" -#include "ARM64Subtarget.h" +#include "AArch64InstrInfo.h" +#include "AArch64Subtarget.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/GlobalValue.h" @@ -20,22 +20,23 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-subtarget" +#define DEBUG_TYPE "aarch64-subtarget" #define GET_SUBTARGETINFO_CTOR #define GET_SUBTARGETINFO_TARGET_DESC -#include "ARM64GenSubtargetInfo.inc" +#include "AArch64GenSubtargetInfo.inc" static cl::opt -EnableEarlyIfConvert("arm64-early-ifcvt", cl::desc("Enable the early if " +EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " "converter pass"), cl::init(true), cl::Hidden); -ARM64Subtarget::ARM64Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool LittleEndian) - : ARM64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), +AArch64Subtarget::AArch64Subtarget(const std::string &TT, + const std::string &CPU, + const std::string &FS, bool LittleEndian) + : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), - HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), - CPUString(CPU), TargetTriple(TT), IsLittleEndian(LittleEndian) { + HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU), + TargetTriple(TT), IsLittleEndian(LittleEndian) { // Determine default and user-specified characteristics if (CPUString.empty()) @@ -47,7 +48,7 @@ ARM64Subtarget::ARM64Subtarget(const std::string &TT, const std::string &CPU, /// ClassifyGlobalReference - Find the target operand flags that describe /// how a global value should be referenced for the current subtarget. unsigned char -ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, +AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // Determine whether this is a reference to a definition or a declaration. @@ -60,13 +61,13 @@ ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, // MachO large model always goes via a GOT, simply to get a single 8-byte // absolute relocation on all global addresses. if (TM.getCodeModel() == CodeModel::Large && isTargetMachO()) - return ARM64II::MO_GOT; + return AArch64II::MO_GOT; // The small code mode's direct accesses use ADRP, which cannot necessarily // produce the value 0 (if the code is above 4GB). Therefore they must use the // GOT. if (TM.getCodeModel() == CodeModel::Small && GV->isWeakForLinker() && isDecl) - return ARM64II::MO_GOT; + return AArch64II::MO_GOT; // If symbol visibility is hidden, the extra load is not needed if // the symbol is definitely defined in the current translation unit. @@ -78,14 +79,14 @@ ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, // defined could end up in unexpected places. Use a GOT. if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility()) { if (isTargetMachO()) - return (isDecl || GV->isWeakForLinker()) ? ARM64II::MO_GOT - : ARM64II::MO_NO_FLAG; + return (isDecl || GV->isWeakForLinker()) ? AArch64II::MO_GOT + : AArch64II::MO_NO_FLAG; else // No need to go through the GOT for local symbols on ELF. - return GV->hasLocalLinkage() ? ARM64II::MO_NO_FLAG : ARM64II::MO_GOT; + return GV->hasLocalLinkage() ? 
AArch64II::MO_NO_FLAG : AArch64II::MO_GOT; } - return ARM64II::MO_NO_FLAG; + return AArch64II::MO_NO_FLAG; } /// This function returns the name of a function which has an interface @@ -93,7 +94,7 @@ ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, /// the current subtarget and it is considered prefereable over /// memset with zero passed as the second argument. Otherwise it /// returns null. -const char *ARM64Subtarget::getBZeroEntry() const { +const char *AArch64Subtarget::getBZeroEntry() const { // Prefer bzero on Darwin only. if(isTargetDarwin()) return "bzero"; @@ -101,7 +102,7 @@ const char *ARM64Subtarget::getBZeroEntry() const { return nullptr; } -void ARM64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, +void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin, MachineInstr *end, unsigned NumRegionInstrs) const { // LNT run (at least on Cyclone) showed reasonably significant gains for @@ -110,6 +111,6 @@ void ARM64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, Policy.OnlyBottomUp = false; } -bool ARM64Subtarget::enableEarlyIfConversion() const { +bool AArch64Subtarget::enableEarlyIfConversion() const { return EnableEarlyIfConvert; } diff --git a/lib/Target/ARM64/ARM64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h similarity index 88% rename from lib/Target/ARM64/ARM64Subtarget.h rename to lib/Target/AArch64/AArch64Subtarget.h index 9cea3c387d63..590ea0580ea7 100644 --- a/lib/Target/ARM64/ARM64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -1,4 +1,4 @@ -//=====---- ARM64Subtarget.h - Define Subtarget for the ARM64 -*- C++ -*--====// +//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -7,25 +7,25 @@ // //===----------------------------------------------------------------------===// // -// This file declares the ARM64 specific subclass of TargetSubtarget. +// This file declares the AArch64 specific subclass of TargetSubtarget. // //===----------------------------------------------------------------------===// -#ifndef ARM64SUBTARGET_H -#define ARM64SUBTARGET_H +#ifndef AArch64SUBTARGET_H +#define AArch64SUBTARGET_H #include "llvm/Target/TargetSubtargetInfo.h" -#include "ARM64RegisterInfo.h" +#include "AArch64RegisterInfo.h" #include #define GET_SUBTARGETINFO_HEADER -#include "ARM64GenSubtargetInfo.inc" +#include "AArch64GenSubtargetInfo.inc" namespace llvm { class GlobalValue; class StringRef; -class ARM64Subtarget : public ARM64GenSubtargetInfo { +class AArch64Subtarget : public AArch64GenSubtargetInfo { protected: enum ARMProcFamilyEnum {Others, CortexA53, CortexA57, Cyclone}; @@ -55,7 +55,7 @@ class ARM64Subtarget : public ARM64GenSubtargetInfo { public: /// This constructor initializes the data members to match that /// of the specified triple. 
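
The GOT-versus-direct decisions in ClassifyGlobalReference above are spread across several hunks; the following standalone sketch restates only the cases visible in the patch (enum and parameter names are invented, and the hidden-visibility shortcut that falls between the hunks is omitted):

  // Simplified restatement of ClassifyGlobalReference, illustrative only.
  enum class GVRef { Direct, ViaGOT };

  struct GVInfo {
    bool IsDecl;            // declared but not defined in this TU
    bool WeakForLinker;     // may be overridden or null at link time
    bool DefaultVisibility;
    bool LocalLinkage;
  };

  static GVRef classifyRef(const GVInfo &GV, bool MachO, bool LargeCM,
                           bool SmallCM, bool StaticReloc) {
    // MachO large code model: always indirect, one 8-byte absolute reloc.
    if (LargeCM && MachO)
      return GVRef::ViaGOT;
    // Small code model: ADRP cannot produce 0, so weak declarations that may
    // resolve to null must go through the GOT.
    if (SmallCM && GV.WeakForLinker && GV.IsDecl)
      return GVRef::ViaGOT;
    // Interposable symbols: non-static relocation with default visibility.
    if (!StaticReloc && GV.DefaultVisibility) {
      if (MachO)
        return (GV.IsDecl || GV.WeakForLinker) ? GVRef::ViaGOT : GVRef::Direct;
      // ELF: local symbols never need the GOT.
      return GV.LocalLinkage ? GVRef::Direct : GVRef::ViaGOT;
    }
    return GVRef::Direct;
  }
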
- ARM64Subtarget(const std::string &TT, const std::string &CPU, + AArch64Subtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool LittleEndian); bool enableMachineScheduler() const override { return true; } @@ -107,4 +107,4 @@ class ARM64Subtarget : public ARM64GenSubtargetInfo { }; } // End llvm namespace -#endif // ARM64SUBTARGET_H +#endif // AArch64SUBTARGET_H diff --git a/lib/Target/ARM64/ARM64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp similarity index 51% rename from lib/Target/ARM64/ARM64TargetMachine.cpp rename to lib/Target/AArch64/AArch64TargetMachine.cpp index fc73145be3f7..0b5dd2f067e7 100644 --- a/lib/Target/ARM64/ARM64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -1,4 +1,4 @@ -//===-- ARM64TargetMachine.cpp - Define TargetMachine for ARM64 -----------===// +//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===// // // The LLVM Compiler Infrastructure // @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64TargetMachine.h" +#include "AArch64.h" +#include "AArch64TargetMachine.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" @@ -21,28 +21,28 @@ using namespace llvm; static cl::opt -EnableCCMP("arm64-ccmp", cl::desc("Enable the CCMP formation pass"), +EnableCCMP("aarch64-ccmp", cl::desc("Enable the CCMP formation pass"), cl::init(true), cl::Hidden); static cl::opt -EnableStPairSuppress("arm64-stp-suppress", cl::desc("Suppress STP for ARM64"), +EnableStPairSuppress("aarch64-stp-suppress", cl::desc("Suppress STP for AArch64"), cl::init(true), cl::Hidden); static cl::opt -EnableAdvSIMDScalar("arm64-simd-scalar", cl::desc("Enable use of AdvSIMD scalar" +EnableAdvSIMDScalar("aarch64-simd-scalar", cl::desc("Enable use of AdvSIMD scalar" " integer instructions"), cl::init(false), cl::Hidden); static cl::opt -EnablePromoteConstant("arm64-promote-const", cl::desc("Enable the promote " +EnablePromoteConstant("aarch64-promote-const", cl::desc("Enable the promote " "constant pass"), cl::init(true), cl::Hidden); static cl::opt -EnableCollectLOH("arm64-collect-loh", cl::desc("Enable the pass that emits the" +EnableCollectLOH("aarch64-collect-loh", cl::desc("Enable the pass that emits the" " linker optimization hints (LOH)"), cl::init(true), cl::Hidden); static cl::opt -EnableDeadRegisterElimination("arm64-dead-def-elimination", cl::Hidden, +EnableDeadRegisterElimination("aarch64-dead-def-elimination", cl::Hidden, cl::desc("Enable the pass that removes dead" " definitons and replaces stores to" " them with stores to the zero" @@ -50,67 +50,67 @@ EnableDeadRegisterElimination("arm64-dead-def-elimination", cl::Hidden, cl::init(true)); static cl::opt -EnableLoadStoreOpt("arm64-load-store-opt", cl::desc("Enable the load/store pair" +EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden); -extern "C" void LLVMInitializeARM64Target() { +extern "C" void LLVMInitializeAArch64Target() { // Register the target. 
- RegisterTargetMachine X(TheARM64leTarget); - RegisterTargetMachine Y(TheARM64beTarget); + RegisterTargetMachine X(TheAArch64leTarget); + RegisterTargetMachine Y(TheAArch64beTarget); - RegisterTargetMachine Z(TheAArch64leTarget); - RegisterTargetMachine W(TheAArch64beTarget); + RegisterTargetMachine Z(TheARM64leTarget); + RegisterTargetMachine W(TheARM64beTarget); } -/// TargetMachine ctor - Create an ARM64 architecture model. +/// TargetMachine ctor - Create an AArch64 architecture model. /// -ARM64TargetMachine::ARM64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool LittleEndian) +AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, + bool LittleEndian) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, LittleEndian), - // This nested ternary is horrible, but DL needs to be properly initialized + // This nested ternary is horrible, but DL needs to be properly + // initialized // before TLInfo is constructed. - DL(Subtarget.isTargetMachO() ? - "e-m:o-i64:64-i128:128-n32:64-S128" : - (LittleEndian ? - "e-m:e-i64:64-i128:128-n32:64-S128" : - "E-m:e-i64:64-i128:128-n32:64-S128")), + DL(Subtarget.isTargetMachO() + ? "e-m:o-i64:64-i128:128-n32:64-S128" + : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128" + : "E-m:e-i64:64-i128:128-n32:64-S128")), InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget), TSInfo(*this) { initAsmInfo(); } -void ARM64leTargetMachine::anchor() { } +void AArch64leTargetMachine::anchor() { } -ARM64leTargetMachine:: -ARM64leTargetMachine(const Target &T, StringRef TT, +AArch64leTargetMachine:: +AArch64leTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARM64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} + : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} -void ARM64beTargetMachine::anchor() { } +void AArch64beTargetMachine::anchor() { } -ARM64beTargetMachine:: -ARM64beTargetMachine(const Target &T, StringRef TT, +AArch64beTargetMachine:: +AArch64beTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARM64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} + : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} namespace { -/// ARM64 Code Generator Pass Configuration Options. -class ARM64PassConfig : public TargetPassConfig { +/// AArch64 Code Generator Pass Configuration Options. +class AArch64PassConfig : public TargetPassConfig { public: - ARM64PassConfig(ARM64TargetMachine *TM, PassManagerBase &PM) + AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} - ARM64TargetMachine &getARM64TargetMachine() const { - return getTM(); + AArch64TargetMachine &getAArch64TargetMachine() const { + return getTM(); } bool addPreISel() override; @@ -123,28 +123,28 @@ class ARM64PassConfig : public TargetPassConfig { }; } // namespace -void ARM64TargetMachine::addAnalysisPasses(PassManagerBase &PM) { - // Add first the target-independent BasicTTI pass, then our ARM64 pass. 
This - // allows the ARM64 pass to delegate to the target independent layer when +void AArch64TargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our AArch64 pass. This + // allows the AArch64 pass to delegate to the target independent layer when // appropriate. PM.add(createBasicTargetTransformInfoPass(this)); - PM.add(createARM64TargetTransformInfoPass(this)); + PM.add(createAArch64TargetTransformInfoPass(this)); } -TargetPassConfig *ARM64TargetMachine::createPassConfig(PassManagerBase &PM) { - return new ARM64PassConfig(this, PM); +TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { + return new AArch64PassConfig(this, PM); } // Pass Pipeline Configuration -bool ARM64PassConfig::addPreISel() { +bool AArch64PassConfig::addPreISel() { // Run promote constant before global merge, so that the promoted constants // get a chance to be merged if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant) - addPass(createARM64PromoteConstantPass()); + addPass(createAArch64PromoteConstantPass()); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createGlobalMergePass(TM)); if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createARM64AddressTypePromotionPass()); + addPass(createAArch64AddressTypePromotionPass()); // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg // ourselves. @@ -153,56 +153,56 @@ bool ARM64PassConfig::addPreISel() { return false; } -bool ARM64PassConfig::addInstSelector() { - addPass(createARM64ISelDag(getARM64TargetMachine(), getOptLevel())); +bool AArch64PassConfig::addInstSelector() { + addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel())); // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many // references to _TLS_MODULE_BASE_ as possible. - if (TM->getSubtarget().isTargetELF() && + if (TM->getSubtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None) - addPass(createARM64CleanupLocalDynamicTLSPass()); + addPass(createAArch64CleanupLocalDynamicTLSPass()); return false; } -bool ARM64PassConfig::addILPOpts() { +bool AArch64PassConfig::addILPOpts() { if (EnableCCMP) - addPass(createARM64ConditionalCompares()); + addPass(createAArch64ConditionalCompares()); addPass(&EarlyIfConverterID); if (EnableStPairSuppress) - addPass(createARM64StorePairSuppressPass()); + addPass(createAArch64StorePairSuppressPass()); return true; } -bool ARM64PassConfig::addPreRegAlloc() { +bool AArch64PassConfig::addPreRegAlloc() { // Use AdvSIMD scalar instructions whenever profitable. if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) - addPass(createARM64AdvSIMDScalar()); + addPass(createAArch64AdvSIMDScalar()); return true; } -bool ARM64PassConfig::addPostRegAlloc() { +bool AArch64PassConfig::addPostRegAlloc() { // Change dead register definitions to refer to the zero register. if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination) - addPass(createARM64DeadRegisterDefinitions()); + addPass(createAArch64DeadRegisterDefinitions()); return true; } -bool ARM64PassConfig::addPreSched2() { +bool AArch64PassConfig::addPreSched2() { // Expand some pseudo instructions to allow proper scheduling. - addPass(createARM64ExpandPseudoPass()); + addPass(createAArch64ExpandPseudoPass()); // Use load/store pair instructions when possible. 
if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt) - addPass(createARM64LoadStoreOptimizationPass()); + addPass(createAArch64LoadStoreOptimizationPass()); return true; } -bool ARM64PassConfig::addPreEmitPass() { +bool AArch64PassConfig::addPreEmitPass() { // Relax conditional branch instructions if they're otherwise out of // range of their destination. - addPass(createARM64BranchRelaxation()); + addPass(createAArch64BranchRelaxation()); if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH && - TM->getSubtarget().isTargetMachO()) - addPass(createARM64CollectLOHPass()); + TM->getSubtarget().isTargetMachO()) + addPass(createAArch64CollectLOHPass()); return true; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h new file mode 100644 index 000000000000..079b19b23bb5 --- /dev/null +++ b/lib/Target/AArch64/AArch64TargetMachine.h @@ -0,0 +1,94 @@ +//==-- AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the AArch64 specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef AArch64TARGETMACHINE_H +#define AArch64TARGETMACHINE_H + +#include "AArch64InstrInfo.h" +#include "AArch64ISelLowering.h" +#include "AArch64Subtarget.h" +#include "AArch64FrameLowering.h" +#include "AArch64SelectionDAGInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/MC/MCStreamer.h" + +namespace llvm { + +class AArch64TargetMachine : public LLVMTargetMachine { +protected: + AArch64Subtarget Subtarget; + +private: + const DataLayout DL; + AArch64InstrInfo InstrInfo; + AArch64TargetLowering TLInfo; + AArch64FrameLowering FrameLowering; + AArch64SelectionDAGInfo TSInfo; + +public: + AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool IsLittleEndian); + + const AArch64Subtarget *getSubtargetImpl() const override { + return &Subtarget; + } + const AArch64TargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const DataLayout *getDataLayout() const override { return &DL; } + const AArch64FrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; } + const AArch64RegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + // Pass Pipeline Configuration + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + /// \brief Register AArch64 analysis passes with a pass manager. + void addAnalysisPasses(PassManagerBase &PM) override; +}; + +// AArch64leTargetMachine - AArch64 little endian target machine. +// +class AArch64leTargetMachine : public AArch64TargetMachine { + virtual void anchor(); +public: + AArch64leTargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); +}; + +// AArch64beTargetMachine - AArch64 big endian target machine. 
+// +class AArch64beTargetMachine : public AArch64TargetMachine { + virtual void anchor(); +public: + AArch64beTargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/ARM64/ARM64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp similarity index 80% rename from lib/Target/ARM64/ARM64TargetObjectFile.cpp rename to lib/Target/AArch64/AArch64TargetObjectFile.cpp index cde01e515dc4..4069038dffe7 100644 --- a/lib/Target/ARM64/ARM64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -1,4 +1,4 @@ -//===-- ARM64TargetObjectFile.cpp - ARM64 Object Info ---------------------===// +//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "ARM64TargetObjectFile.h" -#include "ARM64TargetMachine.h" +#include "AArch64TargetObjectFile.h" +#include "AArch64TargetMachine.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -17,13 +17,13 @@ using namespace llvm; using namespace dwarf; -void ARM64_ELFTargetObjectFile::Initialize(MCContext &Ctx, - const TargetMachine &TM) { +void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(TM.Options.UseInitArray); } -const MCExpr *ARM64_MachoTargetObjectFile::getTTypeGlobalReference( +const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference( const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI, MCStreamer &Streamer) const { @@ -45,7 +45,7 @@ const MCExpr *ARM64_MachoTargetObjectFile::getTTypeGlobalReference( GV, Encoding, Mang, TM, MMI, Streamer); } -MCSymbol *ARM64_MachoTargetObjectFile::getCFIPersonalitySymbol( +MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol( const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI) const { return TM.getSymbol(GV, Mang); diff --git a/lib/Target/ARM64/ARM64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h similarity index 73% rename from lib/Target/ARM64/ARM64TargetObjectFile.h rename to lib/Target/AArch64/AArch64TargetObjectFile.h index 62446f94f179..de63cb42542a 100644 --- a/lib/Target/ARM64/ARM64TargetObjectFile.h +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -1,4 +1,4 @@ -//===-- ARM64TargetObjectFile.h - ARM64 Object Info -*- C++ -------------*-===// +//===-- AArch64TargetObjectFile.h - AArch64 Object Info -*- C++ ---------*-===// // // The LLVM Compiler Infrastructure // @@ -7,22 +7,22 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM64_TARGETOBJECTFILE_H -#define LLVM_TARGET_ARM64_TARGETOBJECTFILE_H +#ifndef LLVM_TARGET_AArch64_TARGETOBJECTFILE_H +#define LLVM_TARGET_AArch64_TARGETOBJECTFILE_H #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { -class ARM64TargetMachine; +class AArch64TargetMachine; /// This implementation is used for AArch64 ELF targets (Linux in particular). 
-class ARM64_ELFTargetObjectFile : public TargetLoweringObjectFileELF { +class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF { void Initialize(MCContext &Ctx, const TargetMachine &TM) override; }; -/// ARM64_MachoTargetObjectFile - This TLOF implementation is used for Darwin. -class ARM64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { +/// AArch64_MachoTargetObjectFile - This TLOF implementation is used for Darwin. +class AArch64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { public: const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding, Mangler &Mang, diff --git a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp similarity index 87% rename from lib/Target/ARM64/ARM64TargetTransformInfo.cpp rename to lib/Target/AArch64/AArch64TargetTransformInfo.cpp index cc4cdff62b5b..33e482a53a46 100644 --- a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARM64TargetTransformInfo.cpp - ARM64 specific TTI pass ------------===// +//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===// // // The LLVM Compiler Infrastructure // @@ -8,15 +8,15 @@ //===----------------------------------------------------------------------===// /// \file /// This file implements a TargetTransformInfo analysis pass specific to the -/// ARM64 target machine. It uses the target's detailed information to provide +/// AArch64 target machine. It uses the target's detailed information to provide /// more precise answers to certain TTI queries, while letting the target /// independent and default TTI implementations handle the rest. /// //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64TargetMachine.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" @@ -24,35 +24,35 @@ #include using namespace llvm; -#define DEBUG_TYPE "arm64tti" +#define DEBUG_TYPE "aarch64tti" // Declare the pass initialization routine locally as target-specific passes // don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. namespace llvm { -void initializeARM64TTIPass(PassRegistry &); +void initializeAArch64TTIPass(PassRegistry &); } namespace { -class ARM64TTI final : public ImmutablePass, public TargetTransformInfo { - const ARM64TargetMachine *TM; - const ARM64Subtarget *ST; - const ARM64TargetLowering *TLI; +class AArch64TTI final : public ImmutablePass, public TargetTransformInfo { + const AArch64TargetMachine *TM; + const AArch64Subtarget *ST; + const AArch64TargetLowering *TLI; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. 
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; public: - ARM64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { + AArch64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } - ARM64TTI(const ARM64TargetMachine *TM) + AArch64TTI(const AArch64TargetMachine *TM) : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), TLI(TM->getTargetLowering()) { - initializeARM64TTIPass(*PassRegistry::getPassRegistry()); + initializeAArch64TTIPass(*PassRegistry::getPassRegistry()); } void initializePass() override { pushTTIStack(this); } @@ -129,21 +129,21 @@ class ARM64TTI final : public ImmutablePass, public TargetTransformInfo { } // end anonymous namespace -INITIALIZE_AG_PASS(ARM64TTI, TargetTransformInfo, "arm64tti", - "ARM64 Target Transform Info", true, true, false) -char ARM64TTI::ID = 0; +INITIALIZE_AG_PASS(AArch64TTI, TargetTransformInfo, "aarch64tti", + "AArch64 Target Transform Info", true, true, false) +char AArch64TTI::ID = 0; ImmutablePass * -llvm::createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM) { - return new ARM64TTI(TM); +llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) { + return new AArch64TTI(TM); } /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. -unsigned ARM64TTI::getIntImmCost(int64_t Val) const { +unsigned AArch64TTI::getIntImmCost(int64_t Val) const { // Check if the immediate can be encoded within an instruction. - if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, 64)) + if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64)) return 0; if (Val < 0) @@ -155,7 +155,7 @@ unsigned ARM64TTI::getIntImmCost(int64_t Val) const { } /// \brief Calculate the cost of materializing the given constant. -unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { +unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -179,7 +179,7 @@ unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { return std::max(1U, Cost); } -unsigned ARM64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, +unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); @@ -237,14 +237,14 @@ unsigned ARM64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, if (Idx == ImmIdx) { unsigned NumConstants = (BitSize + 63) / 64; - unsigned Cost = ARM64TTI::getIntImmCost(Imm, Ty); + unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty); return (Cost <= NumConstants * TCC_Basic) ? static_cast(TCC_Free) : Cost; } - return ARM64TTI::getIntImmCost(Imm, Ty); + return AArch64TTI::getIntImmCost(Imm, Ty); } -unsigned ARM64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, +unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); @@ -265,7 +265,7 @@ unsigned ARM64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, case Intrinsic::umul_with_overflow: if (Idx == 1) { unsigned NumConstants = (BitSize + 63) / 64; - unsigned Cost = ARM64TTI::getIntImmCost(Imm, Ty); + unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty); return (Cost <= NumConstants * TCC_Basic) ? 
static_cast(TCC_Free) : Cost; } @@ -280,18 +280,19 @@ unsigned ARM64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, return TCC_Free; break; } - return ARM64TTI::getIntImmCost(Imm, Ty); + return AArch64TTI::getIntImmCost(Imm, Ty); } -ARM64TTI::PopcntSupportKind ARM64TTI::getPopcntSupport(unsigned TyWidth) const { +AArch64TTI::PopcntSupportKind +AArch64TTI::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); if (TyWidth == 32 || TyWidth == 64) return PSK_FastHardware; - // TODO: ARM64TargetLowering::LowerCTPOP() supports 128bit popcount. + // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount. return PSK_Software; } -unsigned ARM64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, +unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -338,7 +339,7 @@ unsigned ARM64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); } -unsigned ARM64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, +unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { assert(Val->isVectorTy() && "This must be a vector type"); @@ -363,7 +364,7 @@ unsigned ARM64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, return 2; } -unsigned ARM64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, +unsigned AArch64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info) const { // Legalize the type. @@ -386,7 +387,7 @@ unsigned ARM64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, } } -unsigned ARM64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { +unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { // Address computations in vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. 
The resulting @@ -401,7 +402,7 @@ unsigned ARM64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { return 1; } -unsigned ARM64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, +unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -432,7 +433,7 @@ unsigned ARM64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -unsigned ARM64TTI::getMemoryOpCost(unsigned Opcode, Type *Src, +unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { std::pair LT = TLI->getTypeLegalizationCost(Src); diff --git a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp similarity index 78% rename from lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp rename to lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 4d710db1d93b..65b77c547dc9 100644 --- a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -1,4 +1,4 @@ -//===-- ARM64AsmParser.cpp - Parse ARM64 assembly to MCInst instructions --===// +//==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==// // // The LLVM Compiler Infrastructure // @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "Utils/ARM64BaseInfo.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -35,9 +35,9 @@ using namespace llvm; namespace { -class ARM64Operand; +class AArch64Operand; -class ARM64AsmParser : public MCTargetAsmParser { +class AArch64AsmParser : public MCTargetAsmParser { public: typedef SmallVectorImpl OperandVector; @@ -52,7 +52,7 @@ class ARM64AsmParser : public MCTargetAsmParser { SMLoc getLoc() const { return Parser.getTok().getLoc(); } bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands); - ARM64CC::CondCode parseCondCodeString(StringRef Cond); + AArch64CC::CondCode parseCondCodeString(StringRef Cond); bool parseCondCode(OperandVector &Operands, bool invertCondCode); int tryParseRegister(); int tryMatchVectorRegister(StringRef &Kind, bool expected); @@ -80,7 +80,7 @@ class ARM64AsmParser : public MCTargetAsmParser { /// { #define GET_ASSEMBLER_HEADER -#include "ARM64GenAsmMatcher.inc" +#include "AArch64GenAsmMatcher.inc" /// } @@ -98,12 +98,12 @@ class ARM64AsmParser : public MCTargetAsmParser { bool tryParseVectorRegister(OperandVector &Operands); public: - enum ARM64MatchResultTy { + enum AArch64MatchResultTy { Match_InvalidSuffix = FIRST_TARGET_MATCH_RESULT_TY, #define GET_OPERAND_DIAGNOSTIC_TYPES -#include "ARM64GenAsmMatcher.inc" +#include "AArch64GenAsmMatcher.inc" }; - ARM64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, + AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { @@ -121,7 +121,7 @@ class ARM64AsmParser : public MCTargetAsmParser { unsigned Kind) override; static bool classifySymbolRef(const MCExpr *Expr, - ARM64MCExpr::VariantKind &ELFRefKind, + AArch64MCExpr::VariantKind &ELFRefKind, 
MCSymbolRefExpr::VariantKind &DarwinRefKind, int64_t &Addend); }; @@ -129,9 +129,9 @@ class ARM64AsmParser : public MCTargetAsmParser { namespace { -/// ARM64Operand - Instances of this class represent a parsed ARM64 machine +/// AArch64Operand - Instances of this class represent a parsed AArch64 machine /// instruction. -class ARM64Operand : public MCParsedAsmOperand { +class AArch64Operand : public MCParsedAsmOperand { private: enum KindTy { k_Immediate, @@ -183,7 +183,7 @@ class ARM64Operand : public MCParsedAsmOperand { }; struct CondCodeOp { - ARM64CC::CondCode Code; + AArch64CC::CondCode Code; }; struct FPImmOp { @@ -211,7 +211,7 @@ class ARM64Operand : public MCParsedAsmOperand { }; struct ShiftExtendOp { - ARM64_AM::ShiftExtendType Type; + AArch64_AM::ShiftExtendType Type; unsigned Amount; bool HasExplicitAmount; }; @@ -240,11 +240,11 @@ class ARM64Operand : public MCParsedAsmOperand { // the add<>Operands() calls. MCContext &Ctx; - ARM64Operand(KindTy K, MCContext &_Ctx) + AArch64Operand(KindTy K, MCContext &_Ctx) : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {} public: - ARM64Operand(const ARM64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) { + AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; @@ -321,7 +321,7 @@ class ARM64Operand : public MCParsedAsmOperand { return ShiftedImm.ShiftAmount; } - ARM64CC::CondCode getCondCode() const { + AArch64CC::CondCode getCondCode() const { assert(Kind == k_CondCode && "Invalid access!"); return CondCode.Code; } @@ -376,7 +376,7 @@ class ARM64Operand : public MCParsedAsmOperand { return Prefetch.Val; } - ARM64_AM::ShiftExtendType getShiftExtendType() const { + AArch64_AM::ShiftExtendType getShiftExtendType() const { assert(Kind == k_ShiftExtend && "Invalid access!"); return ShiftExtend.Type; } @@ -431,10 +431,10 @@ class ARM64Operand : public MCParsedAsmOperand { } bool isSymbolicUImm12Offset(const MCExpr *Expr, unsigned Scale) const { - ARM64MCExpr::VariantKind ELFRefKind; + AArch64MCExpr::VariantKind ELFRefKind; MCSymbolRefExpr::VariantKind DarwinRefKind; int64_t Addend; - if (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, + if (!AArch64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { // If we don't understand the expression, assume the best and // let the fixup and relocation code deal with it. @@ -442,14 +442,14 @@ class ARM64Operand : public MCParsedAsmOperand { } if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || - ELFRefKind == ARM64MCExpr::VK_LO12 || - ELFRefKind == ARM64MCExpr::VK_GOT_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_GOTTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) { + ELFRefKind == AArch64MCExpr::VK_LO12 || + ELFRefKind == AArch64MCExpr::VK_GOT_LO12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_TPREL_LO12 || + ELFRefKind == AArch64MCExpr::VK_TPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_GOTTPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12) { // Note that we don't range-check the addend. It's adjusted modulo page // size when converted, so there is no "out of range" condition when using // @pageoff. 
@@ -607,7 +607,7 @@ class ARM64Operand : public MCParsedAsmOperand { const MCConstantExpr *MCE = dyn_cast(getImm()); if (!MCE) return false; - return ARM64_AM::isLogicalImmediate(MCE->getValue(), 32); + return AArch64_AM::isLogicalImmediate(MCE->getValue(), 32); } bool isLogicalImm64() const { if (!isImm()) @@ -615,7 +615,7 @@ class ARM64Operand : public MCParsedAsmOperand { const MCConstantExpr *MCE = dyn_cast(getImm()); if (!MCE) return false; - return ARM64_AM::isLogicalImmediate(MCE->getValue(), 64); + return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64); } bool isShiftedImm() const { return Kind == k_ShiftedImm; } bool isAddSubImm() const { @@ -634,22 +634,22 @@ class ARM64Operand : public MCParsedAsmOperand { Expr = getImm(); } - ARM64MCExpr::VariantKind ELFRefKind; + AArch64MCExpr::VariantKind ELFRefKind; MCSymbolRefExpr::VariantKind DarwinRefKind; int64_t Addend; - if (ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, + if (AArch64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { return DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF || (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF && Addend == 0) - || ELFRefKind == ARM64MCExpr::VK_LO12 - || ELFRefKind == ARM64MCExpr::VK_DTPREL_HI12 - || ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 - || ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC - || ELFRefKind == ARM64MCExpr::VK_TPREL_HI12 - || ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 - || ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC - || ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12; + || ELFRefKind == AArch64MCExpr::VK_LO12 + || ELFRefKind == AArch64MCExpr::VK_DTPREL_HI12 + || ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12 + || ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12_NC + || ELFRefKind == AArch64MCExpr::VK_TPREL_HI12 + || ELFRefKind == AArch64MCExpr::VK_TPREL_LO12 + || ELFRefKind == AArch64MCExpr::VK_TPREL_LO12_NC + || ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12; } // Otherwise it should be a real immediate in range: @@ -663,7 +663,7 @@ class ARM64Operand : public MCParsedAsmOperand { const MCConstantExpr *MCE = dyn_cast(getImm()); if (!MCE) return false; - return ARM64_AM::isAdvSIMDModImmType10(MCE->getValue()); + return AArch64_AM::isAdvSIMDModImmType10(MCE->getValue()); } bool isBranchTarget26() const { if (!isImm()) @@ -699,15 +699,16 @@ class ARM64Operand : public MCParsedAsmOperand { return (Val >= -(0x2000 << 2) && Val <= (0x1fff << 2)); } - bool isMovWSymbol(ArrayRef AllowedModifiers) const { + bool + isMovWSymbol(ArrayRef AllowedModifiers) const { if (!isImm()) return false; - ARM64MCExpr::VariantKind ELFRefKind; + AArch64MCExpr::VariantKind ELFRefKind; MCSymbolRefExpr::VariantKind DarwinRefKind; int64_t Addend; - if (!ARM64AsmParser::classifySymbolRef(getImm(), ELFRefKind, DarwinRefKind, - Addend)) { + if (!AArch64AsmParser::classifySymbolRef(getImm(), ELFRefKind, + DarwinRefKind, Addend)) { return false; } if (DarwinRefKind != MCSymbolRefExpr::VK_None) @@ -722,57 +723,56 @@ class ARM64Operand : public MCParsedAsmOperand { } bool isMovZSymbolG3() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G3 }; + static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 }; return isMovWSymbol(Variants); } bool isMovZSymbolG2() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G2, - ARM64MCExpr::VK_ABS_G2_S, - ARM64MCExpr::VK_TPREL_G2, - ARM64MCExpr::VK_DTPREL_G2 }; + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G2, 
AArch64MCExpr::VK_ABS_G2_S, + AArch64MCExpr::VK_TPREL_G2, AArch64MCExpr::VK_DTPREL_G2}; return isMovWSymbol(Variants); } bool isMovZSymbolG1() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G1, - ARM64MCExpr::VK_ABS_G1_S, - ARM64MCExpr::VK_GOTTPREL_G1, - ARM64MCExpr::VK_TPREL_G1, - ARM64MCExpr::VK_DTPREL_G1, }; + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S, + AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1, + AArch64MCExpr::VK_DTPREL_G1, + }; return isMovWSymbol(Variants); } bool isMovZSymbolG0() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G0, - ARM64MCExpr::VK_ABS_G0_S, - ARM64MCExpr::VK_TPREL_G0, - ARM64MCExpr::VK_DTPREL_G0 }; + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S, + AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_DTPREL_G0}; return isMovWSymbol(Variants); } bool isMovKSymbolG3() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G3 }; + static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 }; return isMovWSymbol(Variants); } bool isMovKSymbolG2() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G2_NC }; + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G2_NC}; return isMovWSymbol(Variants); } bool isMovKSymbolG1() const { - static ARM64MCExpr::VariantKind Variants[] = { - ARM64MCExpr::VK_ABS_G1_NC, ARM64MCExpr::VK_TPREL_G1_NC, - ARM64MCExpr::VK_DTPREL_G1_NC + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_TPREL_G1_NC, + AArch64MCExpr::VK_DTPREL_G1_NC }; return isMovWSymbol(Variants); } bool isMovKSymbolG0() const { - static ARM64MCExpr::VariantKind Variants[] = { - ARM64MCExpr::VK_ABS_G0_NC, ARM64MCExpr::VK_GOTTPREL_G0_NC, - ARM64MCExpr::VK_TPREL_G0_NC, ARM64MCExpr::VK_DTPREL_G0_NC + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC, + AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC }; return isMovWSymbol(Variants); } @@ -822,7 +822,7 @@ class ARM64Operand : public MCParsedAsmOperand { if (!isSysReg()) return false; bool IsKnownRegister; - auto Mapper = ARM64SysReg::MRSMapper(getSysRegFeatureBits()); + auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits()); Mapper.fromString(getSysReg(), IsKnownRegister); return IsKnownRegister; @@ -831,7 +831,7 @@ class ARM64Operand : public MCParsedAsmOperand { if (!isSysReg()) return false; bool IsKnownRegister; - auto Mapper = ARM64SysReg::MSRMapper(getSysRegFeatureBits()); + auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits()); Mapper.fromString(getSysReg(), IsKnownRegister); return IsKnownRegister; @@ -840,7 +840,7 @@ class ARM64Operand : public MCParsedAsmOperand { if (!isSysReg()) return false; bool IsKnownRegister; - ARM64PState::PStateMapper().fromString(getSysReg(), IsKnownRegister); + AArch64PState::PStateMapper().fromString(getSysReg(), IsKnownRegister); return IsKnownRegister; } @@ -848,16 +848,17 @@ class ARM64Operand : public MCParsedAsmOperand { bool isVectorReg() const { return Kind == k_Register && Reg.isVector; } bool isVectorRegLo() const { return Kind == k_Register && Reg.isVector && - ARM64MCRegisterClasses[ARM64::FPR128_loRegClassID].contains(Reg.RegNum); + AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains( + Reg.RegNum); } bool isGPR32as64() const { return Kind == k_Register && 
!Reg.isVector && - ARM64MCRegisterClasses[ARM64::GPR64RegClassID].contains(Reg.RegNum); + AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum); } bool isGPR64sp0() const { return Kind == k_Register && !Reg.isVector && - ARM64MCRegisterClasses[ARM64::GPR64spRegClassID].contains(Reg.RegNum); + AArch64MCRegisterClasses[AArch64::GPR64spRegClassID].contains(Reg.RegNum); } /// Is this a vector list with the type implicit (presumably attached to the @@ -904,20 +905,21 @@ class ARM64Operand : public MCParsedAsmOperand { if (!isShiftExtend()) return false; - ARM64_AM::ShiftExtendType ST = getShiftExtendType(); - return (ST == ARM64_AM::LSL || ST == ARM64_AM::LSR || ST == ARM64_AM::ASR || - ST == ARM64_AM::ROR || ST == ARM64_AM::MSL); + AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + return (ST == AArch64_AM::LSL || ST == AArch64_AM::LSR || + ST == AArch64_AM::ASR || ST == AArch64_AM::ROR || + ST == AArch64_AM::MSL); } bool isExtend() const { if (!isShiftExtend()) return false; - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - return (ET == ARM64_AM::UXTB || ET == ARM64_AM::SXTB || - ET == ARM64_AM::UXTH || ET == ARM64_AM::SXTH || - ET == ARM64_AM::UXTW || ET == ARM64_AM::SXTW || - ET == ARM64_AM::UXTX || ET == ARM64_AM::SXTX || - ET == ARM64_AM::LSL) && + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB || + ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH || + ET == AArch64_AM::UXTW || ET == AArch64_AM::SXTW || + ET == AArch64_AM::UXTX || ET == AArch64_AM::SXTX || + ET == AArch64_AM::LSL) && getShiftExtendAmount() <= 4; } @@ -925,22 +927,23 @@ class ARM64Operand : public MCParsedAsmOperand { if (!isExtend()) return false; // UXTX and SXTX require a 64-bit source register (the ExtendLSL64 class). - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - return ET != ARM64_AM::UXTX && ET != ARM64_AM::SXTX; + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return ET != AArch64_AM::UXTX && ET != AArch64_AM::SXTX; } bool isExtendLSL64() const { if (!isExtend()) return false; - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - return (ET == ARM64_AM::UXTX || ET == ARM64_AM::SXTX || ET == ARM64_AM::LSL) && - getShiftExtendAmount() <= 4; + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return (ET == AArch64_AM::UXTX || ET == AArch64_AM::SXTX || + ET == AArch64_AM::LSL) && + getShiftExtendAmount() <= 4; } template bool isMemXExtend() const { if (!isExtend()) return false; - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - return (ET == ARM64_AM::LSL || ET == ARM64_AM::SXTX) && + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return (ET == AArch64_AM::LSL || ET == AArch64_AM::SXTX) && (getShiftExtendAmount() == Log2_32(Width / 8) || getShiftExtendAmount() == 0); } @@ -948,8 +951,8 @@ class ARM64Operand : public MCParsedAsmOperand { template bool isMemWExtend() const { if (!isExtend()) return false; - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - return (ET == ARM64_AM::UXTW || ET == ARM64_AM::SXTW) && + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return (ET == AArch64_AM::UXTW || ET == AArch64_AM::SXTW) && (getShiftExtendAmount() == Log2_32(Width / 8) || getShiftExtendAmount() == 0); } @@ -960,9 +963,9 @@ class ARM64Operand : public MCParsedAsmOperand { return false; // An arithmetic shifter is LSL, LSR, or ASR. 
- ARM64_AM::ShiftExtendType ST = getShiftExtendType(); - return (ST == ARM64_AM::LSL || ST == ARM64_AM::LSR || - ST == ARM64_AM::ASR) && getShiftExtendAmount() < width; + AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + return (ST == AArch64_AM::LSL || ST == AArch64_AM::LSR || + ST == AArch64_AM::ASR) && getShiftExtendAmount() < width; } template @@ -971,9 +974,9 @@ class ARM64Operand : public MCParsedAsmOperand { return false; // A logical shifter is LSL, LSR, ASR or ROR. - ARM64_AM::ShiftExtendType ST = getShiftExtendType(); - return (ST == ARM64_AM::LSL || ST == ARM64_AM::LSR || ST == ARM64_AM::ASR || - ST == ARM64_AM::ROR) && + AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + return (ST == AArch64_AM::LSL || ST == AArch64_AM::LSR || + ST == AArch64_AM::ASR || ST == AArch64_AM::ROR) && getShiftExtendAmount() < width; } @@ -982,8 +985,8 @@ class ARM64Operand : public MCParsedAsmOperand { return false; // A MOVi shifter is LSL of 0, 16, 32, or 48. - ARM64_AM::ShiftExtendType ST = getShiftExtendType(); - if (ST != ARM64_AM::LSL) + AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + if (ST != AArch64_AM::LSL) return false; uint64_t Val = getShiftExtendAmount(); return (Val == 0 || Val == 16); @@ -994,8 +997,8 @@ class ARM64Operand : public MCParsedAsmOperand { return false; // A MOVi shifter is LSL of 0 or 16. - ARM64_AM::ShiftExtendType ST = getShiftExtendType(); - if (ST != ARM64_AM::LSL) + AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + if (ST != AArch64_AM::LSL) return false; uint64_t Val = getShiftExtendAmount(); return (Val == 0 || Val == 16 || Val == 32 || Val == 48); @@ -1007,7 +1010,7 @@ class ARM64Operand : public MCParsedAsmOperand { // A logical vector shifter is a left shift by 0, 8, 16, or 24. unsigned Shift = getShiftExtendAmount(); - return getShiftExtendType() == ARM64_AM::LSL && + return getShiftExtendType() == AArch64_AM::LSL && (Shift == 0 || Shift == 8 || Shift == 16 || Shift == 24); } @@ -1017,7 +1020,8 @@ class ARM64Operand : public MCParsedAsmOperand { // A logical vector shifter is a left shift by 0 or 8. unsigned Shift = getShiftExtendAmount(); - return getShiftExtendType() == ARM64_AM::LSL && (Shift == 0 || Shift == 8); + return getShiftExtendType() == AArch64_AM::LSL && + (Shift == 0 || Shift == 8); } bool isMoveVecShifter() const { @@ -1026,7 +1030,8 @@ class ARM64Operand : public MCParsedAsmOperand { // A logical vector shifter is a left shift by 8 or 16. 
unsigned Shift = getShiftExtendAmount(); - return getShiftExtendType() == ARM64_AM::MSL && (Shift == 8 || Shift == 16); + return getShiftExtendType() == AArch64_AM::MSL && + (Shift == 8 || Shift == 16); } // Fallback unscaled operands are for aliases of LDR/STR that fall back @@ -1088,10 +1093,11 @@ class ARM64Operand : public MCParsedAsmOperand { void addGPR32as64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - assert(ARM64MCRegisterClasses[ARM64::GPR64RegClassID].contains(getReg())); + assert( + AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(getReg())); const MCRegisterInfo *RI = Ctx.getRegisterInfo(); - uint32_t Reg = RI->getRegClass(ARM64::GPR32RegClassID).getRegister( + uint32_t Reg = RI->getRegClass(AArch64::GPR32RegClassID).getRegister( RI->getEncodingValue(getReg())); Inst.addOperand(MCOperand::CreateReg(Reg)); @@ -1099,13 +1105,15 @@ class ARM64Operand : public MCParsedAsmOperand { void addVectorReg64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - assert(ARM64MCRegisterClasses[ARM64::FPR128RegClassID].contains(getReg())); - Inst.addOperand(MCOperand::CreateReg(ARM64::D0 + getReg() - ARM64::Q0)); + assert( + AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(getReg())); + Inst.addOperand(MCOperand::CreateReg(AArch64::D0 + getReg() - AArch64::Q0)); } void addVectorReg128Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - assert(ARM64MCRegisterClasses[ARM64::FPR128RegClassID].contains(getReg())); + assert( + AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(getReg())); Inst.addOperand(MCOperand::CreateReg(getReg())); } @@ -1117,23 +1125,23 @@ class ARM64Operand : public MCParsedAsmOperand { template void addVectorList64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - static unsigned FirstRegs[] = { ARM64::D0, ARM64::D0_D1, - ARM64::D0_D1_D2, ARM64::D0_D1_D2_D3 }; + static unsigned FirstRegs[] = { AArch64::D0, AArch64::D0_D1, + AArch64::D0_D1_D2, AArch64::D0_D1_D2_D3 }; unsigned FirstReg = FirstRegs[NumRegs - 1]; Inst.addOperand( - MCOperand::CreateReg(FirstReg + getVectorListStart() - ARM64::Q0)); + MCOperand::CreateReg(FirstReg + getVectorListStart() - AArch64::Q0)); } template void addVectorList128Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - static unsigned FirstRegs[] = { ARM64::Q0, ARM64::Q0_Q1, - ARM64::Q0_Q1_Q2, ARM64::Q0_Q1_Q2_Q3 }; + static unsigned FirstRegs[] = { AArch64::Q0, AArch64::Q0_Q1, + AArch64::Q0_Q1_Q2, AArch64::Q0_Q1_Q2_Q3 }; unsigned FirstReg = FirstRegs[NumRegs - 1]; Inst.addOperand( - MCOperand::CreateReg(FirstReg + getVectorListStart() - ARM64::Q0)); + MCOperand::CreateReg(FirstReg + getVectorListStart() - AArch64::Q0)); } void addVectorIndex1Operands(MCInst &Inst, unsigned N) const { @@ -1340,7 +1348,7 @@ class ARM64Operand : public MCParsedAsmOperand { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = dyn_cast(getImm()); assert(MCE && "Invalid logical immediate operand!"); - uint64_t encoding = ARM64_AM::encodeLogicalImmediate(MCE->getValue(), 32); + uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 32); Inst.addOperand(MCOperand::CreateImm(encoding)); } @@ -1348,7 +1356,7 @@ class ARM64Operand : public MCParsedAsmOperand { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = dyn_cast(getImm()); assert(MCE && "Invalid logical 
immediate operand!"); - uint64_t encoding = ARM64_AM::encodeLogicalImmediate(MCE->getValue(), 64); + uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 64); Inst.addOperand(MCOperand::CreateImm(encoding)); } @@ -1356,7 +1364,7 @@ class ARM64Operand : public MCParsedAsmOperand { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = dyn_cast(getImm()); assert(MCE && "Invalid immediate operand!"); - uint64_t encoding = ARM64_AM::encodeAdvSIMDModImmType10(MCE->getValue()); + uint64_t encoding = AArch64_AM::encodeAdvSIMDModImmType10(MCE->getValue()); Inst.addOperand(MCOperand::CreateImm(encoding)); } @@ -1416,7 +1424,7 @@ class ARM64Operand : public MCParsedAsmOperand { assert(N == 1 && "Invalid number of operands!"); bool Valid; - auto Mapper = ARM64SysReg::MRSMapper(getSysRegFeatureBits()); + auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits()); uint32_t Bits = Mapper.fromString(getSysReg(), Valid); Inst.addOperand(MCOperand::CreateImm(Bits)); @@ -1426,7 +1434,7 @@ class ARM64Operand : public MCParsedAsmOperand { assert(N == 1 && "Invalid number of operands!"); bool Valid; - auto Mapper = ARM64SysReg::MSRMapper(getSysRegFeatureBits()); + auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits()); uint32_t Bits = Mapper.fromString(getSysReg(), Valid); Inst.addOperand(MCOperand::CreateImm(Bits)); @@ -1436,7 +1444,8 @@ class ARM64Operand : public MCParsedAsmOperand { assert(N == 1 && "Invalid number of operands!"); bool Valid; - uint32_t Bits = ARM64PState::PStateMapper().fromString(getSysReg(), Valid); + uint32_t Bits = + AArch64PState::PStateMapper().fromString(getSysReg(), Valid); Inst.addOperand(MCOperand::CreateImm(Bits)); } @@ -1454,30 +1463,30 @@ class ARM64Operand : public MCParsedAsmOperand { void addShifterOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); unsigned Imm = - ARM64_AM::getShifterImm(getShiftExtendType(), getShiftExtendAmount()); + AArch64_AM::getShifterImm(getShiftExtendType(), getShiftExtendAmount()); Inst.addOperand(MCOperand::CreateImm(Imm)); } void addExtendOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - if (ET == ARM64_AM::LSL) ET = ARM64_AM::UXTW; - unsigned Imm = ARM64_AM::getArithExtendImm(ET, getShiftExtendAmount()); + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + if (ET == AArch64_AM::LSL) ET = AArch64_AM::UXTW; + unsigned Imm = AArch64_AM::getArithExtendImm(ET, getShiftExtendAmount()); Inst.addOperand(MCOperand::CreateImm(Imm)); } void addExtend64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - if (ET == ARM64_AM::LSL) ET = ARM64_AM::UXTX; - unsigned Imm = ARM64_AM::getArithExtendImm(ET, getShiftExtendAmount()); + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + if (ET == AArch64_AM::LSL) ET = AArch64_AM::UXTX; + unsigned Imm = AArch64_AM::getArithExtendImm(ET, getShiftExtendAmount()); Inst.addOperand(MCOperand::CreateImm(Imm)); } void addMemExtendOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - bool IsSigned = ET == ARM64_AM::SXTW || ET == ARM64_AM::SXTX; + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + bool IsSigned = ET == AArch64_AM::SXTW || ET == AArch64_AM::SXTX; Inst.addOperand(MCOperand::CreateImm(IsSigned)); 
Inst.addOperand(MCOperand::CreateImm(getShiftExtendAmount() != 0)); } @@ -1488,8 +1497,8 @@ class ARM64Operand : public MCParsedAsmOperand { // than its size. void addMemExtend8Operands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - bool IsSigned = ET == ARM64_AM::SXTW || ET == ARM64_AM::SXTX; + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + bool IsSigned = ET == AArch64_AM::SXTW || ET == AArch64_AM::SXTX; Inst.addOperand(MCOperand::CreateImm(IsSigned)); Inst.addOperand(MCOperand::CreateImm(hasShiftExtendAmount())); } @@ -1514,9 +1523,9 @@ class ARM64Operand : public MCParsedAsmOperand { void print(raw_ostream &OS) const override; - static ARM64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S, + static AArch64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Token, Ctx); + AArch64Operand *Op = new AArch64Operand(k_Token, Ctx); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->Tok.IsSuffix = IsSuffix; @@ -1525,9 +1534,9 @@ class ARM64Operand : public MCParsedAsmOperand { return Op; } - static ARM64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S, + static AArch64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Register, Ctx); + AArch64Operand *Op = new AArch64Operand(k_Register, Ctx); Op->Reg.RegNum = RegNum; Op->Reg.isVector = isVector; Op->StartLoc = S; @@ -1535,10 +1544,10 @@ class ARM64Operand : public MCParsedAsmOperand { return Op; } - static ARM64Operand *CreateVectorList(unsigned RegNum, unsigned Count, + static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count, unsigned NumElements, char ElementKind, SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_VectorList, Ctx); + AArch64Operand *Op = new AArch64Operand(k_VectorList, Ctx); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.NumElements = NumElements; @@ -1548,27 +1557,28 @@ class ARM64Operand : public MCParsedAsmOperand { return Op; } - static ARM64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, + static AArch64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_VectorIndex, Ctx); + AArch64Operand *Op = new AArch64Operand(k_VectorIndex, Ctx); Op->VectorIndex.Val = Idx; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARM64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, + static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Immediate, Ctx); + AArch64Operand *Op = new AArch64Operand(k_Immediate, Ctx); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARM64Operand *CreateShiftedImm(const MCExpr *Val, unsigned ShiftAmount, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_ShiftedImm, Ctx); + static AArch64Operand *CreateShiftedImm(const MCExpr *Val, + unsigned ShiftAmount, SMLoc S, + SMLoc E, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_ShiftedImm, Ctx); Op->ShiftedImm .Val = Val; Op->ShiftedImm.ShiftAmount = ShiftAmount; Op->StartLoc = S; @@ -1576,34 +1586,34 @@ class ARM64Operand : public MCParsedAsmOperand { return Op; } - static ARM64Operand *CreateCondCode(ARM64CC::CondCode Code, SMLoc S, SMLoc E, - MCContext &Ctx) { - ARM64Operand *Op = new 
ARM64Operand(k_CondCode, Ctx); + static AArch64Operand *CreateCondCode(AArch64CC::CondCode Code, SMLoc S, + SMLoc E, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_CondCode, Ctx); Op->CondCode.Code = Code; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARM64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_FPImm, Ctx); + static AArch64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_FPImm, Ctx); Op->FPImm.Val = Val; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARM64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Barrier, Ctx); + static AArch64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_Barrier, Ctx); Op->Barrier.Val = Val; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARM64Operand *CreateSysReg(StringRef Str, SMLoc S, + static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S, uint64_t FeatureBits, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_SysReg, Ctx); + AArch64Operand *Op = new AArch64Operand(k_SysReg, Ctx); Op->SysReg.Data = Str.data(); Op->SysReg.Length = Str.size(); Op->SysReg.FeatureBits = FeatureBits; @@ -1612,27 +1622,27 @@ class ARM64Operand : public MCParsedAsmOperand { return Op; } - static ARM64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E, + static AArch64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_SysCR, Ctx); + AArch64Operand *Op = new AArch64Operand(k_SysCR, Ctx); Op->SysCRImm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARM64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Prefetch, Ctx); + static AArch64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_Prefetch, Ctx); Op->Prefetch.Val = Val; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARM64Operand *CreateShiftExtend(ARM64_AM::ShiftExtendType ShOp, + static AArch64Operand *CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val, bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_ShiftExtend, Ctx); + AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, Ctx); Op->ShiftExtend.Type = ShOp; Op->ShiftExtend.Amount = Val; Op->ShiftExtend.HasExplicitAmount = HasExplicitAmount; @@ -1644,15 +1654,15 @@ class ARM64Operand : public MCParsedAsmOperand { } // end anonymous namespace. 
-void ARM64Operand::print(raw_ostream &OS) const { +void AArch64Operand::print(raw_ostream &OS) const { switch (Kind) { case k_FPImm: - OS << ""; + OS << ""; break; case k_Barrier: { bool Valid; - StringRef Name = ARM64DB::DBarrierMapper().toString(getBarrier(), Valid); + StringRef Name = AArch64DB::DBarrierMapper().toString(getBarrier(), Valid); if (Valid) OS << ""; else @@ -1666,7 +1676,7 @@ void ARM64Operand::print(raw_ostream &OS) const { unsigned Shift = getShiftedImmShift(); OS << "print(OS); - OS << ", lsl #" << ARM64_AM::getShiftValue(Shift) << ">"; + OS << ", lsl #" << AArch64_AM::getShiftValue(Shift) << ">"; break; } case k_CondCode: @@ -1697,7 +1707,7 @@ void ARM64Operand::print(raw_ostream &OS) const { break; case k_Prefetch: { bool Valid; - StringRef Name = ARM64PRFM::PRFMMapper().toString(getPrefetch(), Valid); + StringRef Name = AArch64PRFM::PRFMMapper().toString(getPrefetch(), Valid); if (Valid) OS << ""; else @@ -1705,7 +1715,7 @@ void ARM64Operand::print(raw_ostream &OS) const { break; } case k_ShiftExtend: { - OS << "<" << ARM64_AM::getShiftExtendName(getShiftExtendType()) << " #" + OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #" << getShiftExtendAmount(); if (!hasShiftExtendAmount()) OS << ""; @@ -1724,38 +1734,38 @@ static unsigned MatchRegisterName(StringRef Name); static unsigned matchVectorRegName(StringRef Name) { return StringSwitch(Name) - .Case("v0", ARM64::Q0) - .Case("v1", ARM64::Q1) - .Case("v2", ARM64::Q2) - .Case("v3", ARM64::Q3) - .Case("v4", ARM64::Q4) - .Case("v5", ARM64::Q5) - .Case("v6", ARM64::Q6) - .Case("v7", ARM64::Q7) - .Case("v8", ARM64::Q8) - .Case("v9", ARM64::Q9) - .Case("v10", ARM64::Q10) - .Case("v11", ARM64::Q11) - .Case("v12", ARM64::Q12) - .Case("v13", ARM64::Q13) - .Case("v14", ARM64::Q14) - .Case("v15", ARM64::Q15) - .Case("v16", ARM64::Q16) - .Case("v17", ARM64::Q17) - .Case("v18", ARM64::Q18) - .Case("v19", ARM64::Q19) - .Case("v20", ARM64::Q20) - .Case("v21", ARM64::Q21) - .Case("v22", ARM64::Q22) - .Case("v23", ARM64::Q23) - .Case("v24", ARM64::Q24) - .Case("v25", ARM64::Q25) - .Case("v26", ARM64::Q26) - .Case("v27", ARM64::Q27) - .Case("v28", ARM64::Q28) - .Case("v29", ARM64::Q29) - .Case("v30", ARM64::Q30) - .Case("v31", ARM64::Q31) + .Case("v0", AArch64::Q0) + .Case("v1", AArch64::Q1) + .Case("v2", AArch64::Q2) + .Case("v3", AArch64::Q3) + .Case("v4", AArch64::Q4) + .Case("v5", AArch64::Q5) + .Case("v6", AArch64::Q6) + .Case("v7", AArch64::Q7) + .Case("v8", AArch64::Q8) + .Case("v9", AArch64::Q9) + .Case("v10", AArch64::Q10) + .Case("v11", AArch64::Q11) + .Case("v12", AArch64::Q12) + .Case("v13", AArch64::Q13) + .Case("v14", AArch64::Q14) + .Case("v15", AArch64::Q15) + .Case("v16", AArch64::Q16) + .Case("v17", AArch64::Q17) + .Case("v18", AArch64::Q18) + .Case("v19", AArch64::Q19) + .Case("v20", AArch64::Q20) + .Case("v21", AArch64::Q21) + .Case("v22", AArch64::Q22) + .Case("v23", AArch64::Q23) + .Case("v24", AArch64::Q24) + .Case("v25", AArch64::Q25) + .Case("v26", AArch64::Q26) + .Case("v27", AArch64::Q27) + .Case("v28", AArch64::Q28) + .Case("v29", AArch64::Q29) + .Case("v30", AArch64::Q30) + .Case("v31", AArch64::Q31) .Default(0); } @@ -1798,8 +1808,8 @@ static void parseValidVectorKind(StringRef Name, unsigned &NumElements, } } -bool ARM64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, - SMLoc &EndLoc) { +bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { StartLoc = getLoc(); RegNo = tryParseRegister(); EndLoc = 
SMLoc::getFromPointer(getLoc().getPointer() - 1); @@ -1809,7 +1819,7 @@ bool ARM64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, /// tryParseRegister - Try to parse a register name. The token must be an /// Identifier when called, and if it is a register name the token is eaten and /// the register is added to the operand list. -int ARM64AsmParser::tryParseRegister() { +int AArch64AsmParser::tryParseRegister() { const AsmToken &Tok = Parser.getTok(); assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); @@ -1818,10 +1828,10 @@ int ARM64AsmParser::tryParseRegister() { // Also handle a few aliases of registers. if (RegNum == 0) RegNum = StringSwitch(lowerCase) - .Case("fp", ARM64::FP) - .Case("lr", ARM64::LR) - .Case("x31", ARM64::XZR) - .Case("w31", ARM64::WZR) + .Case("fp", AArch64::FP) + .Case("lr", AArch64::LR) + .Case("x31", AArch64::XZR) + .Case("w31", AArch64::WZR) .Default(0); if (RegNum == 0) @@ -1833,7 +1843,7 @@ int ARM64AsmParser::tryParseRegister() { /// tryMatchVectorRegister - Try to parse a vector register name with optional /// kind specifier. If it is a register specifier, eat the token and return it. -int ARM64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) { +int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) { if (Parser.getTok().isNot(AsmToken::Identifier)) { TokError("vector register expected"); return -1; @@ -1863,8 +1873,8 @@ int ARM64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) { } /// tryParseSysCROperand - Try to parse a system instruction CR operand name. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseSysCROperand(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) { SMLoc S = getLoc(); if (Parser.getTok().isNot(AsmToken::Identifier)) { @@ -1886,13 +1896,14 @@ ARM64AsmParser::tryParseSysCROperand(OperandVector &Operands) { } Parser.Lex(); // Eat identifier token. - Operands.push_back(ARM64Operand::CreateSysCR(CRNum, S, getLoc(), getContext())); + Operands.push_back( + AArch64Operand::CreateSysCR(CRNum, S, getLoc(), getContext())); return MatchOperand_Success; } /// tryParsePrefetch - Try to parse a prefetch operand. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParsePrefetch(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { SMLoc S = getLoc(); const AsmToken &Tok = Parser.getTok(); // Either an identifier for named values or a 5-bit immediate. @@ -1915,7 +1926,7 @@ ARM64AsmParser::tryParsePrefetch(OperandVector &Operands) { return MatchOperand_ParseFail; } - Operands.push_back(ARM64Operand::CreatePrefetch(prfop, S, getContext())); + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext())); return MatchOperand_Success; } @@ -1925,21 +1936,21 @@ ARM64AsmParser::tryParsePrefetch(OperandVector &Operands) { } bool Valid; - unsigned prfop = ARM64PRFM::PRFMMapper().fromString(Tok.getString(), Valid); + unsigned prfop = AArch64PRFM::PRFMMapper().fromString(Tok.getString(), Valid); if (!Valid) { TokError("pre-fetch hint expected"); return MatchOperand_ParseFail; } Parser.Lex(); // Eat identifier token. - Operands.push_back(ARM64Operand::CreatePrefetch(prfop, S, getContext())); + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext())); return MatchOperand_Success; } /// tryParseAdrpLabel - Parse and validate a source label for the ADRP /// instruction. 
-ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { SMLoc S = getLoc(); const MCExpr *Expr; @@ -1950,15 +1961,16 @@ ARM64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { if (parseSymbolicImmVal(Expr)) return MatchOperand_ParseFail; - ARM64MCExpr::VariantKind ELFRefKind; + AArch64MCExpr::VariantKind ELFRefKind; MCSymbolRefExpr::VariantKind DarwinRefKind; int64_t Addend; if (classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { if (DarwinRefKind == MCSymbolRefExpr::VK_None && - ELFRefKind == ARM64MCExpr::VK_INVALID) { + ELFRefKind == AArch64MCExpr::VK_INVALID) { // No modifier was specified at all; this is the syntax for an ELF basic // ADRP relocation (unfortunately). - Expr = ARM64MCExpr::Create(Expr, ARM64MCExpr::VK_ABS_PAGE, getContext()); + Expr = + AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_ABS_PAGE, getContext()); } else if ((DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGE || DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGE) && Addend != 0) { @@ -1967,9 +1979,9 @@ ARM64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { } else if (DarwinRefKind != MCSymbolRefExpr::VK_PAGE && DarwinRefKind != MCSymbolRefExpr::VK_GOTPAGE && DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE && - ELFRefKind != ARM64MCExpr::VK_GOT_PAGE && - ELFRefKind != ARM64MCExpr::VK_GOTTPREL_PAGE && - ELFRefKind != ARM64MCExpr::VK_TLSDESC_PAGE) { + ELFRefKind != AArch64MCExpr::VK_GOT_PAGE && + ELFRefKind != AArch64MCExpr::VK_GOTTPREL_PAGE && + ELFRefKind != AArch64MCExpr::VK_TLSDESC_PAGE) { // The operand must be an @page or @gotpage qualified symbolref. Error(S, "page or gotpage label reference expected"); return MatchOperand_ParseFail; @@ -1980,15 +1992,15 @@ ARM64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { // addend is a raw value here. The linker will adjust it to only reference the // page. SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext())); + Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext())); return MatchOperand_Success; } /// tryParseAdrLabel - Parse and validate a source label for the ADR /// instruction. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseAdrLabel(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) { SMLoc S = getLoc(); const MCExpr *Expr; @@ -2000,14 +2012,14 @@ ARM64AsmParser::tryParseAdrLabel(OperandVector &Operands) { return MatchOperand_ParseFail; SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext())); + Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext())); return MatchOperand_Success; } /// tryParseFPImm - A floating point immediate expression operand. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseFPImm(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseFPImm(OperandVector &Operands) { SMLoc S = getLoc(); bool Hash = false; @@ -2028,7 +2040,7 @@ ARM64AsmParser::tryParseFPImm(OperandVector &Operands) { uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); // If we had a '-' in front, toggle the sign bit. 
IntVal ^= (uint64_t)isNegative << 63; - int Val = ARM64_AM::getFP64Imm(APInt(64, IntVal)); + int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal)); Parser.Lex(); // Eat the token. // Check for out of range values. As an exception, we let Zero through, // as we handle that special case in post-processing before matching in @@ -2037,7 +2049,7 @@ ARM64AsmParser::tryParseFPImm(OperandVector &Operands) { TokError("expected compatible register or floating-point constant"); return MatchOperand_ParseFail; } - Operands.push_back(ARM64Operand::CreateFPImm(Val, S, getContext())); + Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext())); return MatchOperand_Success; } if (Tok.is(AsmToken::Integer)) { @@ -2053,10 +2065,10 @@ ARM64AsmParser::tryParseFPImm(OperandVector &Operands) { uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); // If we had a '-' in front, toggle the sign bit. IntVal ^= (uint64_t)isNegative << 63; - Val = ARM64_AM::getFP64Imm(APInt(64, IntVal)); + Val = AArch64_AM::getFP64Imm(APInt(64, IntVal)); } Parser.Lex(); // Eat the token. - Operands.push_back(ARM64Operand::CreateFPImm(Val, S, getContext())); + Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext())); return MatchOperand_Success; } @@ -2068,8 +2080,8 @@ ARM64AsmParser::tryParseFPImm(OperandVector &Operands) { } /// tryParseAddSubImm - Parse ADD/SUB shifted immediate operand -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseAddSubImm(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseAddSubImm(OperandVector &Operands) { SMLoc S = getLoc(); if (Parser.getTok().is(AsmToken::Hash)) @@ -2092,8 +2104,8 @@ ARM64AsmParser::tryParseAddSubImm(OperandVector &Operands) { } } SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(ARM64Operand::CreateShiftedImm(Imm, ShiftAmount, S, E, - getContext())); + Operands.push_back(AArch64Operand::CreateShiftedImm(Imm, ShiftAmount, S, E, + getContext())); return MatchOperand_Success; } @@ -2128,81 +2140,81 @@ ARM64AsmParser::tryParseAddSubImm(OperandVector &Operands) { Parser.Lex(); // Eat the number SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(ARM64Operand::CreateShiftedImm(Imm, ShiftAmount, - S, E, getContext())); + Operands.push_back(AArch64Operand::CreateShiftedImm(Imm, ShiftAmount, + S, E, getContext())); return MatchOperand_Success; } /// parseCondCodeString - Parse a Condition Code string. 
-ARM64CC::CondCode ARM64AsmParser::parseCondCodeString(StringRef Cond) { - ARM64CC::CondCode CC = StringSwitch(Cond.lower()) - .Case("eq", ARM64CC::EQ) - .Case("ne", ARM64CC::NE) - .Case("cs", ARM64CC::HS) - .Case("hs", ARM64CC::HS) - .Case("cc", ARM64CC::LO) - .Case("lo", ARM64CC::LO) - .Case("mi", ARM64CC::MI) - .Case("pl", ARM64CC::PL) - .Case("vs", ARM64CC::VS) - .Case("vc", ARM64CC::VC) - .Case("hi", ARM64CC::HI) - .Case("ls", ARM64CC::LS) - .Case("ge", ARM64CC::GE) - .Case("lt", ARM64CC::LT) - .Case("gt", ARM64CC::GT) - .Case("le", ARM64CC::LE) - .Case("al", ARM64CC::AL) - .Case("nv", ARM64CC::NV) - .Default(ARM64CC::Invalid); +AArch64CC::CondCode AArch64AsmParser::parseCondCodeString(StringRef Cond) { + AArch64CC::CondCode CC = StringSwitch(Cond.lower()) + .Case("eq", AArch64CC::EQ) + .Case("ne", AArch64CC::NE) + .Case("cs", AArch64CC::HS) + .Case("hs", AArch64CC::HS) + .Case("cc", AArch64CC::LO) + .Case("lo", AArch64CC::LO) + .Case("mi", AArch64CC::MI) + .Case("pl", AArch64CC::PL) + .Case("vs", AArch64CC::VS) + .Case("vc", AArch64CC::VC) + .Case("hi", AArch64CC::HI) + .Case("ls", AArch64CC::LS) + .Case("ge", AArch64CC::GE) + .Case("lt", AArch64CC::LT) + .Case("gt", AArch64CC::GT) + .Case("le", AArch64CC::LE) + .Case("al", AArch64CC::AL) + .Case("nv", AArch64CC::NV) + .Default(AArch64CC::Invalid); return CC; } /// parseCondCode - Parse a Condition Code operand. -bool ARM64AsmParser::parseCondCode(OperandVector &Operands, - bool invertCondCode) { +bool AArch64AsmParser::parseCondCode(OperandVector &Operands, + bool invertCondCode) { SMLoc S = getLoc(); const AsmToken &Tok = Parser.getTok(); assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); StringRef Cond = Tok.getString(); - ARM64CC::CondCode CC = parseCondCodeString(Cond); - if (CC == ARM64CC::Invalid) + AArch64CC::CondCode CC = parseCondCodeString(Cond); + if (CC == AArch64CC::Invalid) return TokError("invalid condition code"); Parser.Lex(); // Eat identifier token. if (invertCondCode) - CC = ARM64CC::getInvertedCondCode(ARM64CC::CondCode(CC)); + CC = AArch64CC::getInvertedCondCode(AArch64CC::CondCode(CC)); Operands.push_back( - ARM64Operand::CreateCondCode(CC, S, getLoc(), getContext())); + AArch64Operand::CreateCondCode(CC, S, getLoc(), getContext())); return false; } /// tryParseOptionalShift - Some operands take an optional shift argument. Parse /// them if present. 
-ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); std::string LowerID = Tok.getString().lower(); - ARM64_AM::ShiftExtendType ShOp = - StringSwitch(LowerID) - .Case("lsl", ARM64_AM::LSL) - .Case("lsr", ARM64_AM::LSR) - .Case("asr", ARM64_AM::ASR) - .Case("ror", ARM64_AM::ROR) - .Case("msl", ARM64_AM::MSL) - .Case("uxtb", ARM64_AM::UXTB) - .Case("uxth", ARM64_AM::UXTH) - .Case("uxtw", ARM64_AM::UXTW) - .Case("uxtx", ARM64_AM::UXTX) - .Case("sxtb", ARM64_AM::SXTB) - .Case("sxth", ARM64_AM::SXTH) - .Case("sxtw", ARM64_AM::SXTW) - .Case("sxtx", ARM64_AM::SXTX) - .Default(ARM64_AM::InvalidShiftExtend); - - if (ShOp == ARM64_AM::InvalidShiftExtend) + AArch64_AM::ShiftExtendType ShOp = + StringSwitch(LowerID) + .Case("lsl", AArch64_AM::LSL) + .Case("lsr", AArch64_AM::LSR) + .Case("asr", AArch64_AM::ASR) + .Case("ror", AArch64_AM::ROR) + .Case("msl", AArch64_AM::MSL) + .Case("uxtb", AArch64_AM::UXTB) + .Case("uxth", AArch64_AM::UXTH) + .Case("uxtw", AArch64_AM::UXTW) + .Case("uxtx", AArch64_AM::UXTX) + .Case("sxtb", AArch64_AM::SXTB) + .Case("sxth", AArch64_AM::SXTH) + .Case("sxtw", AArch64_AM::SXTW) + .Case("sxtx", AArch64_AM::SXTX) + .Default(AArch64_AM::InvalidShiftExtend); + + if (ShOp == AArch64_AM::InvalidShiftExtend) return MatchOperand_NoMatch; SMLoc S = Tok.getLoc(); @@ -2210,9 +2222,9 @@ ARM64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) { bool Hash = getLexer().is(AsmToken::Hash); if (!Hash && getLexer().isNot(AsmToken::Integer)) { - if (ShOp == ARM64_AM::LSL || ShOp == ARM64_AM::LSR || - ShOp == ARM64_AM::ASR || ShOp == ARM64_AM::ROR || - ShOp == ARM64_AM::MSL) { + if (ShOp == AArch64_AM::LSL || ShOp == AArch64_AM::LSR || + ShOp == AArch64_AM::ASR || ShOp == AArch64_AM::ROR || + ShOp == AArch64_AM::MSL) { // We expect a number here. TokError("expected #imm after shift specifier"); return MatchOperand_ParseFail; @@ -2221,7 +2233,7 @@ ARM64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) { // "extend" type operatoins don't need an immediate, #0 is implicit. SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); Operands.push_back( - ARM64Operand::CreateShiftExtend(ShOp, 0, false, S, E, getContext())); + AArch64Operand::CreateShiftExtend(ShOp, 0, false, S, E, getContext())); return MatchOperand_Success; } @@ -2246,21 +2258,21 @@ ARM64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) { } SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateShiftExtend(ShOp, MCE->getValue(), - true, S, E, getContext())); + Operands.push_back(AArch64Operand::CreateShiftExtend( + ShOp, MCE->getValue(), true, S, E, getContext())); return MatchOperand_Success; } /// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for /// the SYS instruction. Parse them specially so that we create a SYS MCInst. 
-bool ARM64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, +bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands) { if (Name.find('.') != StringRef::npos) return TokError("invalid operand"); Mnemonic = Name; Operands.push_back( - ARM64Operand::CreateToken("sys", false, NameLoc, getContext())); + AArch64Operand::CreateToken("sys", false, NameLoc, getContext())); const AsmToken &Tok = Parser.getTok(); StringRef Op = Tok.getString(); @@ -2272,14 +2284,14 @@ bool ARM64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, do { \ Expr = MCConstantExpr::Create(op1, getContext()); \ Operands.push_back( \ - ARM64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ + AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ Operands.push_back( \ - ARM64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \ + AArch64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \ Operands.push_back( \ - ARM64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \ + AArch64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \ Expr = MCConstantExpr::Create(op2, getContext()); \ Operands.push_back( \ - ARM64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ + AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ } while (0) if (Mnemonic == "ic") { @@ -2498,8 +2510,8 @@ bool ARM64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, return false; } -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); // Can be either a #imm style literal or an option name @@ -2522,7 +2534,7 @@ ARM64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { return MatchOperand_ParseFail; } Operands.push_back( - ARM64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext())); + AArch64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext())); return MatchOperand_Success; } @@ -2532,32 +2544,33 @@ ARM64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { } bool Valid; - unsigned Opt = ARM64DB::DBarrierMapper().fromString(Tok.getString(), Valid); + unsigned Opt = AArch64DB::DBarrierMapper().fromString(Tok.getString(), Valid); if (!Valid) { TokError("invalid barrier option name"); return MatchOperand_ParseFail; } // The only valid named option for ISB is 'sy' - if (Mnemonic == "isb" && Opt != ARM64DB::SY) { + if (Mnemonic == "isb" && Opt != AArch64DB::SY) { TokError("'sy' or #imm operand expected"); return MatchOperand_ParseFail; } - Operands.push_back(ARM64Operand::CreateBarrier(Opt, getLoc(), getContext())); + Operands.push_back( + AArch64Operand::CreateBarrier(Opt, getLoc(), getContext())); Parser.Lex(); // Consume the option return MatchOperand_Success; } -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseSysReg(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseSysReg(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return MatchOperand_NoMatch; - Operands.push_back(ARM64Operand::CreateSysReg(Tok.getString(), getLoc(), + Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), getLoc(), STI.getFeatureBits(), getContext())); Parser.Lex(); // Eat identifier @@ -2565,7 +2578,7 @@ ARM64AsmParser::tryParseSysReg(OperandVector &Operands) { } /// tryParseVectorRegister - Parse a vector register operand. 
-bool ARM64AsmParser::tryParseVectorRegister(OperandVector &Operands) { +bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) { if (Parser.getTok().isNot(AsmToken::Identifier)) return true; @@ -2576,11 +2589,12 @@ bool ARM64AsmParser::tryParseVectorRegister(OperandVector &Operands) { if (Reg == -1) return true; Operands.push_back( - ARM64Operand::CreateReg(Reg, true, S, getLoc(), getContext())); + AArch64Operand::CreateReg(Reg, true, S, getLoc(), getContext())); // If there was an explicit qualifier, that goes on as a literal text // operand. if (!Kind.empty()) - Operands.push_back(ARM64Operand::CreateToken(Kind, false, S, getContext())); + Operands.push_back( + AArch64Operand::CreateToken(Kind, false, S, getContext())); // If there is an index specifier following the register, parse that too. if (Parser.getTok().is(AsmToken::LBrac)) { @@ -2604,15 +2618,15 @@ bool ARM64AsmParser::tryParseVectorRegister(OperandVector &Operands) { Parser.Lex(); // Eat right bracket token. - Operands.push_back(ARM64Operand::CreateVectorIndex(MCE->getValue(), SIdx, E, - getContext())); + Operands.push_back(AArch64Operand::CreateVectorIndex(MCE->getValue(), SIdx, + E, getContext())); } return false; } /// parseRegister - Parse a non-vector register operand. -bool ARM64AsmParser::parseRegister(OperandVector &Operands) { +bool AArch64AsmParser::parseRegister(OperandVector &Operands) { SMLoc S = getLoc(); // Try for a vector register. if (!tryParseVectorRegister(Operands)) @@ -2623,7 +2637,7 @@ bool ARM64AsmParser::parseRegister(OperandVector &Operands) { if (Reg == -1) return true; Operands.push_back( - ARM64Operand::CreateReg(Reg, false, S, getLoc(), getContext())); + AArch64Operand::CreateReg(Reg, false, S, getLoc(), getContext())); // A small number of instructions (FMOVXDhighr, for example) have "[1]" // as a string token in the instruction itself. 
@@ -2640,11 +2654,11 @@ bool ARM64AsmParser::parseRegister(OperandVector &Operands) { SMLoc RBracS = getLoc(); Parser.Lex(); Operands.push_back( - ARM64Operand::CreateToken("[", false, LBracS, getContext())); + AArch64Operand::CreateToken("[", false, LBracS, getContext())); Operands.push_back( - ARM64Operand::CreateToken("1", false, IntS, getContext())); + AArch64Operand::CreateToken("1", false, IntS, getContext())); Operands.push_back( - ARM64Operand::CreateToken("]", false, RBracS, getContext())); + AArch64Operand::CreateToken("]", false, RBracS, getContext())); return false; } } @@ -2654,9 +2668,9 @@ bool ARM64AsmParser::parseRegister(OperandVector &Operands) { return false; } -bool ARM64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { +bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { bool HasELFModifier = false; - ARM64MCExpr::VariantKind RefKind; + AArch64MCExpr::VariantKind RefKind; if (Parser.getTok().is(AsmToken::Colon)) { Parser.Lex(); // Eat ':" @@ -2669,45 +2683,45 @@ bool ARM64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { } std::string LowerCase = Parser.getTok().getIdentifier().lower(); - RefKind = StringSwitch(LowerCase) - .Case("lo12", ARM64MCExpr::VK_LO12) - .Case("abs_g3", ARM64MCExpr::VK_ABS_G3) - .Case("abs_g2", ARM64MCExpr::VK_ABS_G2) - .Case("abs_g2_s", ARM64MCExpr::VK_ABS_G2_S) - .Case("abs_g2_nc", ARM64MCExpr::VK_ABS_G2_NC) - .Case("abs_g1", ARM64MCExpr::VK_ABS_G1) - .Case("abs_g1_s", ARM64MCExpr::VK_ABS_G1_S) - .Case("abs_g1_nc", ARM64MCExpr::VK_ABS_G1_NC) - .Case("abs_g0", ARM64MCExpr::VK_ABS_G0) - .Case("abs_g0_s", ARM64MCExpr::VK_ABS_G0_S) - .Case("abs_g0_nc", ARM64MCExpr::VK_ABS_G0_NC) - .Case("dtprel_g2", ARM64MCExpr::VK_DTPREL_G2) - .Case("dtprel_g1", ARM64MCExpr::VK_DTPREL_G1) - .Case("dtprel_g1_nc", ARM64MCExpr::VK_DTPREL_G1_NC) - .Case("dtprel_g0", ARM64MCExpr::VK_DTPREL_G0) - .Case("dtprel_g0_nc", ARM64MCExpr::VK_DTPREL_G0_NC) - .Case("dtprel_hi12", ARM64MCExpr::VK_DTPREL_HI12) - .Case("dtprel_lo12", ARM64MCExpr::VK_DTPREL_LO12) - .Case("dtprel_lo12_nc", ARM64MCExpr::VK_DTPREL_LO12_NC) - .Case("tprel_g2", ARM64MCExpr::VK_TPREL_G2) - .Case("tprel_g1", ARM64MCExpr::VK_TPREL_G1) - .Case("tprel_g1_nc", ARM64MCExpr::VK_TPREL_G1_NC) - .Case("tprel_g0", ARM64MCExpr::VK_TPREL_G0) - .Case("tprel_g0_nc", ARM64MCExpr::VK_TPREL_G0_NC) - .Case("tprel_hi12", ARM64MCExpr::VK_TPREL_HI12) - .Case("tprel_lo12", ARM64MCExpr::VK_TPREL_LO12) - .Case("tprel_lo12_nc", ARM64MCExpr::VK_TPREL_LO12_NC) - .Case("tlsdesc_lo12", ARM64MCExpr::VK_TLSDESC_LO12) - .Case("got", ARM64MCExpr::VK_GOT_PAGE) - .Case("got_lo12", ARM64MCExpr::VK_GOT_LO12) - .Case("gottprel", ARM64MCExpr::VK_GOTTPREL_PAGE) - .Case("gottprel_lo12", ARM64MCExpr::VK_GOTTPREL_LO12_NC) - .Case("gottprel_g1", ARM64MCExpr::VK_GOTTPREL_G1) - .Case("gottprel_g0_nc", ARM64MCExpr::VK_GOTTPREL_G0_NC) - .Case("tlsdesc", ARM64MCExpr::VK_TLSDESC_PAGE) - .Default(ARM64MCExpr::VK_INVALID); - - if (RefKind == ARM64MCExpr::VK_INVALID) { + RefKind = StringSwitch(LowerCase) + .Case("lo12", AArch64MCExpr::VK_LO12) + .Case("abs_g3", AArch64MCExpr::VK_ABS_G3) + .Case("abs_g2", AArch64MCExpr::VK_ABS_G2) + .Case("abs_g2_s", AArch64MCExpr::VK_ABS_G2_S) + .Case("abs_g2_nc", AArch64MCExpr::VK_ABS_G2_NC) + .Case("abs_g1", AArch64MCExpr::VK_ABS_G1) + .Case("abs_g1_s", AArch64MCExpr::VK_ABS_G1_S) + .Case("abs_g1_nc", AArch64MCExpr::VK_ABS_G1_NC) + .Case("abs_g0", AArch64MCExpr::VK_ABS_G0) + .Case("abs_g0_s", AArch64MCExpr::VK_ABS_G0_S) + .Case("abs_g0_nc", AArch64MCExpr::VK_ABS_G0_NC) + .Case("dtprel_g2", 
AArch64MCExpr::VK_DTPREL_G2) + .Case("dtprel_g1", AArch64MCExpr::VK_DTPREL_G1) + .Case("dtprel_g1_nc", AArch64MCExpr::VK_DTPREL_G1_NC) + .Case("dtprel_g0", AArch64MCExpr::VK_DTPREL_G0) + .Case("dtprel_g0_nc", AArch64MCExpr::VK_DTPREL_G0_NC) + .Case("dtprel_hi12", AArch64MCExpr::VK_DTPREL_HI12) + .Case("dtprel_lo12", AArch64MCExpr::VK_DTPREL_LO12) + .Case("dtprel_lo12_nc", AArch64MCExpr::VK_DTPREL_LO12_NC) + .Case("tprel_g2", AArch64MCExpr::VK_TPREL_G2) + .Case("tprel_g1", AArch64MCExpr::VK_TPREL_G1) + .Case("tprel_g1_nc", AArch64MCExpr::VK_TPREL_G1_NC) + .Case("tprel_g0", AArch64MCExpr::VK_TPREL_G0) + .Case("tprel_g0_nc", AArch64MCExpr::VK_TPREL_G0_NC) + .Case("tprel_hi12", AArch64MCExpr::VK_TPREL_HI12) + .Case("tprel_lo12", AArch64MCExpr::VK_TPREL_LO12) + .Case("tprel_lo12_nc", AArch64MCExpr::VK_TPREL_LO12_NC) + .Case("tlsdesc_lo12", AArch64MCExpr::VK_TLSDESC_LO12) + .Case("got", AArch64MCExpr::VK_GOT_PAGE) + .Case("got_lo12", AArch64MCExpr::VK_GOT_LO12) + .Case("gottprel", AArch64MCExpr::VK_GOTTPREL_PAGE) + .Case("gottprel_lo12", AArch64MCExpr::VK_GOTTPREL_LO12_NC) + .Case("gottprel_g1", AArch64MCExpr::VK_GOTTPREL_G1) + .Case("gottprel_g0_nc", AArch64MCExpr::VK_GOTTPREL_G0_NC) + .Case("tlsdesc", AArch64MCExpr::VK_TLSDESC_PAGE) + .Default(AArch64MCExpr::VK_INVALID); + + if (RefKind == AArch64MCExpr::VK_INVALID) { Error(Parser.getTok().getLoc(), "expect relocation specifier in operand after ':'"); return true; @@ -2726,13 +2740,13 @@ bool ARM64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { return true; if (HasELFModifier) - ImmVal = ARM64MCExpr::Create(ImmVal, RefKind, getContext()); + ImmVal = AArch64MCExpr::Create(ImmVal, RefKind, getContext()); return false; } /// parseVectorList - Parse a vector list operand for AdvSIMD instructions. -bool ARM64AsmParser::parseVectorList(OperandVector &Operands) { +bool AArch64AsmParser::parseVectorList(OperandVector &Operands) { assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Bracket"); SMLoc S = getLoc(); Parser.Lex(); // Eat left bracket token. @@ -2798,7 +2812,7 @@ bool ARM64AsmParser::parseVectorList(OperandVector &Operands) { if (!Kind.empty()) parseValidVectorKind(Kind, NumElements, ElementKind); - Operands.push_back(ARM64Operand::CreateVectorList( + Operands.push_back(AArch64Operand::CreateVectorList( FirstReg, Count, NumElements, ElementKind, S, getLoc(), getContext())); // If there is an index specifier following the list, parse that too. @@ -2823,14 +2837,14 @@ bool ARM64AsmParser::parseVectorList(OperandVector &Operands) { Parser.Lex(); // Eat right bracket token. 
- Operands.push_back(ARM64Operand::CreateVectorIndex(MCE->getValue(), SIdx, E, - getContext())); + Operands.push_back(AArch64Operand::CreateVectorIndex(MCE->getValue(), SIdx, + E, getContext())); } return false; } -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) return MatchOperand_NoMatch; @@ -2839,14 +2853,15 @@ ARM64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { MCContext &Ctx = getContext(); const MCRegisterInfo *RI = Ctx.getRegisterInfo(); - if (!RI->getRegClass(ARM64::GPR64spRegClassID).contains(RegNum)) + if (!RI->getRegClass(AArch64::GPR64spRegClassID).contains(RegNum)) return MatchOperand_NoMatch; SMLoc S = getLoc(); Parser.Lex(); // Eat register if (Parser.getTok().isNot(AsmToken::Comma)) { - Operands.push_back(ARM64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx)); + Operands.push_back( + AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx)); return MatchOperand_Success; } Parser.Lex(); // Eat comma. @@ -2866,13 +2881,14 @@ ARM64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { return MatchOperand_ParseFail; } - Operands.push_back(ARM64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx)); + Operands.push_back( + AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx)); return MatchOperand_Success; } /// parseOperand - Parse a arm instruction operand. For now this parses the /// operand regardless of the mnemonic. -bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, +bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, bool invertCondCode) { // Check if the current operand has a custom associated parser, if so, try to // custom parse the operand, or fallback to the general approach. @@ -2895,13 +2911,13 @@ bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, return Error(S, "invalid operand"); SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext())); + Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext())); return false; } case AsmToken::LBrac: { SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(ARM64Operand::CreateToken("[", false, Loc, - getContext())); + Operands.push_back(AArch64Operand::CreateToken("[", false, Loc, + getContext())); Parser.Lex(); // Eat '[' // There's no comma after a '[', so we can parse the next operand @@ -2933,7 +2949,7 @@ bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, return true; E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(IdVal, S, E, getContext())); + Operands.push_back(AArch64Operand::CreateImm(IdVal, S, E, getContext())); return false; } case AsmToken::Integer: @@ -2970,9 +2986,9 @@ bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, Parser.Lex(); // Eat the token. 
Operands.push_back( - ARM64Operand::CreateToken("#0", false, S, getContext())); + AArch64Operand::CreateToken("#0", false, S, getContext())); Operands.push_back( - ARM64Operand::CreateToken(".0", false, S, getContext())); + AArch64Operand::CreateToken(".0", false, S, getContext())); return false; } @@ -2981,17 +2997,17 @@ bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, return true; E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(ImmVal, S, E, getContext())); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E, getContext())); return false; } } } -/// ParseInstruction - Parse an ARM64 instruction mnemonic followed by its +/// ParseInstruction - Parse an AArch64 instruction mnemonic followed by its /// operands. -bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, - StringRef Name, SMLoc NameLoc, - OperandVector &Operands) { +bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + OperandVector &Operands) { Name = StringSwitch(Name.lower()) .Case("beq", "b.eq") .Case("bne", "b.ne") @@ -3026,7 +3042,7 @@ bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, } Operands.push_back( - ARM64Operand::CreateToken(Head, false, NameLoc, getContext())); + AArch64Operand::CreateToken(Head, false, NameLoc, getContext())); Mnemonic = Head; // Handle condition codes for a branch mnemonic @@ -3037,13 +3053,13 @@ bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() + (Head.data() - Name.data())); - ARM64CC::CondCode CC = parseCondCodeString(Head); - if (CC == ARM64CC::Invalid) + AArch64CC::CondCode CC = parseCondCodeString(Head); + if (CC == AArch64CC::Invalid) return Error(SuffixLoc, "invalid condition code"); Operands.push_back( - ARM64Operand::CreateToken(".", true, SuffixLoc, getContext())); + AArch64Operand::CreateToken(".", true, SuffixLoc, getContext())); Operands.push_back( - ARM64Operand::CreateCondCode(CC, NameLoc, NameLoc, getContext())); + AArch64Operand::CreateCondCode(CC, NameLoc, NameLoc, getContext())); } // Add the remaining tokens in the mnemonic. @@ -3054,7 +3070,7 @@ bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() + (Head.data() - Name.data()) + 1); Operands.push_back( - ARM64Operand::CreateToken(Head, true, SuffixLoc, getContext())); + AArch64Operand::CreateToken(Head, true, SuffixLoc, getContext())); } // Conditional compare instructions have a Condition Code operand, which needs @@ -3105,15 +3121,15 @@ bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, // in the given context! 
if (Parser.getTok().is(AsmToken::RBrac)) { SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(ARM64Operand::CreateToken("]", false, Loc, - getContext())); + Operands.push_back(AArch64Operand::CreateToken("]", false, Loc, + getContext())); Parser.Lex(); } if (Parser.getTok().is(AsmToken::Exclaim)) { SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(ARM64Operand::CreateToken("!", false, Loc, - getContext())); + Operands.push_back(AArch64Operand::CreateToken("!", false, Loc, + getContext())); Parser.Lex(); } @@ -3134,18 +3150,18 @@ bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, // FIXME: This entire function is a giant hack to provide us with decent // operand range validation/diagnostics until TableGen/MC can be extended // to support autogeneration of this kind of validation. -bool ARM64AsmParser::validateInstruction(MCInst &Inst, +bool AArch64AsmParser::validateInstruction(MCInst &Inst, SmallVectorImpl &Loc) { const MCRegisterInfo *RI = getContext().getRegisterInfo(); // Check for indexed addressing modes w/ the base register being the // same as a destination/source register or pair load where // the Rt == Rt2. All of those are undefined behaviour. switch (Inst.getOpcode()) { - case ARM64::LDPSWpre: - case ARM64::LDPWpost: - case ARM64::LDPWpre: - case ARM64::LDPXpost: - case ARM64::LDPXpre: { + case AArch64::LDPSWpre: + case AArch64::LDPWpost: + case AArch64::LDPWpre: + case AArch64::LDPXpost: + case AArch64::LDPXpre: { unsigned Rt = Inst.getOperand(1).getReg(); unsigned Rt2 = Inst.getOperand(2).getReg(); unsigned Rn = Inst.getOperand(3).getReg(); @@ -3157,41 +3173,41 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, "is also a destination"); // FALLTHROUGH } - case ARM64::LDPDi: - case ARM64::LDPQi: - case ARM64::LDPSi: - case ARM64::LDPSWi: - case ARM64::LDPWi: - case ARM64::LDPXi: { + case AArch64::LDPDi: + case AArch64::LDPQi: + case AArch64::LDPSi: + case AArch64::LDPSWi: + case AArch64::LDPWi: + case AArch64::LDPXi: { unsigned Rt = Inst.getOperand(0).getReg(); unsigned Rt2 = Inst.getOperand(1).getReg(); if (Rt == Rt2) return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt"); break; } - case ARM64::LDPDpost: - case ARM64::LDPDpre: - case ARM64::LDPQpost: - case ARM64::LDPQpre: - case ARM64::LDPSpost: - case ARM64::LDPSpre: - case ARM64::LDPSWpost: { + case AArch64::LDPDpost: + case AArch64::LDPDpre: + case AArch64::LDPQpost: + case AArch64::LDPQpre: + case AArch64::LDPSpost: + case AArch64::LDPSpre: + case AArch64::LDPSWpost: { unsigned Rt = Inst.getOperand(1).getReg(); unsigned Rt2 = Inst.getOperand(2).getReg(); if (Rt == Rt2) return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt"); break; } - case ARM64::STPDpost: - case ARM64::STPDpre: - case ARM64::STPQpost: - case ARM64::STPQpre: - case ARM64::STPSpost: - case ARM64::STPSpre: - case ARM64::STPWpost: - case ARM64::STPWpre: - case ARM64::STPXpost: - case ARM64::STPXpre: { + case AArch64::STPDpost: + case AArch64::STPDpre: + case AArch64::STPQpost: + case AArch64::STPQpre: + case AArch64::STPSpost: + case AArch64::STPSpre: + case AArch64::STPWpost: + case AArch64::STPWpre: + case AArch64::STPXpost: + case AArch64::STPXpre: { unsigned Rt = Inst.getOperand(1).getReg(); unsigned Rt2 = Inst.getOperand(2).getReg(); unsigned Rn = Inst.getOperand(3).getReg(); @@ -3203,28 +3219,28 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, "is also a source"); break; } - case ARM64::LDRBBpre: - case ARM64::LDRBpre: - case ARM64::LDRHHpre: - case ARM64::LDRHpre: - case 
ARM64::LDRSBWpre: - case ARM64::LDRSBXpre: - case ARM64::LDRSHWpre: - case ARM64::LDRSHXpre: - case ARM64::LDRSWpre: - case ARM64::LDRWpre: - case ARM64::LDRXpre: - case ARM64::LDRBBpost: - case ARM64::LDRBpost: - case ARM64::LDRHHpost: - case ARM64::LDRHpost: - case ARM64::LDRSBWpost: - case ARM64::LDRSBXpost: - case ARM64::LDRSHWpost: - case ARM64::LDRSHXpost: - case ARM64::LDRSWpost: - case ARM64::LDRWpost: - case ARM64::LDRXpost: { + case AArch64::LDRBBpre: + case AArch64::LDRBpre: + case AArch64::LDRHHpre: + case AArch64::LDRHpre: + case AArch64::LDRSBWpre: + case AArch64::LDRSBXpre: + case AArch64::LDRSHWpre: + case AArch64::LDRSHXpre: + case AArch64::LDRSWpre: + case AArch64::LDRWpre: + case AArch64::LDRXpre: + case AArch64::LDRBBpost: + case AArch64::LDRBpost: + case AArch64::LDRHHpost: + case AArch64::LDRHpost: + case AArch64::LDRSBWpost: + case AArch64::LDRSBXpost: + case AArch64::LDRSHWpost: + case AArch64::LDRSHXpost: + case AArch64::LDRSWpost: + case AArch64::LDRWpost: + case AArch64::LDRXpost: { unsigned Rt = Inst.getOperand(1).getReg(); unsigned Rn = Inst.getOperand(2).getReg(); if (RI->isSubRegisterEq(Rn, Rt)) @@ -3232,18 +3248,18 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, "is also a source"); break; } - case ARM64::STRBBpost: - case ARM64::STRBpost: - case ARM64::STRHHpost: - case ARM64::STRHpost: - case ARM64::STRWpost: - case ARM64::STRXpost: - case ARM64::STRBBpre: - case ARM64::STRBpre: - case ARM64::STRHHpre: - case ARM64::STRHpre: - case ARM64::STRWpre: - case ARM64::STRXpre: { + case AArch64::STRBBpost: + case AArch64::STRBpost: + case AArch64::STRHHpost: + case AArch64::STRHpost: + case AArch64::STRWpost: + case AArch64::STRXpost: + case AArch64::STRBBpre: + case AArch64::STRBpre: + case AArch64::STRHHpre: + case AArch64::STRHpre: + case AArch64::STRWpre: + case AArch64::STRXpre: { unsigned Rt = Inst.getOperand(1).getReg(); unsigned Rn = Inst.getOperand(2).getReg(); if (RI->isSubRegisterEq(Rn, Rt)) @@ -3257,19 +3273,19 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, // in the instructions being checked and this keeps the nested conditionals // to a minimum. switch (Inst.getOpcode()) { - case ARM64::ADDSWri: - case ARM64::ADDSXri: - case ARM64::ADDWri: - case ARM64::ADDXri: - case ARM64::SUBSWri: - case ARM64::SUBSXri: - case ARM64::SUBWri: - case ARM64::SUBXri: { + case AArch64::ADDSWri: + case AArch64::ADDSXri: + case AArch64::ADDWri: + case AArch64::ADDXri: + case AArch64::SUBSWri: + case AArch64::SUBSXri: + case AArch64::SUBWri: + case AArch64::SUBXri: { // Annoyingly we can't do this in the isAddSubImm predicate, so there is // some slight duplication here. if (Inst.getOperand(2).isExpr()) { const MCExpr *Expr = Inst.getOperand(2).getExpr(); - ARM64MCExpr::VariantKind ELFRefKind; + AArch64MCExpr::VariantKind ELFRefKind; MCSymbolRefExpr::VariantKind DarwinRefKind; int64_t Addend; if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { @@ -3279,20 +3295,20 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, // Only allow these with ADDXri. 
if ((DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) && - Inst.getOpcode() == ARM64::ADDXri) + Inst.getOpcode() == AArch64::ADDXri) return false; // Only allow these with ADDXri/ADDWri - if ((ELFRefKind == ARM64MCExpr::VK_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_HI12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TPREL_HI12 || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) && - (Inst.getOpcode() == ARM64::ADDXri || - Inst.getOpcode() == ARM64::ADDWri)) + if ((ELFRefKind == AArch64MCExpr::VK_LO12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_HI12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_TPREL_HI12 || + ELFRefKind == AArch64MCExpr::VK_TPREL_LO12 || + ELFRefKind == AArch64MCExpr::VK_TPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12) && + (Inst.getOpcode() == AArch64::ADDXri || + Inst.getOpcode() == AArch64::ADDWri)) return false; // Don't allow expressions in the immediate field otherwise @@ -3305,7 +3321,7 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, } } -bool ARM64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { +bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { switch (ErrCode) { case Match_MissingFeature: return Error(Loc, @@ -3434,28 +3450,28 @@ bool ARM64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { static const char *getSubtargetFeatureName(unsigned Val); -bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - OperandVector &Operands, - MCStreamer &Out, - unsigned &ErrorInfo, - bool MatchingInlineAsm) { +bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + unsigned &ErrorInfo, + bool MatchingInlineAsm) { assert(!Operands.empty() && "Unexpect empty operand list!"); - ARM64Operand *Op = static_cast(Operands[0]); + AArch64Operand *Op = static_cast(Operands[0]); assert(Op->isToken() && "Leading operand should always be a mnemonic!"); StringRef Tok = Op->getToken(); unsigned NumOperands = Operands.size(); if (NumOperands == 4 && Tok == "lsl") { - ARM64Operand *Op2 = static_cast(Operands[2]); - ARM64Operand *Op3 = static_cast(Operands[3]); + AArch64Operand *Op2 = static_cast(Operands[2]); + AArch64Operand *Op3 = static_cast(Operands[3]); if (Op2->isReg() && Op3->isImm()) { const MCConstantExpr *Op3CE = dyn_cast(Op3->getImm()); if (Op3CE) { uint64_t Op3Val = Op3CE->getValue(); uint64_t NewOp3Val = 0; uint64_t NewOp4Val = 0; - if (ARM64MCRegisterClasses[ARM64::GPR32allRegClassID].contains( + if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains( Op2->getReg())) { NewOp3Val = (32 - Op3Val) & 0x1f; NewOp4Val = 31 - Op3Val; @@ -3467,11 +3483,11 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, const MCExpr *NewOp3 = MCConstantExpr::Create(NewOp3Val, getContext()); const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext()); - Operands[0] = ARM64Operand::CreateToken( + Operands[0] = AArch64Operand::CreateToken( "ubfm", false, Op->getStartLoc(), getContext()); - Operands[3] = ARM64Operand::CreateImm(NewOp3, Op3->getStartLoc(), - Op3->getEndLoc(), getContext()); - Operands.push_back(ARM64Operand::CreateImm( + Operands[3] = AArch64Operand::CreateImm(NewOp3, 
Op3->getStartLoc(), + Op3->getEndLoc(), getContext()); + Operands.push_back(AArch64Operand::CreateImm( NewOp4, Op3->getStartLoc(), Op3->getEndLoc(), getContext())); delete Op3; delete Op; @@ -3481,9 +3497,9 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and // UBFIZ -> UBFM aliases. if (Tok == "bfi" || Tok == "sbfiz" || Tok == "ubfiz") { - ARM64Operand *Op1 = static_cast(Operands[1]); - ARM64Operand *Op3 = static_cast(Operands[3]); - ARM64Operand *Op4 = static_cast(Operands[4]); + AArch64Operand *Op1 = static_cast(Operands[1]); + AArch64Operand *Op3 = static_cast(Operands[3]); + AArch64Operand *Op4 = static_cast(Operands[4]); if (Op1->isReg() && Op3->isImm() && Op4->isImm()) { const MCConstantExpr *Op3CE = dyn_cast(Op3->getImm()); @@ -3494,7 +3510,7 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, uint64_t Op4Val = Op4CE->getValue(); uint64_t RegWidth = 0; - if (ARM64MCRegisterClasses[ARM64::GPR64allRegClassID].contains( + if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( Op1->getReg())) RegWidth = 64; else @@ -3508,7 +3524,7 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, "expected integer in range [1, 32]"); uint64_t NewOp3Val = 0; - if (ARM64MCRegisterClasses[ARM64::GPR32allRegClassID].contains( + if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains( Op1->getReg())) NewOp3Val = (32 - Op3Val) & 0x1f; else @@ -3524,18 +3540,18 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, MCConstantExpr::Create(NewOp3Val, getContext()); const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext()); - Operands[3] = ARM64Operand::CreateImm(NewOp3, Op3->getStartLoc(), - Op3->getEndLoc(), getContext()); - Operands[4] = ARM64Operand::CreateImm(NewOp4, Op4->getStartLoc(), - Op4->getEndLoc(), getContext()); + Operands[3] = AArch64Operand::CreateImm( + NewOp3, Op3->getStartLoc(), Op3->getEndLoc(), getContext()); + Operands[4] = AArch64Operand::CreateImm( + NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext()); if (Tok == "bfi") - Operands[0] = ARM64Operand::CreateToken( + Operands[0] = AArch64Operand::CreateToken( "bfm", false, Op->getStartLoc(), getContext()); else if (Tok == "sbfiz") - Operands[0] = ARM64Operand::CreateToken( + Operands[0] = AArch64Operand::CreateToken( "sbfm", false, Op->getStartLoc(), getContext()); else if (Tok == "ubfiz") - Operands[0] = ARM64Operand::CreateToken( + Operands[0] = AArch64Operand::CreateToken( "ubfm", false, Op->getStartLoc(), getContext()); else llvm_unreachable("No valid mnemonic for alias?"); @@ -3550,9 +3566,9 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // UBFX -> UBFM aliases. 
} else if (NumOperands == 5 && (Tok == "bfxil" || Tok == "sbfx" || Tok == "ubfx")) { - ARM64Operand *Op1 = static_cast(Operands[1]); - ARM64Operand *Op3 = static_cast(Operands[3]); - ARM64Operand *Op4 = static_cast(Operands[4]); + AArch64Operand *Op1 = static_cast(Operands[1]); + AArch64Operand *Op3 = static_cast(Operands[3]); + AArch64Operand *Op4 = static_cast(Operands[4]); if (Op1->isReg() && Op3->isImm() && Op4->isImm()) { const MCConstantExpr *Op3CE = dyn_cast(Op3->getImm()); @@ -3563,7 +3579,7 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, uint64_t Op4Val = Op4CE->getValue(); uint64_t RegWidth = 0; - if (ARM64MCRegisterClasses[ARM64::GPR64allRegClassID].contains( + if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( Op1->getReg())) RegWidth = 64; else @@ -3584,16 +3600,16 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext()); - Operands[4] = ARM64Operand::CreateImm( + Operands[4] = AArch64Operand::CreateImm( NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext()); if (Tok == "bfxil") - Operands[0] = ARM64Operand::CreateToken( + Operands[0] = AArch64Operand::CreateToken( "bfm", false, Op->getStartLoc(), getContext()); else if (Tok == "sbfx") - Operands[0] = ARM64Operand::CreateToken( + Operands[0] = AArch64Operand::CreateToken( "sbfm", false, Op->getStartLoc(), getContext()); else if (Tok == "ubfx") - Operands[0] = ARM64Operand::CreateToken( + Operands[0] = AArch64Operand::CreateToken( "ubfm", false, Op->getStartLoc(), getContext()); else llvm_unreachable("No valid mnemonic for alias?"); @@ -3610,44 +3626,44 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (NumOperands == 3 && (Tok == "sxtw" || Tok == "uxtw")) { // The source register can be Wn here, but the matcher expects a // GPR64. Twiddle it here if necessary. - ARM64Operand *Op = static_cast(Operands[2]); + AArch64Operand *Op = static_cast(Operands[2]); if (Op->isReg()) { unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[2] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); + Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(), + Op->getEndLoc(), getContext()); delete Op; } } // FIXME: Likewise for sxt[bh] with a Xd dst operand else if (NumOperands == 3 && (Tok == "sxtb" || Tok == "sxth")) { - ARM64Operand *Op = static_cast(Operands[1]); + AArch64Operand *Op = static_cast(Operands[1]); if (Op->isReg() && - ARM64MCRegisterClasses[ARM64::GPR64allRegClassID].contains( + AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( Op->getReg())) { // The source register can be Wn here, but the matcher expects a // GPR64. Twiddle it here if necessary. 
- ARM64Operand *Op = static_cast(Operands[2]); + AArch64Operand *Op = static_cast(Operands[2]); if (Op->isReg()) { unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[2] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); + Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(), + Op->getEndLoc(), getContext()); delete Op; } } } // FIXME: Likewise for uxt[bh] with a Xd dst operand else if (NumOperands == 3 && (Tok == "uxtb" || Tok == "uxth")) { - ARM64Operand *Op = static_cast(Operands[1]); + AArch64Operand *Op = static_cast(Operands[1]); if (Op->isReg() && - ARM64MCRegisterClasses[ARM64::GPR64allRegClassID].contains( + AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( Op->getReg())) { // The source register can be Wn here, but the matcher expects a // GPR32. Twiddle it here if necessary. - ARM64Operand *Op = static_cast(Operands[1]); + AArch64Operand *Op = static_cast(Operands[1]); if (Op->isReg()) { unsigned Reg = getWRegFromXReg(Op->getReg()); - Operands[1] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); + Operands[1] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(), + Op->getEndLoc(), getContext()); delete Op; } } @@ -3655,16 +3671,17 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR. if (NumOperands == 3 && Tok == "fmov") { - ARM64Operand *RegOp = static_cast(Operands[1]); - ARM64Operand *ImmOp = static_cast(Operands[2]); + AArch64Operand *RegOp = static_cast(Operands[1]); + AArch64Operand *ImmOp = static_cast(Operands[2]); if (RegOp->isReg() && ImmOp->isFPImm() && ImmOp->getFPImm() == (unsigned)-1) { - unsigned zreg = ARM64MCRegisterClasses[ARM64::FPR32RegClassID].contains( - RegOp->getReg()) - ? ARM64::WZR - : ARM64::XZR; - Operands[2] = ARM64Operand::CreateReg(zreg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); + unsigned zreg = + AArch64MCRegisterClasses[AArch64::FPR32RegClassID].contains( + RegOp->getReg()) + ? AArch64::WZR + : AArch64::XZR; + Operands[2] = AArch64Operand::CreateReg(zreg, false, Op->getStartLoc(), + Op->getEndLoc(), getContext()); delete ImmOp; } } @@ -3718,14 +3735,14 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); + ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } // If the match failed on a suffix token operand, tweak the diagnostic // accordingly. - if (((ARM64Operand *)Operands[ErrorInfo])->isToken() && - ((ARM64Operand *)Operands[ErrorInfo])->isTokenSuffix()) + if (((AArch64Operand *)Operands[ErrorInfo])->isToken() && + ((AArch64Operand *)Operands[ErrorInfo])->isTokenSuffix()) MatchResult = Match_InvalidSuffix; return showMatchError(ErrorLoc, MatchResult); @@ -3779,7 +3796,7 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_MRS: { // Any time we get here, there's nothing fancy to do. Just get the // operand SMLoc and display the diagnostic. 
- SMLoc ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); + SMLoc ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; return showMatchError(ErrorLoc, MatchResult); @@ -3791,7 +3808,7 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } /// ParseDirective parses the arm specific directives -bool ARM64AsmParser::ParseDirective(AsmToken DirectiveID) { +bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); SMLoc Loc = DirectiveID.getLoc(); if (IDVal == ".hword") @@ -3808,7 +3825,7 @@ bool ARM64AsmParser::ParseDirective(AsmToken DirectiveID) { /// parseDirectiveWord /// ::= .word [ expression (, expression)* ] -bool ARM64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { +bool AArch64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; @@ -3833,17 +3850,17 @@ bool ARM64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { // parseDirectiveTLSDescCall: // ::= .tlsdesccall symbol -bool ARM64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { +bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { StringRef Name; if (getParser().parseIdentifier(Name)) return Error(L, "expected symbol after directive"); MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); - Expr = ARM64MCExpr::Create(Expr, ARM64MCExpr::VK_TLSDESC, getContext()); + Expr = AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_TLSDESC, getContext()); MCInst Inst; - Inst.setOpcode(ARM64::TLSDESCCALL); + Inst.setOpcode(AArch64::TLSDESCCALL); Inst.addOperand(MCOperand::CreateExpr(Expr)); getParser().getStreamer().EmitInstruction(Inst, STI); @@ -3852,7 +3869,7 @@ bool ARM64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { /// ::= .loh label1, ..., labelN /// The number of arguments depends on the loh identifier. -bool ARM64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) { +bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) { if (IDVal != MCLOHDirectiveName()) return true; MCLOHType Kind; @@ -3904,15 +3921,15 @@ bool ARM64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) { } bool -ARM64AsmParser::classifySymbolRef(const MCExpr *Expr, - ARM64MCExpr::VariantKind &ELFRefKind, - MCSymbolRefExpr::VariantKind &DarwinRefKind, - int64_t &Addend) { - ELFRefKind = ARM64MCExpr::VK_INVALID; +AArch64AsmParser::classifySymbolRef(const MCExpr *Expr, + AArch64MCExpr::VariantKind &ELFRefKind, + MCSymbolRefExpr::VariantKind &DarwinRefKind, + int64_t &Addend) { + ELFRefKind = AArch64MCExpr::VK_INVALID; DarwinRefKind = MCSymbolRefExpr::VK_None; Addend = 0; - if (const ARM64MCExpr *AE = dyn_cast(Expr)) { + if (const AArch64MCExpr *AE = dyn_cast(Expr)) { ELFRefKind = AE->getKind(); Expr = AE->getSubExpr(); } @@ -3949,29 +3966,29 @@ ARM64AsmParser::classifySymbolRef(const MCExpr *Expr, // It's some symbol reference + a constant addend, but really // shouldn't use both Darwin and ELF syntax. - return ELFRefKind == ARM64MCExpr::VK_INVALID || + return ELFRefKind == AArch64MCExpr::VK_INVALID || DarwinRefKind == MCSymbolRefExpr::VK_None; } /// Force static initialization. 
-extern "C" void LLVMInitializeARM64AsmParser() { - RegisterMCAsmParser X(TheARM64leTarget); - RegisterMCAsmParser Y(TheARM64beTarget); +extern "C" void LLVMInitializeAArch64AsmParser() { + RegisterMCAsmParser X(TheAArch64leTarget); + RegisterMCAsmParser Y(TheAArch64beTarget); - RegisterMCAsmParser Z(TheAArch64leTarget); - RegisterMCAsmParser W(TheAArch64beTarget); + RegisterMCAsmParser Z(TheARM64leTarget); + RegisterMCAsmParser W(TheARM64beTarget); } #define GET_REGISTER_MATCHER #define GET_SUBTARGET_FEATURE_NAME #define GET_MATCHER_IMPLEMENTATION -#include "ARM64GenAsmMatcher.inc" +#include "AArch64GenAsmMatcher.inc" // Define this matcher function after the auto-generated include so we // have the match class enum definitions. -unsigned ARM64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, - unsigned Kind) { - ARM64Operand *Op = static_cast(AsmOp); +unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, + unsigned Kind) { + AArch64Operand *Op = static_cast(AsmOp); // If the kind is a token for a literal immediate, check if our asm // operand matches. This is for InstAliases which have a fixed-value // immediate in the syntax. diff --git a/lib/Target/ARM64/AsmParser/CMakeLists.txt b/lib/Target/AArch64/AsmParser/CMakeLists.txt similarity index 59% rename from lib/Target/ARM64/AsmParser/CMakeLists.txt rename to lib/Target/AArch64/AsmParser/CMakeLists.txt index 826158b1ed17..cc0a9d86a14e 100644 --- a/lib/Target/ARM64/AsmParser/CMakeLists.txt +++ b/lib/Target/AArch64/AsmParser/CMakeLists.txt @@ -1,6 +1,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) -add_llvm_library(LLVMARM64AsmParser - ARM64AsmParser.cpp +add_llvm_library(LLVMAArch64AsmParser + AArch64AsmParser.cpp ) diff --git a/lib/Target/ARM64/AsmParser/LLVMBuild.txt b/lib/Target/AArch64/AsmParser/LLVMBuild.txt similarity index 70% rename from lib/Target/ARM64/AsmParser/LLVMBuild.txt rename to lib/Target/AArch64/AsmParser/LLVMBuild.txt index 9045283e9192..11eb9d55f615 100644 --- a/lib/Target/ARM64/AsmParser/LLVMBuild.txt +++ b/lib/Target/AArch64/AsmParser/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/ARM64/AsmParser/LLVMBuild.txt ---------------*- Conf -*--===; +;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt ---------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,7 +17,7 @@ [component_0] type = Library -name = ARM64AsmParser -parent = ARM64 -required_libraries = ARM64Desc ARM64Info ARM64Utils MC MCParser Support -add_to_library_groups = ARM64 +name = AArch64AsmParser +parent = AArch64 +required_libraries = AArch64Desc AArch64Info AArch64Utils MC MCParser Support +add_to_library_groups = AArch64 diff --git a/lib/Target/ARM64/AsmParser/Makefile b/lib/Target/AArch64/AsmParser/Makefile similarity index 82% rename from lib/Target/ARM64/AsmParser/Makefile rename to lib/Target/AArch64/AsmParser/Makefile index d25c47f9af99..00268c76f8e8 100644 --- a/lib/Target/ARM64/AsmParser/Makefile +++ b/lib/Target/AArch64/AsmParser/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===## +##===- lib/Target/AArch64/AsmParser/Makefile ---------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # @@ -7,7 +7,7 @@ # ##===----------------------------------------------------------------------===## LEVEL = ../../../.. 
-LIBRARYNAME = LLVMARM64AsmParser +LIBRARYNAME = LLVMAArch64AsmParser # Hack: we need to include 'main' ARM target directory to grab private headers CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt new file mode 100644 index 000000000000..789d549bb156 --- /dev/null +++ b/lib/Target/AArch64/CMakeLists.txt @@ -0,0 +1,51 @@ +set(LLVM_TARGET_DEFINITIONS AArch64.td) + +tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info) +tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) +tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering) +tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) +tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel) +tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel) +tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv) +tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler) +add_public_tablegen_target(AArch64CommonTableGen) + +add_llvm_target(AArch64CodeGen + AArch64AddressTypePromotion.cpp + AArch64AdvSIMDScalarPass.cpp + AArch64AsmPrinter.cpp + AArch64BranchRelaxation.cpp + AArch64CleanupLocalDynamicTLSPass.cpp + AArch64CollectLOH.cpp + AArch64ConditionalCompares.cpp + AArch64DeadRegisterDefinitionsPass.cpp + AArch64ExpandPseudoInsts.cpp + AArch64FastISel.cpp + AArch64FrameLowering.cpp + AArch64ISelDAGToDAG.cpp + AArch64ISelLowering.cpp + AArch64InstrInfo.cpp + AArch64LoadStoreOptimizer.cpp + AArch64MCInstLower.cpp + AArch64PromoteConstant.cpp + AArch64RegisterInfo.cpp + AArch64SelectionDAGInfo.cpp + AArch64StorePairSuppress.cpp + AArch64Subtarget.cpp + AArch64TargetMachine.cpp + AArch64TargetObjectFile.cpp + AArch64TargetTransformInfo.cpp +) + +add_dependencies(LLVMAArch64CodeGen intrinsics_gen) + +add_subdirectory(TargetInfo) +add_subdirectory(AsmParser) +add_subdirectory(Disassembler) +add_subdirectory(InstPrinter) +add_subdirectory(MCTargetDesc) +add_subdirectory(Utils) diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp similarity index 69% rename from lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp rename to lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index bb47b3a0982a..6de27d6d51a5 100644 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -1,4 +1,4 @@ -//===- ARM64Disassembler.cpp - Disassembler for ARM64 -----------*- C++ -*-===// +//===- AArch64Disassembler.cpp - Disassembler for AArch64 -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,11 +10,11 @@ // //===----------------------------------------------------------------------===// -#include "ARM64Disassembler.h" -#include "ARM64ExternalSymbolizer.h" -#include "ARM64Subtarget.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "Utils/ARM64BaseInfo.h" +#include "AArch64Disassembler.h" +#include "AArch64ExternalSymbolizer.h" +#include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/Support/Debug.h" @@ -24,7 +24,7 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-disassembler" +#define DEBUG_TYPE 
"aarch64-disassembler" // Pull DecodeStatus and its enum values into the global namespace. typedef llvm::MCDisassembler::DecodeStatus DecodeStatus; @@ -186,20 +186,20 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) { llvm_unreachable("Invalid DecodeStatus!"); } -#include "ARM64GenDisassemblerTables.inc" -#include "ARM64GenInstrInfo.inc" +#include "AArch64GenDisassemblerTables.inc" +#include "AArch64GenInstrInfo.inc" #define Success llvm::MCDisassembler::Success #define Fail llvm::MCDisassembler::Fail #define SoftFail llvm::MCDisassembler::SoftFail -static MCDisassembler *createARM64Disassembler(const Target &T, +static MCDisassembler *createAArch64Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new ARM64Disassembler(STI, Ctx); + return new AArch64Disassembler(STI, Ctx); } -DecodeStatus ARM64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, +DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, const MemoryObject &Region, uint64_t Address, raw_ostream &os, @@ -223,43 +223,44 @@ DecodeStatus ARM64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, } static MCSymbolizer * -createARM64ExternalSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo, +createAArch64ExternalSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo, MCContext *Ctx, MCRelocationInfo *RelInfo) { - return new llvm::ARM64ExternalSymbolizer( + return new llvm::AArch64ExternalSymbolizer( *Ctx, std::unique_ptr(RelInfo), GetOpInfo, SymbolLookUp, DisInfo); } -extern "C" void LLVMInitializeARM64Disassembler() { - TargetRegistry::RegisterMCDisassembler(TheARM64leTarget, - createARM64Disassembler); - TargetRegistry::RegisterMCDisassembler(TheARM64beTarget, - createARM64Disassembler); - TargetRegistry::RegisterMCSymbolizer(TheARM64leTarget, - createARM64ExternalSymbolizer); - TargetRegistry::RegisterMCSymbolizer(TheARM64beTarget, - createARM64ExternalSymbolizer); - +extern "C" void LLVMInitializeAArch64Disassembler() { TargetRegistry::RegisterMCDisassembler(TheAArch64leTarget, - createARM64Disassembler); + createAArch64Disassembler); TargetRegistry::RegisterMCDisassembler(TheAArch64beTarget, - createARM64Disassembler); + createAArch64Disassembler); TargetRegistry::RegisterMCSymbolizer(TheAArch64leTarget, - createARM64ExternalSymbolizer); + createAArch64ExternalSymbolizer); TargetRegistry::RegisterMCSymbolizer(TheAArch64beTarget, - createARM64ExternalSymbolizer); + createAArch64ExternalSymbolizer); + + TargetRegistry::RegisterMCDisassembler(TheARM64leTarget, + createAArch64Disassembler); + TargetRegistry::RegisterMCDisassembler(TheARM64beTarget, + createAArch64Disassembler); + TargetRegistry::RegisterMCSymbolizer(TheARM64leTarget, + createAArch64ExternalSymbolizer); + TargetRegistry::RegisterMCSymbolizer(TheARM64beTarget, + createAArch64ExternalSymbolizer); } static const unsigned FPR128DecoderTable[] = { - ARM64::Q0, ARM64::Q1, ARM64::Q2, ARM64::Q3, ARM64::Q4, ARM64::Q5, - ARM64::Q6, ARM64::Q7, ARM64::Q8, ARM64::Q9, ARM64::Q10, ARM64::Q11, - ARM64::Q12, ARM64::Q13, ARM64::Q14, ARM64::Q15, ARM64::Q16, ARM64::Q17, - ARM64::Q18, ARM64::Q19, ARM64::Q20, ARM64::Q21, ARM64::Q22, ARM64::Q23, - ARM64::Q24, ARM64::Q25, ARM64::Q26, ARM64::Q27, ARM64::Q28, ARM64::Q29, - ARM64::Q30, ARM64::Q31 + AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, + AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9, + AArch64::Q10, AArch64::Q11, AArch64::Q12, AArch64::Q13, AArch64::Q14, + AArch64::Q15, 
AArch64::Q16, AArch64::Q17, AArch64::Q18, AArch64::Q19, + AArch64::Q20, AArch64::Q21, AArch64::Q22, AArch64::Q23, AArch64::Q24, + AArch64::Q25, AArch64::Q26, AArch64::Q27, AArch64::Q28, AArch64::Q29, + AArch64::Q30, AArch64::Q31 }; static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo, @@ -282,12 +283,13 @@ static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned FPR64DecoderTable[] = { - ARM64::D0, ARM64::D1, ARM64::D2, ARM64::D3, ARM64::D4, ARM64::D5, - ARM64::D6, ARM64::D7, ARM64::D8, ARM64::D9, ARM64::D10, ARM64::D11, - ARM64::D12, ARM64::D13, ARM64::D14, ARM64::D15, ARM64::D16, ARM64::D17, - ARM64::D18, ARM64::D19, ARM64::D20, ARM64::D21, ARM64::D22, ARM64::D23, - ARM64::D24, ARM64::D25, ARM64::D26, ARM64::D27, ARM64::D28, ARM64::D29, - ARM64::D30, ARM64::D31 + AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, + AArch64::D5, AArch64::D6, AArch64::D7, AArch64::D8, AArch64::D9, + AArch64::D10, AArch64::D11, AArch64::D12, AArch64::D13, AArch64::D14, + AArch64::D15, AArch64::D16, AArch64::D17, AArch64::D18, AArch64::D19, + AArch64::D20, AArch64::D21, AArch64::D22, AArch64::D23, AArch64::D24, + AArch64::D25, AArch64::D26, AArch64::D27, AArch64::D28, AArch64::D29, + AArch64::D30, AArch64::D31 }; static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo, @@ -302,12 +304,13 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned FPR32DecoderTable[] = { - ARM64::S0, ARM64::S1, ARM64::S2, ARM64::S3, ARM64::S4, ARM64::S5, - ARM64::S6, ARM64::S7, ARM64::S8, ARM64::S9, ARM64::S10, ARM64::S11, - ARM64::S12, ARM64::S13, ARM64::S14, ARM64::S15, ARM64::S16, ARM64::S17, - ARM64::S18, ARM64::S19, ARM64::S20, ARM64::S21, ARM64::S22, ARM64::S23, - ARM64::S24, ARM64::S25, ARM64::S26, ARM64::S27, ARM64::S28, ARM64::S29, - ARM64::S30, ARM64::S31 + AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, + AArch64::S5, AArch64::S6, AArch64::S7, AArch64::S8, AArch64::S9, + AArch64::S10, AArch64::S11, AArch64::S12, AArch64::S13, AArch64::S14, + AArch64::S15, AArch64::S16, AArch64::S17, AArch64::S18, AArch64::S19, + AArch64::S20, AArch64::S21, AArch64::S22, AArch64::S23, AArch64::S24, + AArch64::S25, AArch64::S26, AArch64::S27, AArch64::S28, AArch64::S29, + AArch64::S30, AArch64::S31 }; static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo, @@ -322,12 +325,13 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned FPR16DecoderTable[] = { - ARM64::H0, ARM64::H1, ARM64::H2, ARM64::H3, ARM64::H4, ARM64::H5, - ARM64::H6, ARM64::H7, ARM64::H8, ARM64::H9, ARM64::H10, ARM64::H11, - ARM64::H12, ARM64::H13, ARM64::H14, ARM64::H15, ARM64::H16, ARM64::H17, - ARM64::H18, ARM64::H19, ARM64::H20, ARM64::H21, ARM64::H22, ARM64::H23, - ARM64::H24, ARM64::H25, ARM64::H26, ARM64::H27, ARM64::H28, ARM64::H29, - ARM64::H30, ARM64::H31 + AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, + AArch64::H5, AArch64::H6, AArch64::H7, AArch64::H8, AArch64::H9, + AArch64::H10, AArch64::H11, AArch64::H12, AArch64::H13, AArch64::H14, + AArch64::H15, AArch64::H16, AArch64::H17, AArch64::H18, AArch64::H19, + AArch64::H20, AArch64::H21, AArch64::H22, AArch64::H23, AArch64::H24, + AArch64::H25, AArch64::H26, AArch64::H27, AArch64::H28, AArch64::H29, + AArch64::H30, AArch64::H31 }; static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo, @@ -342,12 +346,13 @@ static DecodeStatus 
DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned FPR8DecoderTable[] = { - ARM64::B0, ARM64::B1, ARM64::B2, ARM64::B3, ARM64::B4, ARM64::B5, - ARM64::B6, ARM64::B7, ARM64::B8, ARM64::B9, ARM64::B10, ARM64::B11, - ARM64::B12, ARM64::B13, ARM64::B14, ARM64::B15, ARM64::B16, ARM64::B17, - ARM64::B18, ARM64::B19, ARM64::B20, ARM64::B21, ARM64::B22, ARM64::B23, - ARM64::B24, ARM64::B25, ARM64::B26, ARM64::B27, ARM64::B28, ARM64::B29, - ARM64::B30, ARM64::B31 + AArch64::B0, AArch64::B1, AArch64::B2, AArch64::B3, AArch64::B4, + AArch64::B5, AArch64::B6, AArch64::B7, AArch64::B8, AArch64::B9, + AArch64::B10, AArch64::B11, AArch64::B12, AArch64::B13, AArch64::B14, + AArch64::B15, AArch64::B16, AArch64::B17, AArch64::B18, AArch64::B19, + AArch64::B20, AArch64::B21, AArch64::B22, AArch64::B23, AArch64::B24, + AArch64::B25, AArch64::B26, AArch64::B27, AArch64::B28, AArch64::B29, + AArch64::B30, AArch64::B31 }; static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo, @@ -362,12 +367,13 @@ static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned GPR64DecoderTable[] = { - ARM64::X0, ARM64::X1, ARM64::X2, ARM64::X3, ARM64::X4, ARM64::X5, - ARM64::X6, ARM64::X7, ARM64::X8, ARM64::X9, ARM64::X10, ARM64::X11, - ARM64::X12, ARM64::X13, ARM64::X14, ARM64::X15, ARM64::X16, ARM64::X17, - ARM64::X18, ARM64::X19, ARM64::X20, ARM64::X21, ARM64::X22, ARM64::X23, - ARM64::X24, ARM64::X25, ARM64::X26, ARM64::X27, ARM64::X28, ARM64::FP, - ARM64::LR, ARM64::XZR + AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, + AArch64::X5, AArch64::X6, AArch64::X7, AArch64::X8, AArch64::X9, + AArch64::X10, AArch64::X11, AArch64::X12, AArch64::X13, AArch64::X14, + AArch64::X15, AArch64::X16, AArch64::X17, AArch64::X18, AArch64::X19, + AArch64::X20, AArch64::X21, AArch64::X22, AArch64::X23, AArch64::X24, + AArch64::X25, AArch64::X26, AArch64::X27, AArch64::X28, AArch64::FP, + AArch64::LR, AArch64::XZR }; static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, @@ -387,19 +393,20 @@ static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return Fail; unsigned Register = GPR64DecoderTable[RegNo]; - if (Register == ARM64::XZR) - Register = ARM64::SP; + if (Register == AArch64::XZR) + Register = AArch64::SP; Inst.addOperand(MCOperand::CreateReg(Register)); return Success; } static const unsigned GPR32DecoderTable[] = { - ARM64::W0, ARM64::W1, ARM64::W2, ARM64::W3, ARM64::W4, ARM64::W5, - ARM64::W6, ARM64::W7, ARM64::W8, ARM64::W9, ARM64::W10, ARM64::W11, - ARM64::W12, ARM64::W13, ARM64::W14, ARM64::W15, ARM64::W16, ARM64::W17, - ARM64::W18, ARM64::W19, ARM64::W20, ARM64::W21, ARM64::W22, ARM64::W23, - ARM64::W24, ARM64::W25, ARM64::W26, ARM64::W27, ARM64::W28, ARM64::W29, - ARM64::W30, ARM64::WZR + AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, + AArch64::W5, AArch64::W6, AArch64::W7, AArch64::W8, AArch64::W9, + AArch64::W10, AArch64::W11, AArch64::W12, AArch64::W13, AArch64::W14, + AArch64::W15, AArch64::W16, AArch64::W17, AArch64::W18, AArch64::W19, + AArch64::W20, AArch64::W21, AArch64::W22, AArch64::W23, AArch64::W24, + AArch64::W25, AArch64::W26, AArch64::W27, AArch64::W28, AArch64::W29, + AArch64::W30, AArch64::WZR }; static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, @@ -420,19 +427,20 @@ static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo, return Fail; unsigned Register = GPR32DecoderTable[RegNo]; - if (Register 
== ARM64::WZR) - Register = ARM64::WSP; + if (Register == AArch64::WZR) + Register = AArch64::WSP; Inst.addOperand(MCOperand::CreateReg(Register)); return Success; } static const unsigned VectorDecoderTable[] = { - ARM64::Q0, ARM64::Q1, ARM64::Q2, ARM64::Q3, ARM64::Q4, ARM64::Q5, - ARM64::Q6, ARM64::Q7, ARM64::Q8, ARM64::Q9, ARM64::Q10, ARM64::Q11, - ARM64::Q12, ARM64::Q13, ARM64::Q14, ARM64::Q15, ARM64::Q16, ARM64::Q17, - ARM64::Q18, ARM64::Q19, ARM64::Q20, ARM64::Q21, ARM64::Q22, ARM64::Q23, - ARM64::Q24, ARM64::Q25, ARM64::Q26, ARM64::Q27, ARM64::Q28, ARM64::Q29, - ARM64::Q30, ARM64::Q31 + AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, + AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9, + AArch64::Q10, AArch64::Q11, AArch64::Q12, AArch64::Q13, AArch64::Q14, + AArch64::Q15, AArch64::Q16, AArch64::Q17, AArch64::Q18, AArch64::Q19, + AArch64::Q20, AArch64::Q21, AArch64::Q22, AArch64::Q23, AArch64::Q24, + AArch64::Q25, AArch64::Q26, AArch64::Q27, AArch64::Q28, AArch64::Q29, + AArch64::Q30, AArch64::Q31 }; static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo, @@ -447,14 +455,14 @@ static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned QQDecoderTable[] = { - ARM64::Q0_Q1, ARM64::Q1_Q2, ARM64::Q2_Q3, ARM64::Q3_Q4, - ARM64::Q4_Q5, ARM64::Q5_Q6, ARM64::Q6_Q7, ARM64::Q7_Q8, - ARM64::Q8_Q9, ARM64::Q9_Q10, ARM64::Q10_Q11, ARM64::Q11_Q12, - ARM64::Q12_Q13, ARM64::Q13_Q14, ARM64::Q14_Q15, ARM64::Q15_Q16, - ARM64::Q16_Q17, ARM64::Q17_Q18, ARM64::Q18_Q19, ARM64::Q19_Q20, - ARM64::Q20_Q21, ARM64::Q21_Q22, ARM64::Q22_Q23, ARM64::Q23_Q24, - ARM64::Q24_Q25, ARM64::Q25_Q26, ARM64::Q26_Q27, ARM64::Q27_Q28, - ARM64::Q28_Q29, ARM64::Q29_Q30, ARM64::Q30_Q31, ARM64::Q31_Q0 + AArch64::Q0_Q1, AArch64::Q1_Q2, AArch64::Q2_Q3, AArch64::Q3_Q4, + AArch64::Q4_Q5, AArch64::Q5_Q6, AArch64::Q6_Q7, AArch64::Q7_Q8, + AArch64::Q8_Q9, AArch64::Q9_Q10, AArch64::Q10_Q11, AArch64::Q11_Q12, + AArch64::Q12_Q13, AArch64::Q13_Q14, AArch64::Q14_Q15, AArch64::Q15_Q16, + AArch64::Q16_Q17, AArch64::Q17_Q18, AArch64::Q18_Q19, AArch64::Q19_Q20, + AArch64::Q20_Q21, AArch64::Q21_Q22, AArch64::Q22_Q23, AArch64::Q23_Q24, + AArch64::Q24_Q25, AArch64::Q25_Q26, AArch64::Q26_Q27, AArch64::Q27_Q28, + AArch64::Q28_Q29, AArch64::Q29_Q30, AArch64::Q30_Q31, AArch64::Q31_Q0 }; static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo, @@ -467,17 +475,17 @@ static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned QQQDecoderTable[] = { - ARM64::Q0_Q1_Q2, ARM64::Q1_Q2_Q3, ARM64::Q2_Q3_Q4, - ARM64::Q3_Q4_Q5, ARM64::Q4_Q5_Q6, ARM64::Q5_Q6_Q7, - ARM64::Q6_Q7_Q8, ARM64::Q7_Q8_Q9, ARM64::Q8_Q9_Q10, - ARM64::Q9_Q10_Q11, ARM64::Q10_Q11_Q12, ARM64::Q11_Q12_Q13, - ARM64::Q12_Q13_Q14, ARM64::Q13_Q14_Q15, ARM64::Q14_Q15_Q16, - ARM64::Q15_Q16_Q17, ARM64::Q16_Q17_Q18, ARM64::Q17_Q18_Q19, - ARM64::Q18_Q19_Q20, ARM64::Q19_Q20_Q21, ARM64::Q20_Q21_Q22, - ARM64::Q21_Q22_Q23, ARM64::Q22_Q23_Q24, ARM64::Q23_Q24_Q25, - ARM64::Q24_Q25_Q26, ARM64::Q25_Q26_Q27, ARM64::Q26_Q27_Q28, - ARM64::Q27_Q28_Q29, ARM64::Q28_Q29_Q30, ARM64::Q29_Q30_Q31, - ARM64::Q30_Q31_Q0, ARM64::Q31_Q0_Q1 + AArch64::Q0_Q1_Q2, AArch64::Q1_Q2_Q3, AArch64::Q2_Q3_Q4, + AArch64::Q3_Q4_Q5, AArch64::Q4_Q5_Q6, AArch64::Q5_Q6_Q7, + AArch64::Q6_Q7_Q8, AArch64::Q7_Q8_Q9, AArch64::Q8_Q9_Q10, + AArch64::Q9_Q10_Q11, AArch64::Q10_Q11_Q12, AArch64::Q11_Q12_Q13, + AArch64::Q12_Q13_Q14, AArch64::Q13_Q14_Q15, AArch64::Q14_Q15_Q16, + AArch64::Q15_Q16_Q17, AArch64::Q16_Q17_Q18, 
AArch64::Q17_Q18_Q19, + AArch64::Q18_Q19_Q20, AArch64::Q19_Q20_Q21, AArch64::Q20_Q21_Q22, + AArch64::Q21_Q22_Q23, AArch64::Q22_Q23_Q24, AArch64::Q23_Q24_Q25, + AArch64::Q24_Q25_Q26, AArch64::Q25_Q26_Q27, AArch64::Q26_Q27_Q28, + AArch64::Q27_Q28_Q29, AArch64::Q28_Q29_Q30, AArch64::Q29_Q30_Q31, + AArch64::Q30_Q31_Q0, AArch64::Q31_Q0_Q1 }; static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo, @@ -490,17 +498,17 @@ static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned QQQQDecoderTable[] = { - ARM64::Q0_Q1_Q2_Q3, ARM64::Q1_Q2_Q3_Q4, ARM64::Q2_Q3_Q4_Q5, - ARM64::Q3_Q4_Q5_Q6, ARM64::Q4_Q5_Q6_Q7, ARM64::Q5_Q6_Q7_Q8, - ARM64::Q6_Q7_Q8_Q9, ARM64::Q7_Q8_Q9_Q10, ARM64::Q8_Q9_Q10_Q11, - ARM64::Q9_Q10_Q11_Q12, ARM64::Q10_Q11_Q12_Q13, ARM64::Q11_Q12_Q13_Q14, - ARM64::Q12_Q13_Q14_Q15, ARM64::Q13_Q14_Q15_Q16, ARM64::Q14_Q15_Q16_Q17, - ARM64::Q15_Q16_Q17_Q18, ARM64::Q16_Q17_Q18_Q19, ARM64::Q17_Q18_Q19_Q20, - ARM64::Q18_Q19_Q20_Q21, ARM64::Q19_Q20_Q21_Q22, ARM64::Q20_Q21_Q22_Q23, - ARM64::Q21_Q22_Q23_Q24, ARM64::Q22_Q23_Q24_Q25, ARM64::Q23_Q24_Q25_Q26, - ARM64::Q24_Q25_Q26_Q27, ARM64::Q25_Q26_Q27_Q28, ARM64::Q26_Q27_Q28_Q29, - ARM64::Q27_Q28_Q29_Q30, ARM64::Q28_Q29_Q30_Q31, ARM64::Q29_Q30_Q31_Q0, - ARM64::Q30_Q31_Q0_Q1, ARM64::Q31_Q0_Q1_Q2 + AArch64::Q0_Q1_Q2_Q3, AArch64::Q1_Q2_Q3_Q4, AArch64::Q2_Q3_Q4_Q5, + AArch64::Q3_Q4_Q5_Q6, AArch64::Q4_Q5_Q6_Q7, AArch64::Q5_Q6_Q7_Q8, + AArch64::Q6_Q7_Q8_Q9, AArch64::Q7_Q8_Q9_Q10, AArch64::Q8_Q9_Q10_Q11, + AArch64::Q9_Q10_Q11_Q12, AArch64::Q10_Q11_Q12_Q13, AArch64::Q11_Q12_Q13_Q14, + AArch64::Q12_Q13_Q14_Q15, AArch64::Q13_Q14_Q15_Q16, AArch64::Q14_Q15_Q16_Q17, + AArch64::Q15_Q16_Q17_Q18, AArch64::Q16_Q17_Q18_Q19, AArch64::Q17_Q18_Q19_Q20, + AArch64::Q18_Q19_Q20_Q21, AArch64::Q19_Q20_Q21_Q22, AArch64::Q20_Q21_Q22_Q23, + AArch64::Q21_Q22_Q23_Q24, AArch64::Q22_Q23_Q24_Q25, AArch64::Q23_Q24_Q25_Q26, + AArch64::Q24_Q25_Q26_Q27, AArch64::Q25_Q26_Q27_Q28, AArch64::Q26_Q27_Q28_Q29, + AArch64::Q27_Q28_Q29_Q30, AArch64::Q28_Q29_Q30_Q31, AArch64::Q29_Q30_Q31_Q0, + AArch64::Q30_Q31_Q0_Q1, AArch64::Q31_Q0_Q1_Q2 }; static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo, @@ -514,14 +522,14 @@ static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned DDDecoderTable[] = { - ARM64::D0_D1, ARM64::D1_D2, ARM64::D2_D3, ARM64::D3_D4, - ARM64::D4_D5, ARM64::D5_D6, ARM64::D6_D7, ARM64::D7_D8, - ARM64::D8_D9, ARM64::D9_D10, ARM64::D10_D11, ARM64::D11_D12, - ARM64::D12_D13, ARM64::D13_D14, ARM64::D14_D15, ARM64::D15_D16, - ARM64::D16_D17, ARM64::D17_D18, ARM64::D18_D19, ARM64::D19_D20, - ARM64::D20_D21, ARM64::D21_D22, ARM64::D22_D23, ARM64::D23_D24, - ARM64::D24_D25, ARM64::D25_D26, ARM64::D26_D27, ARM64::D27_D28, - ARM64::D28_D29, ARM64::D29_D30, ARM64::D30_D31, ARM64::D31_D0 + AArch64::D0_D1, AArch64::D1_D2, AArch64::D2_D3, AArch64::D3_D4, + AArch64::D4_D5, AArch64::D5_D6, AArch64::D6_D7, AArch64::D7_D8, + AArch64::D8_D9, AArch64::D9_D10, AArch64::D10_D11, AArch64::D11_D12, + AArch64::D12_D13, AArch64::D13_D14, AArch64::D14_D15, AArch64::D15_D16, + AArch64::D16_D17, AArch64::D17_D18, AArch64::D18_D19, AArch64::D19_D20, + AArch64::D20_D21, AArch64::D21_D22, AArch64::D22_D23, AArch64::D23_D24, + AArch64::D24_D25, AArch64::D25_D26, AArch64::D26_D27, AArch64::D27_D28, + AArch64::D28_D29, AArch64::D29_D30, AArch64::D30_D31, AArch64::D31_D0 }; static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo, @@ -534,17 +542,17 @@ static DecodeStatus 
DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned DDDDecoderTable[] = { - ARM64::D0_D1_D2, ARM64::D1_D2_D3, ARM64::D2_D3_D4, - ARM64::D3_D4_D5, ARM64::D4_D5_D6, ARM64::D5_D6_D7, - ARM64::D6_D7_D8, ARM64::D7_D8_D9, ARM64::D8_D9_D10, - ARM64::D9_D10_D11, ARM64::D10_D11_D12, ARM64::D11_D12_D13, - ARM64::D12_D13_D14, ARM64::D13_D14_D15, ARM64::D14_D15_D16, - ARM64::D15_D16_D17, ARM64::D16_D17_D18, ARM64::D17_D18_D19, - ARM64::D18_D19_D20, ARM64::D19_D20_D21, ARM64::D20_D21_D22, - ARM64::D21_D22_D23, ARM64::D22_D23_D24, ARM64::D23_D24_D25, - ARM64::D24_D25_D26, ARM64::D25_D26_D27, ARM64::D26_D27_D28, - ARM64::D27_D28_D29, ARM64::D28_D29_D30, ARM64::D29_D30_D31, - ARM64::D30_D31_D0, ARM64::D31_D0_D1 + AArch64::D0_D1_D2, AArch64::D1_D2_D3, AArch64::D2_D3_D4, + AArch64::D3_D4_D5, AArch64::D4_D5_D6, AArch64::D5_D6_D7, + AArch64::D6_D7_D8, AArch64::D7_D8_D9, AArch64::D8_D9_D10, + AArch64::D9_D10_D11, AArch64::D10_D11_D12, AArch64::D11_D12_D13, + AArch64::D12_D13_D14, AArch64::D13_D14_D15, AArch64::D14_D15_D16, + AArch64::D15_D16_D17, AArch64::D16_D17_D18, AArch64::D17_D18_D19, + AArch64::D18_D19_D20, AArch64::D19_D20_D21, AArch64::D20_D21_D22, + AArch64::D21_D22_D23, AArch64::D22_D23_D24, AArch64::D23_D24_D25, + AArch64::D24_D25_D26, AArch64::D25_D26_D27, AArch64::D26_D27_D28, + AArch64::D27_D28_D29, AArch64::D28_D29_D30, AArch64::D29_D30_D31, + AArch64::D30_D31_D0, AArch64::D31_D0_D1 }; static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo, @@ -557,17 +565,17 @@ static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned DDDDDecoderTable[] = { - ARM64::D0_D1_D2_D3, ARM64::D1_D2_D3_D4, ARM64::D2_D3_D4_D5, - ARM64::D3_D4_D5_D6, ARM64::D4_D5_D6_D7, ARM64::D5_D6_D7_D8, - ARM64::D6_D7_D8_D9, ARM64::D7_D8_D9_D10, ARM64::D8_D9_D10_D11, - ARM64::D9_D10_D11_D12, ARM64::D10_D11_D12_D13, ARM64::D11_D12_D13_D14, - ARM64::D12_D13_D14_D15, ARM64::D13_D14_D15_D16, ARM64::D14_D15_D16_D17, - ARM64::D15_D16_D17_D18, ARM64::D16_D17_D18_D19, ARM64::D17_D18_D19_D20, - ARM64::D18_D19_D20_D21, ARM64::D19_D20_D21_D22, ARM64::D20_D21_D22_D23, - ARM64::D21_D22_D23_D24, ARM64::D22_D23_D24_D25, ARM64::D23_D24_D25_D26, - ARM64::D24_D25_D26_D27, ARM64::D25_D26_D27_D28, ARM64::D26_D27_D28_D29, - ARM64::D27_D28_D29_D30, ARM64::D28_D29_D30_D31, ARM64::D29_D30_D31_D0, - ARM64::D30_D31_D0_D1, ARM64::D31_D0_D1_D2 + AArch64::D0_D1_D2_D3, AArch64::D1_D2_D3_D4, AArch64::D2_D3_D4_D5, + AArch64::D3_D4_D5_D6, AArch64::D4_D5_D6_D7, AArch64::D5_D6_D7_D8, + AArch64::D6_D7_D8_D9, AArch64::D7_D8_D9_D10, AArch64::D8_D9_D10_D11, + AArch64::D9_D10_D11_D12, AArch64::D10_D11_D12_D13, AArch64::D11_D12_D13_D14, + AArch64::D12_D13_D14_D15, AArch64::D13_D14_D15_D16, AArch64::D14_D15_D16_D17, + AArch64::D15_D16_D17_D18, AArch64::D16_D17_D18_D19, AArch64::D17_D18_D19_D20, + AArch64::D18_D19_D20_D21, AArch64::D19_D20_D21_D22, AArch64::D20_D21_D22_D23, + AArch64::D21_D22_D23_D24, AArch64::D22_D23_D24_D25, AArch64::D23_D24_D25_D26, + AArch64::D24_D25_D26_D27, AArch64::D25_D26_D27_D28, AArch64::D26_D27_D28_D29, + AArch64::D27_D28_D29_D30, AArch64::D28_D29_D30_D31, AArch64::D29_D30_D31_D0, + AArch64::D30_D31_D0_D1, AArch64::D31_D0_D1_D2 }; static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo, @@ -599,15 +607,15 @@ static DecodeStatus DecodeFixedPointScaleImm64(llvm::MCInst &Inst, unsigned Imm, static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm, uint64_t Addr, const void *Decoder) { int64_t ImmVal = Imm; - const ARM64Disassembler *Dis = - 
static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); // Sign-extend 19-bit immediate. if (ImmVal & (1 << (19 - 1))) ImmVal |= ~((1LL << 19) - 1); if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal << 2, Addr, - Inst.getOpcode() != ARM64::LDRXl, 0, 4)) + Inst.getOpcode() != AArch64::LDRXl, 0, 4)) Inst.addOperand(MCOperand::CreateImm(ImmVal)); return Success; } @@ -622,15 +630,16 @@ static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm, static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder) { - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); const MCSubtargetInfo &STI = Dis->getSubtargetInfo(); Imm |= 0x8000; Inst.addOperand(MCOperand::CreateImm(Imm)); bool ValidNamed; - (void)ARM64SysReg::MRSMapper(STI.getFeatureBits()).toString(Imm, ValidNamed); + (void)AArch64SysReg::MRSMapper(STI.getFeatureBits()) + .toString(Imm, ValidNamed); return ValidNamed ? Success : Fail; } @@ -638,15 +647,16 @@ static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm, static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder) { - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); const MCSubtargetInfo &STI = Dis->getSubtargetInfo(); Imm |= 0x8000; Inst.addOperand(MCOperand::CreateImm(Imm)); bool ValidNamed; - (void)ARM64SysReg::MSRMapper(STI.getFeatureBits()).toString(Imm, ValidNamed); + (void)AArch64SysReg::MSRMapper(STI.getFeatureBits()) + .toString(Imm, ValidNamed); return ValidNamed ? Success : Fail; } @@ -756,22 +766,22 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, switch (Inst.getOpcode()) { default: return Fail; - case ARM64::ADDWrs: - case ARM64::ADDSWrs: - case ARM64::SUBWrs: - case ARM64::SUBSWrs: + case AArch64::ADDWrs: + case AArch64::ADDSWrs: + case AArch64::SUBWrs: + case AArch64::SUBSWrs: // if shift == '11' then ReservedValue() if (shiftHi == 0x3) return Fail; // Deliberate fallthrough - case ARM64::ANDWrs: - case ARM64::ANDSWrs: - case ARM64::BICWrs: - case ARM64::BICSWrs: - case ARM64::ORRWrs: - case ARM64::ORNWrs: - case ARM64::EORWrs: - case ARM64::EONWrs: { + case AArch64::ANDWrs: + case AArch64::ANDSWrs: + case AArch64::BICWrs: + case AArch64::BICSWrs: + case AArch64::ORRWrs: + case AArch64::ORNWrs: + case AArch64::EORWrs: + case AArch64::EONWrs: { // if sf == '0' and imm6<5> == '1' then ReservedValue() if (shiftLo >> 5 == 1) return Fail; @@ -780,22 +790,22 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); break; } - case ARM64::ADDXrs: - case ARM64::ADDSXrs: - case ARM64::SUBXrs: - case ARM64::SUBSXrs: + case AArch64::ADDXrs: + case AArch64::ADDSXrs: + case AArch64::SUBXrs: + case AArch64::SUBSXrs: // if shift == '11' then ReservedValue() if (shiftHi == 0x3) return Fail; // Deliberate fallthrough - case ARM64::ANDXrs: - case ARM64::ANDSXrs: - case ARM64::BICXrs: - case ARM64::BICSXrs: - case ARM64::ORRXrs: - case ARM64::ORNXrs: - case ARM64::EORXrs: - case ARM64::EONXrs: + case AArch64::ANDXrs: + case AArch64::ANDSXrs: + case AArch64::BICXrs: + case AArch64::BICSXrs: + case AArch64::ORRXrs: + case AArch64::ORNXrs: + case AArch64::EORXrs: + case AArch64::EONXrs: DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); 
DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); @@ -816,21 +826,22 @@ static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn, switch (Inst.getOpcode()) { default: return Fail; - case ARM64::MOVZWi: - case ARM64::MOVNWi: - case ARM64::MOVKWi: + case AArch64::MOVZWi: + case AArch64::MOVNWi: + case AArch64::MOVKWi: if (shift & (1U << 5)) return Fail; DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); break; - case ARM64::MOVZXi: - case ARM64::MOVNXi: - case ARM64::MOVKXi: + case AArch64::MOVZXi: + case AArch64::MOVNXi: + case AArch64::MOVKXi: DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); break; } - if (Inst.getOpcode() == ARM64::MOVKWi || Inst.getOpcode() == ARM64::MOVKXi) + if (Inst.getOpcode() == AArch64::MOVKWi || + Inst.getOpcode() == AArch64::MOVKXi) Inst.addOperand(Inst.getOperand(0)); Inst.addOperand(MCOperand::CreateImm(imm)); @@ -844,51 +855,51 @@ static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst, unsigned Rt = fieldFromInstruction(insn, 0, 5); unsigned Rn = fieldFromInstruction(insn, 5, 5); unsigned offset = fieldFromInstruction(insn, 10, 12); - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); switch (Inst.getOpcode()) { default: return Fail; - case ARM64::PRFMui: + case AArch64::PRFMui: // Rt is an immediate in prefetch. Inst.addOperand(MCOperand::CreateImm(Rt)); break; - case ARM64::STRBBui: - case ARM64::LDRBBui: - case ARM64::LDRSBWui: - case ARM64::STRHHui: - case ARM64::LDRHHui: - case ARM64::LDRSHWui: - case ARM64::STRWui: - case ARM64::LDRWui: + case AArch64::STRBBui: + case AArch64::LDRBBui: + case AArch64::LDRSBWui: + case AArch64::STRHHui: + case AArch64::LDRHHui: + case AArch64::LDRSHWui: + case AArch64::STRWui: + case AArch64::LDRWui: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDRSBXui: - case ARM64::LDRSHXui: - case ARM64::LDRSWui: - case ARM64::STRXui: - case ARM64::LDRXui: + case AArch64::LDRSBXui: + case AArch64::LDRSHXui: + case AArch64::LDRSWui: + case AArch64::STRXui: + case AArch64::LDRXui: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDRQui: - case ARM64::STRQui: + case AArch64::LDRQui: + case AArch64::STRQui: DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDRDui: - case ARM64::STRDui: + case AArch64::LDRDui: + case AArch64::STRDui: DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDRSui: - case ARM64::STRSui: + case AArch64::LDRSui: + case AArch64::STRSui: DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDRHui: - case ARM64::STRHui: + case AArch64::LDRHui: + case AArch64::STRHui: DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDRBui: - case ARM64::STRBui: + case AArch64::LDRBui: + case AArch64::STRBui: DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); break; } @@ -915,52 +926,52 @@ static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, switch (Inst.getOpcode()) { default: break; - case ARM64::LDRSBWpre: - case ARM64::LDRSHWpre: - case ARM64::STRBBpre: - case ARM64::LDRBBpre: - case ARM64::STRHHpre: - case ARM64::LDRHHpre: - case ARM64::STRWpre: - case ARM64::LDRWpre: - case ARM64::LDRSBWpost: - case ARM64::LDRSHWpost: - case ARM64::STRBBpost: - case ARM64::LDRBBpost: - case ARM64::STRHHpost: - case ARM64::LDRHHpost: - case ARM64::STRWpost: - case ARM64::LDRWpost: - case ARM64::LDRSBXpre: - case ARM64::LDRSHXpre: - case ARM64::STRXpre: - case ARM64::LDRSWpre: - case ARM64::LDRXpre: - 
case ARM64::LDRSBXpost: - case ARM64::LDRSHXpost: - case ARM64::STRXpost: - case ARM64::LDRSWpost: - case ARM64::LDRXpost: - case ARM64::LDRQpre: - case ARM64::STRQpre: - case ARM64::LDRQpost: - case ARM64::STRQpost: - case ARM64::LDRDpre: - case ARM64::STRDpre: - case ARM64::LDRDpost: - case ARM64::STRDpost: - case ARM64::LDRSpre: - case ARM64::STRSpre: - case ARM64::LDRSpost: - case ARM64::STRSpost: - case ARM64::LDRHpre: - case ARM64::STRHpre: - case ARM64::LDRHpost: - case ARM64::STRHpost: - case ARM64::LDRBpre: - case ARM64::STRBpre: - case ARM64::LDRBpost: - case ARM64::STRBpost: + case AArch64::LDRSBWpre: + case AArch64::LDRSHWpre: + case AArch64::STRBBpre: + case AArch64::LDRBBpre: + case AArch64::STRHHpre: + case AArch64::LDRHHpre: + case AArch64::STRWpre: + case AArch64::LDRWpre: + case AArch64::LDRSBWpost: + case AArch64::LDRSHWpost: + case AArch64::STRBBpost: + case AArch64::LDRBBpost: + case AArch64::STRHHpost: + case AArch64::LDRHHpost: + case AArch64::STRWpost: + case AArch64::LDRWpost: + case AArch64::LDRSBXpre: + case AArch64::LDRSHXpre: + case AArch64::STRXpre: + case AArch64::LDRSWpre: + case AArch64::LDRXpre: + case AArch64::LDRSBXpost: + case AArch64::LDRSHXpost: + case AArch64::STRXpost: + case AArch64::LDRSWpost: + case AArch64::LDRXpost: + case AArch64::LDRQpre: + case AArch64::STRQpre: + case AArch64::LDRQpost: + case AArch64::STRQpost: + case AArch64::LDRDpre: + case AArch64::STRDpre: + case AArch64::LDRDpost: + case AArch64::STRDpost: + case AArch64::LDRSpre: + case AArch64::STRSpre: + case AArch64::LDRSpost: + case AArch64::STRSpost: + case AArch64::LDRHpre: + case AArch64::STRHpre: + case AArch64::LDRHpost: + case AArch64::STRHpost: + case AArch64::LDRBpre: + case AArch64::STRBpre: + case AArch64::LDRBpost: + case AArch64::STRBpost: DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); break; } @@ -968,104 +979,104 @@ static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, switch (Inst.getOpcode()) { default: return Fail; - case ARM64::PRFUMi: + case AArch64::PRFUMi: // Rt is an immediate in prefetch. 
Inst.addOperand(MCOperand::CreateImm(Rt)); break; - case ARM64::STURBBi: - case ARM64::LDURBBi: - case ARM64::LDURSBWi: - case ARM64::STURHHi: - case ARM64::LDURHHi: - case ARM64::LDURSHWi: - case ARM64::STURWi: - case ARM64::LDURWi: - case ARM64::LDTRSBWi: - case ARM64::LDTRSHWi: - case ARM64::STTRWi: - case ARM64::LDTRWi: - case ARM64::STTRHi: - case ARM64::LDTRHi: - case ARM64::LDTRBi: - case ARM64::STTRBi: - case ARM64::LDRSBWpre: - case ARM64::LDRSHWpre: - case ARM64::STRBBpre: - case ARM64::LDRBBpre: - case ARM64::STRHHpre: - case ARM64::LDRHHpre: - case ARM64::STRWpre: - case ARM64::LDRWpre: - case ARM64::LDRSBWpost: - case ARM64::LDRSHWpost: - case ARM64::STRBBpost: - case ARM64::LDRBBpost: - case ARM64::STRHHpost: - case ARM64::LDRHHpost: - case ARM64::STRWpost: - case ARM64::LDRWpost: + case AArch64::STURBBi: + case AArch64::LDURBBi: + case AArch64::LDURSBWi: + case AArch64::STURHHi: + case AArch64::LDURHHi: + case AArch64::LDURSHWi: + case AArch64::STURWi: + case AArch64::LDURWi: + case AArch64::LDTRSBWi: + case AArch64::LDTRSHWi: + case AArch64::STTRWi: + case AArch64::LDTRWi: + case AArch64::STTRHi: + case AArch64::LDTRHi: + case AArch64::LDTRBi: + case AArch64::STTRBi: + case AArch64::LDRSBWpre: + case AArch64::LDRSHWpre: + case AArch64::STRBBpre: + case AArch64::LDRBBpre: + case AArch64::STRHHpre: + case AArch64::LDRHHpre: + case AArch64::STRWpre: + case AArch64::LDRWpre: + case AArch64::LDRSBWpost: + case AArch64::LDRSHWpost: + case AArch64::STRBBpost: + case AArch64::LDRBBpost: + case AArch64::STRHHpost: + case AArch64::LDRHHpost: + case AArch64::STRWpost: + case AArch64::LDRWpost: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDURSBXi: - case ARM64::LDURSHXi: - case ARM64::LDURSWi: - case ARM64::STURXi: - case ARM64::LDURXi: - case ARM64::LDTRSBXi: - case ARM64::LDTRSHXi: - case ARM64::LDTRSWi: - case ARM64::STTRXi: - case ARM64::LDTRXi: - case ARM64::LDRSBXpre: - case ARM64::LDRSHXpre: - case ARM64::STRXpre: - case ARM64::LDRSWpre: - case ARM64::LDRXpre: - case ARM64::LDRSBXpost: - case ARM64::LDRSHXpost: - case ARM64::STRXpost: - case ARM64::LDRSWpost: - case ARM64::LDRXpost: + case AArch64::LDURSBXi: + case AArch64::LDURSHXi: + case AArch64::LDURSWi: + case AArch64::STURXi: + case AArch64::LDURXi: + case AArch64::LDTRSBXi: + case AArch64::LDTRSHXi: + case AArch64::LDTRSWi: + case AArch64::STTRXi: + case AArch64::LDTRXi: + case AArch64::LDRSBXpre: + case AArch64::LDRSHXpre: + case AArch64::STRXpre: + case AArch64::LDRSWpre: + case AArch64::LDRXpre: + case AArch64::LDRSBXpost: + case AArch64::LDRSHXpost: + case AArch64::STRXpost: + case AArch64::LDRSWpost: + case AArch64::LDRXpost: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDURQi: - case ARM64::STURQi: - case ARM64::LDRQpre: - case ARM64::STRQpre: - case ARM64::LDRQpost: - case ARM64::STRQpost: + case AArch64::LDURQi: + case AArch64::STURQi: + case AArch64::LDRQpre: + case AArch64::STRQpre: + case AArch64::LDRQpost: + case AArch64::STRQpost: DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDURDi: - case ARM64::STURDi: - case ARM64::LDRDpre: - case ARM64::STRDpre: - case ARM64::LDRDpost: - case ARM64::STRDpost: + case AArch64::LDURDi: + case AArch64::STURDi: + case AArch64::LDRDpre: + case AArch64::STRDpre: + case AArch64::LDRDpost: + case AArch64::STRDpost: DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDURSi: - case ARM64::STURSi: - case ARM64::LDRSpre: - case ARM64::STRSpre: - case ARM64::LDRSpost: - case 
ARM64::STRSpost: + case AArch64::LDURSi: + case AArch64::STURSi: + case AArch64::LDRSpre: + case AArch64::STRSpre: + case AArch64::LDRSpost: + case AArch64::STRSpost: DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDURHi: - case ARM64::STURHi: - case ARM64::LDRHpre: - case ARM64::STRHpre: - case ARM64::LDRHpost: - case ARM64::STRHpost: + case AArch64::LDURHi: + case AArch64::STURHi: + case AArch64::LDRHpre: + case AArch64::STRHpre: + case AArch64::LDRHpost: + case AArch64::STRHpost: DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDURBi: - case ARM64::STURBi: - case ARM64::LDRBpre: - case ARM64::STRBpre: - case ARM64::LDRBpost: - case ARM64::STRBpost: + case AArch64::LDURBi: + case AArch64::STURBi: + case AArch64::LDRBpre: + case AArch64::STRBpre: + case AArch64::LDRBpost: + case AArch64::STRBpost: DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); break; } @@ -1096,53 +1107,53 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, switch (Opcode) { default: return Fail; - case ARM64::STLXRW: - case ARM64::STLXRB: - case ARM64::STLXRH: - case ARM64::STXRW: - case ARM64::STXRB: - case ARM64::STXRH: + case AArch64::STLXRW: + case AArch64::STLXRB: + case AArch64::STLXRH: + case AArch64::STXRW: + case AArch64::STXRB: + case AArch64::STXRH: DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); // FALLTHROUGH - case ARM64::LDARW: - case ARM64::LDARB: - case ARM64::LDARH: - case ARM64::LDAXRW: - case ARM64::LDAXRB: - case ARM64::LDAXRH: - case ARM64::LDXRW: - case ARM64::LDXRB: - case ARM64::LDXRH: - case ARM64::STLRW: - case ARM64::STLRB: - case ARM64::STLRH: + case AArch64::LDARW: + case AArch64::LDARB: + case AArch64::LDARH: + case AArch64::LDAXRW: + case AArch64::LDAXRB: + case AArch64::LDAXRH: + case AArch64::LDXRW: + case AArch64::LDXRB: + case AArch64::LDXRH: + case AArch64::STLRW: + case AArch64::STLRB: + case AArch64::STLRH: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::STLXRX: - case ARM64::STXRX: + case AArch64::STLXRX: + case AArch64::STXRX: DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); // FALLTHROUGH - case ARM64::LDARX: - case ARM64::LDAXRX: - case ARM64::LDXRX: - case ARM64::STLRX: + case AArch64::LDARX: + case AArch64::LDAXRX: + case AArch64::LDXRX: + case AArch64::STLRX: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::STLXPW: - case ARM64::STXPW: + case AArch64::STLXPW: + case AArch64::STXPW: DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); // FALLTHROUGH - case ARM64::LDAXPW: - case ARM64::LDXPW: + case AArch64::LDAXPW: + case AArch64::LDXPW: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder); break; - case ARM64::STLXPX: - case ARM64::STXPX: + case AArch64::STLXPX: + case AArch64::STXPX: DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); // FALLTHROUGH - case ARM64::LDAXPX: - case ARM64::LDXPX: + case AArch64::LDAXPX: + case AArch64::LDXPX: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder); break; @@ -1151,8 +1162,8 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); // You shouldn't load to the same register twice in an instruction... 
- if ((Opcode == ARM64::LDAXPW || Opcode == ARM64::LDXPW || - Opcode == ARM64::LDAXPX || Opcode == ARM64::LDXPX) && + if ((Opcode == AArch64::LDAXPW || Opcode == AArch64::LDXPW || + Opcode == AArch64::LDAXPX || Opcode == AArch64::LDXPX) && Rt == Rt2) return SoftFail; @@ -1180,28 +1191,28 @@ static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, switch (Opcode) { default: break; - case ARM64::LDPXpost: - case ARM64::STPXpost: - case ARM64::LDPSWpost: - case ARM64::LDPXpre: - case ARM64::STPXpre: - case ARM64::LDPSWpre: - case ARM64::LDPWpost: - case ARM64::STPWpost: - case ARM64::LDPWpre: - case ARM64::STPWpre: - case ARM64::LDPQpost: - case ARM64::STPQpost: - case ARM64::LDPQpre: - case ARM64::STPQpre: - case ARM64::LDPDpost: - case ARM64::STPDpost: - case ARM64::LDPDpre: - case ARM64::STPDpre: - case ARM64::LDPSpost: - case ARM64::STPSpost: - case ARM64::LDPSpre: - case ARM64::STPSpre: + case AArch64::LDPXpost: + case AArch64::STPXpost: + case AArch64::LDPSWpost: + case AArch64::LDPXpre: + case AArch64::STPXpre: + case AArch64::LDPSWpre: + case AArch64::LDPWpost: + case AArch64::STPWpost: + case AArch64::LDPWpre: + case AArch64::STPWpre: + case AArch64::LDPQpost: + case AArch64::STPQpost: + case AArch64::LDPQpre: + case AArch64::STPQpre: + case AArch64::LDPDpost: + case AArch64::STPDpost: + case AArch64::LDPDpre: + case AArch64::STPDpre: + case AArch64::LDPSpost: + case AArch64::STPSpost: + case AArch64::LDPSpre: + case AArch64::STPSpre: DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); break; } @@ -1209,65 +1220,65 @@ static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, switch (Opcode) { default: return Fail; - case ARM64::LDPXpost: - case ARM64::STPXpost: - case ARM64::LDPSWpost: - case ARM64::LDPXpre: - case ARM64::STPXpre: - case ARM64::LDPSWpre: + case AArch64::LDPXpost: + case AArch64::STPXpost: + case AArch64::LDPSWpost: + case AArch64::LDPXpre: + case AArch64::STPXpre: + case AArch64::LDPSWpre: NeedsDisjointWritebackTransfer = true; // Fallthrough - case ARM64::LDNPXi: - case ARM64::STNPXi: - case ARM64::LDPXi: - case ARM64::STPXi: - case ARM64::LDPSWi: + case AArch64::LDNPXi: + case AArch64::STNPXi: + case AArch64::LDPXi: + case AArch64::STPXi: + case AArch64::LDPSWi: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder); break; - case ARM64::LDPWpost: - case ARM64::STPWpost: - case ARM64::LDPWpre: - case ARM64::STPWpre: + case AArch64::LDPWpost: + case AArch64::STPWpost: + case AArch64::LDPWpre: + case AArch64::STPWpre: NeedsDisjointWritebackTransfer = true; // Fallthrough - case ARM64::LDNPWi: - case ARM64::STNPWi: - case ARM64::LDPWi: - case ARM64::STPWi: + case AArch64::LDNPWi: + case AArch64::STNPWi: + case AArch64::LDPWi: + case AArch64::STPWi: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder); break; - case ARM64::LDNPQi: - case ARM64::STNPQi: - case ARM64::LDPQpost: - case ARM64::STPQpost: - case ARM64::LDPQi: - case ARM64::STPQi: - case ARM64::LDPQpre: - case ARM64::STPQpre: + case AArch64::LDNPQi: + case AArch64::STNPQi: + case AArch64::LDPQpost: + case AArch64::STPQpost: + case AArch64::LDPQi: + case AArch64::STPQi: + case AArch64::LDPQpre: + case AArch64::STPQpre: DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); DecodeFPR128RegisterClass(Inst, Rt2, Addr, Decoder); break; - case ARM64::LDNPDi: - case ARM64::STNPDi: - case ARM64::LDPDpost: - case ARM64::STPDpost: - case ARM64::LDPDi: - case ARM64::STPDi: - case 
ARM64::LDPDpre: - case ARM64::STPDpre: + case AArch64::LDNPDi: + case AArch64::STNPDi: + case AArch64::LDPDpost: + case AArch64::STPDpost: + case AArch64::LDPDi: + case AArch64::STPDi: + case AArch64::LDPDpre: + case AArch64::STPDpre: DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); DecodeFPR64RegisterClass(Inst, Rt2, Addr, Decoder); break; - case ARM64::LDNPSi: - case ARM64::STNPSi: - case ARM64::LDPSpost: - case ARM64::STPSpost: - case ARM64::LDPSi: - case ARM64::STPSi: - case ARM64::LDPSpre: - case ARM64::STPSpre: + case AArch64::LDNPSi: + case AArch64::STNPSi: + case AArch64::LDPSpost: + case AArch64::STPSpost: + case AArch64::LDPSi: + case AArch64::STPSi: + case AArch64::LDPSpre: + case AArch64::STPSpre: DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); DecodeFPR32RegisterClass(Inst, Rt2, Addr, Decoder); break; @@ -1303,38 +1314,38 @@ static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst, switch (Inst.getOpcode()) { default: return Fail; - case ARM64::ADDWrx: - case ARM64::SUBWrx: + case AArch64::ADDWrx: + case AArch64::SUBWrx: DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); break; - case ARM64::ADDSWrx: - case ARM64::SUBSWrx: + case AArch64::ADDSWrx: + case AArch64::SUBSWrx: DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); break; - case ARM64::ADDXrx: - case ARM64::SUBXrx: + case AArch64::ADDXrx: + case AArch64::SUBXrx: DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); break; - case ARM64::ADDSXrx: - case ARM64::SUBSXrx: + case AArch64::ADDSXrx: + case AArch64::SUBSXrx: DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); break; - case ARM64::ADDXrx64: - case ARM64::SUBXrx64: + case AArch64::ADDXrx64: + case AArch64::SUBXrx64: DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); break; - case ARM64::SUBSXrx64: - case ARM64::ADDSXrx64: + case AArch64::SUBSXrx64: + case AArch64::ADDSXrx64: DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); @@ -1354,22 +1365,22 @@ static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst, unsigned imm; if (Datasize) { - if (Inst.getOpcode() == ARM64::ANDSXri) + if (Inst.getOpcode() == AArch64::ANDSXri) DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); else DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); imm = fieldFromInstruction(insn, 10, 13); - if (!ARM64_AM::isValidDecodeLogicalImmediate(imm, 64)) + if (!AArch64_AM::isValidDecodeLogicalImmediate(imm, 64)) return Fail; } else { - if (Inst.getOpcode() == ARM64::ANDSWri) + if (Inst.getOpcode() == AArch64::ANDSWri) DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); else DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder); imm = fieldFromInstruction(insn, 10, 12); - if (!ARM64_AM::isValidDecodeLogicalImmediate(imm, 32)) + if (!AArch64_AM::isValidDecodeLogicalImmediate(imm, 32)) return Fail; } Inst.addOperand(MCOperand::CreateImm(imm)); @@ -1384,7 +1395,7 
@@ static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn, unsigned imm = fieldFromInstruction(insn, 16, 3) << 5; imm |= fieldFromInstruction(insn, 5, 5); - if (Inst.getOpcode() == ARM64::MOVID) + if (Inst.getOpcode() == AArch64::MOVID) DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder); else DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); @@ -1394,20 +1405,20 @@ static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn, switch (Inst.getOpcode()) { default: break; - case ARM64::MOVIv4i16: - case ARM64::MOVIv8i16: - case ARM64::MVNIv4i16: - case ARM64::MVNIv8i16: - case ARM64::MOVIv2i32: - case ARM64::MOVIv4i32: - case ARM64::MVNIv2i32: - case ARM64::MVNIv4i32: + case AArch64::MOVIv4i16: + case AArch64::MOVIv8i16: + case AArch64::MVNIv4i16: + case AArch64::MVNIv8i16: + case AArch64::MOVIv2i32: + case AArch64::MOVIv4i32: + case AArch64::MVNIv2i32: + case AArch64::MVNIv4i32: Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2)); break; - case ARM64::MOVIv2s_msl: - case ARM64::MOVIv4s_msl: - case ARM64::MVNIv2s_msl: - case ARM64::MVNIv4s_msl: + case AArch64::MOVIv2s_msl: + case AArch64::MOVIv4s_msl: + case AArch64::MVNIv2s_msl: + case AArch64::MVNIv4s_msl: Inst.addOperand(MCOperand::CreateImm(cmode & 1 ? 0x110 : 0x108)); break; } @@ -1438,8 +1449,8 @@ static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn, unsigned Rd = fieldFromInstruction(insn, 0, 5); int64_t imm = fieldFromInstruction(insn, 5, 19) << 2; imm |= fieldFromInstruction(insn, 29, 2); - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); // Sign-extend the 21-bit immediate. if (imm & (1 << (21 - 1))) @@ -1462,8 +1473,8 @@ static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn, unsigned ShifterVal = (Imm >> 12) & 3; unsigned ImmVal = Imm & 0xFFF; - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); if (ShifterVal != 0 && ShifterVal != 1) return Fail; @@ -1492,8 +1503,8 @@ static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn, uint64_t Addr, const void *Decoder) { int64_t imm = fieldFromInstruction(insn, 0, 26); - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); // Sign-extend the 26-bit immediate. if (imm & (1 << (26 - 1))) @@ -1518,7 +1529,7 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst, Inst.addOperand(MCOperand::CreateImm(crm)); bool ValidNamed; - (void)ARM64PState::PStateMapper().toString(pstate_field, ValidNamed); + (void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed); return ValidNamed ? Success : Fail; } @@ -1529,8 +1540,8 @@ static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn, uint64_t bit = fieldFromInstruction(insn, 31, 1) << 5; bit |= fieldFromInstruction(insn, 19, 5); int64_t dst = fieldFromInstruction(insn, 5, 14); - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); // Sign-extend 14-bit immediate. 
if (dst & (1 << (14 - 1))) diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.h b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h similarity index 75% rename from lib/Target/ARM64/Disassembler/ARM64Disassembler.h rename to lib/Target/AArch64/Disassembler/AArch64Disassembler.h index 8989925f36b8..68d4867977b0 100644 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.h +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h @@ -1,4 +1,4 @@ -//===- ARM64Disassembler.h - Disassembler for ARM64 -------------*- C++ -*-===// +//===- AArch64Disassembler.h - Disassembler for AArch64 ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARM64DISASSEMBLER_H -#define ARM64DISASSEMBLER_H +#ifndef AArch64DISASSEMBLER_H +#define AArch64DISASSEMBLER_H #include "llvm/MC/MCDisassembler.h" @@ -21,12 +21,12 @@ class MCInst; class MemoryObject; class raw_ostream; -class ARM64Disassembler : public MCDisassembler { +class AArch64Disassembler : public MCDisassembler { public: - ARM64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx) + AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - ~ARM64Disassembler() {} + ~AArch64Disassembler() {} /// getInstruction - See MCDisassembler. MCDisassembler::DecodeStatus diff --git a/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp similarity index 86% rename from lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp rename to lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp index 2f8e516d185d..24663684a3fd 100644 --- a/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp @@ -1,4 +1,4 @@ -//===- ARM64ExternalSymbolizer.cpp - Symbolizer for ARM64 -------*- C++ -*-===// +//===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "ARM64ExternalSymbolizer.h" -#include "ARM64Subtarget.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "Utils/ARM64BaseInfo.h" +#include "AArch64ExternalSymbolizer.h" +#include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -19,7 +19,7 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-disassembler" +#define DEBUG_TYPE "aarch64-disassembler" static MCSymbolRefExpr::VariantKind getVariant(uint64_t LLVMDisassembler_VariantKind) { @@ -58,14 +58,9 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) { /// a symbol look up is done to see it is returns a specific reference type /// to add to the comment stream. This function returns Success if it adds /// an operand to the MCInst and Fail otherwise. -bool ARM64ExternalSymbolizer::tryAddingSymbolicOperand( - MCInst &MI, - raw_ostream &CommentStream, - int64_t Value, - uint64_t Address, - bool IsBranch, - uint64_t Offset, - uint64_t InstSize) { +bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( + MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, + bool IsBranch, uint64_t Offset, uint64_t InstSize) { // FIXME: This method shares a lot of code with // MCExternalSymbolizer::tryAddingSymbolicOperand. 
It may be possible // refactor the MCExternalSymbolizer interface to allow more of this @@ -94,7 +89,7 @@ bool ARM64ExternalSymbolizer::tryAddingSymbolicOperand( else if (ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message) CommentStream << "Objc message: " << ReferenceName; - } else if (MI.getOpcode() == ARM64::ADRP) { + } else if (MI.getOpcode() == AArch64::ADRP) { ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP; // otool expects the fully encoded ADRP instruction to be passed in as // the value here, so reconstruct it: @@ -107,19 +102,19 @@ bool ARM64ExternalSymbolizer::tryAddingSymbolicOperand( &ReferenceName); CommentStream << format("0x%llx", 0xfffffffffffff000LL & (Address + Value)); - } else if (MI.getOpcode() == ARM64::ADDXri || - MI.getOpcode() == ARM64::LDRXui || - MI.getOpcode() == ARM64::LDRXl || - MI.getOpcode() == ARM64::ADR) { - if (MI.getOpcode() == ARM64::ADDXri) + } else if (MI.getOpcode() == AArch64::ADDXri || + MI.getOpcode() == AArch64::LDRXui || + MI.getOpcode() == AArch64::LDRXl || + MI.getOpcode() == AArch64::ADR) { + if (MI.getOpcode() == AArch64::ADDXri) ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri; - else if (MI.getOpcode() == ARM64::LDRXui) + else if (MI.getOpcode() == AArch64::LDRXui) ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui; - if (MI.getOpcode() == ARM64::LDRXl) { + if (MI.getOpcode() == AArch64::LDRXl) { ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl; SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, &ReferenceName); - } else if (MI.getOpcode() == ARM64::ADR) { + } else if (MI.getOpcode() == AArch64::ADR) { ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR; SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, &ReferenceName); @@ -128,7 +123,7 @@ bool ARM64ExternalSymbolizer::tryAddingSymbolicOperand( // otool expects the fully encoded ADD/LDR instruction to be passed in // as the value here, so reconstruct it: unsigned EncodedInst = - MI.getOpcode() == ARM64::ADDXri ? 0x91000000: 0xF9400000; + MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000; EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD] EncodedInst |= MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn diff --git a/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h similarity index 50% rename from lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h rename to lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h index 45f07a5e2587..171d31c48cd7 100644 --- a/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h +++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h @@ -1,4 +1,4 @@ -//===- ARM64ExternalSymbolizer.h - Symbolizer for ARM64 ---------*- C++ -*-===// +//===- AArch64ExternalSymbolizer.h - Symbolizer for AArch64 -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,25 +7,26 @@ // //===----------------------------------------------------------------------===// // -// Symbolize ARM64 assembly code during disassembly using callbacks. +// Symbolize AArch64 assembly code during disassembly using callbacks. 
// //===----------------------------------------------------------------------===// -#ifndef ARM64EXTERNALSYMBOLIZER_H -#define ARM64EXTERNALSYMBOLIZER_H +#ifndef AArch64EXTERNALSYMBOLIZER_H +#define AArch64EXTERNALSYMBOLIZER_H #include "llvm/MC/MCExternalSymbolizer.h" namespace llvm { -class ARM64ExternalSymbolizer : public MCExternalSymbolizer { +class AArch64ExternalSymbolizer : public MCExternalSymbolizer { public: - ARM64ExternalSymbolizer(MCContext &Ctx, - std::unique_ptr RelInfo, - LLVMOpInfoCallback GetOpInfo, - LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo) - : MCExternalSymbolizer(Ctx, std::move(RelInfo), GetOpInfo, SymbolLookUp, - DisInfo) {} + AArch64ExternalSymbolizer(MCContext &Ctx, + std::unique_ptr RelInfo, + LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp, + void *DisInfo) + : MCExternalSymbolizer(Ctx, std::move(RelInfo), GetOpInfo, SymbolLookUp, + DisInfo) {} bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, bool IsBranch, diff --git a/lib/Target/ARM64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt similarity index 66% rename from lib/Target/ARM64/Disassembler/CMakeLists.txt rename to lib/Target/AArch64/Disassembler/CMakeLists.txt index 43ade66be144..be4ccad6d1b9 100644 --- a/lib/Target/ARM64/Disassembler/CMakeLists.txt +++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt @@ -1,8 +1,8 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) -add_llvm_library(LLVMARM64Disassembler - ARM64Disassembler.cpp - ARM64ExternalSymbolizer.cpp +add_llvm_library(LLVMAArch64Disassembler + AArch64Disassembler.cpp + AArch64ExternalSymbolizer.cpp ) # workaround for hanging compilation on MSVC8, 9 and 10 #if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) @@ -11,4 +11,4 @@ add_llvm_library(LLVMARM64Disassembler # PROPERTY COMPILE_FLAGS "/Od" # ) #endif() -add_dependencies(LLVMARM64Disassembler ARM64CommonTableGen) +add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen) diff --git a/lib/Target/ARM64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt similarity index 71% rename from lib/Target/ARM64/Disassembler/LLVMBuild.txt rename to lib/Target/AArch64/Disassembler/LLVMBuild.txt index 5bbe88ddb49a..a4224f4a2f53 100644 --- a/lib/Target/ARM64/Disassembler/LLVMBuild.txt +++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/ARM64/Disassembler/LLVMBuild.txt ------------*- Conf -*--===; +;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,7 +17,7 @@ [component_0] type = Library -name = ARM64Disassembler -parent = ARM64 -required_libraries = ARM64Info ARM64Utils MC Support -add_to_library_groups = ARM64 +name = AArch64Disassembler +parent = AArch64 +required_libraries = AArch64Info AArch64Utils MC Support +add_to_library_groups = AArch64 diff --git a/lib/Target/ARM64/Disassembler/Makefile b/lib/Target/AArch64/Disassembler/Makefile similarity index 81% rename from lib/Target/ARM64/Disassembler/Makefile rename to lib/Target/AArch64/Disassembler/Makefile index 479d00c2494b..741bb817a633 100644 --- a/lib/Target/ARM64/Disassembler/Makefile +++ b/lib/Target/AArch64/Disassembler/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/ARM64/Disassembler/Makefile --------------*- Makefile -*-===## +##===- lib/Target/AArch64/Disassembler/Makefile ------------*- Makefile -*-===## # # The LLVM Compiler 
Infrastructure # @@ -8,7 +8,7 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Disassembler +LIBRARYNAME = LLVMAArch64Disassembler # Hack: we need to include 'main' arm target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp new file mode 100644 index 000000000000..f484a5b1bdcc --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -0,0 +1,1316 @@ +//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an AArch64 MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#include "AArch64InstPrinter.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +#define GET_INSTRUCTION_NAME +#define PRINT_ALIAS_INSTR +#include "AArch64GenAsmWriter.inc" +#define GET_INSTRUCTION_NAME +#define PRINT_ALIAS_INSTR +#include "AArch64GenAsmWriter1.inc" + +AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) + : MCInstPrinter(MAI, MII, MRI) { + // Initialize the set of available features. + setAvailableFeatures(STI.getFeatureBits()); +} + +AArch64AppleInstPrinter::AArch64AppleInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) + : AArch64InstPrinter(MAI, MII, MRI, STI) {} + +void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + // This is for .cfi directives. + OS << getRegisterName(RegNo); +} + +void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + // Check for special encodings and print the canonical alias instead. + + unsigned Opcode = MI->getOpcode(); + + if (Opcode == AArch64::SYSxt) + if (printSysAlias(MI, O)) { + printAnnotation(O, Annot); + return; + } + + // SBFM/UBFM should print to a nicer aliased form if possible. 
+ if (Opcode == AArch64::SBFMXri || Opcode == AArch64::SBFMWri || + Opcode == AArch64::UBFMXri || Opcode == AArch64::UBFMWri) { + const MCOperand &Op0 = MI->getOperand(0); + const MCOperand &Op1 = MI->getOperand(1); + const MCOperand &Op2 = MI->getOperand(2); + const MCOperand &Op3 = MI->getOperand(3); + + bool IsSigned = (Opcode == AArch64::SBFMXri || Opcode == AArch64::SBFMWri); + bool Is64Bit = (Opcode == AArch64::SBFMXri || Opcode == AArch64::UBFMXri); + if (Op2.isImm() && Op2.getImm() == 0 && Op3.isImm()) { + const char *AsmMnemonic = nullptr; + + switch (Op3.getImm()) { + default: + break; + case 7: + if (IsSigned) + AsmMnemonic = "sxtb"; + else if (!Is64Bit) + AsmMnemonic = "uxtb"; + break; + case 15: + if (IsSigned) + AsmMnemonic = "sxth"; + else if (!Is64Bit) + AsmMnemonic = "uxth"; + break; + case 31: + // *xtw is only valid for signed 64-bit operations. + if (Is64Bit && IsSigned) + AsmMnemonic = "sxtw"; + break; + } + + if (AsmMnemonic) { + O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) + << ", " << getRegisterName(getWRegFromXReg(Op1.getReg())); + printAnnotation(O, Annot); + return; + } + } + + // All immediate shifts are aliases, implemented using the Bitfield + // instruction. In all cases the immediate shift amount shift must be in + // the range 0 to (reg.size -1). + if (Op2.isImm() && Op3.isImm()) { + const char *AsmMnemonic = nullptr; + int shift = 0; + int64_t immr = Op2.getImm(); + int64_t imms = Op3.getImm(); + if (Opcode == AArch64::UBFMWri && imms != 0x1F && ((imms + 1) == immr)) { + AsmMnemonic = "lsl"; + shift = 31 - imms; + } else if (Opcode == AArch64::UBFMXri && imms != 0x3f && + ((imms + 1 == immr))) { + AsmMnemonic = "lsl"; + shift = 63 - imms; + } else if (Opcode == AArch64::UBFMWri && imms == 0x1f) { + AsmMnemonic = "lsr"; + shift = immr; + } else if (Opcode == AArch64::UBFMXri && imms == 0x3f) { + AsmMnemonic = "lsr"; + shift = immr; + } else if (Opcode == AArch64::SBFMWri && imms == 0x1f) { + AsmMnemonic = "asr"; + shift = immr; + } else if (Opcode == AArch64::SBFMXri && imms == 0x3f) { + AsmMnemonic = "asr"; + shift = immr; + } + if (AsmMnemonic) { + O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) + << ", " << getRegisterName(Op1.getReg()) << ", #" << shift; + printAnnotation(O, Annot); + return; + } + } + + // SBFIZ/UBFIZ aliases + if (Op2.getImm() > Op3.getImm()) { + O << '\t' << (IsSigned ? "sbfiz" : "ubfiz") << '\t' + << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg()) + << ", #" << (Is64Bit ? 64 : 32) - Op2.getImm() << ", #" << Op3.getImm() + 1; + printAnnotation(O, Annot); + return; + } + + // Otherwise SBFX/UBFX is the preferred form + O << '\t' << (IsSigned ? "sbfx" : "ubfx") << '\t' + << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg()) + << ", #" << Op2.getImm() << ", #" << Op3.getImm() - Op2.getImm() + 1; + printAnnotation(O, Annot); + return; + } + + if (Opcode == AArch64::BFMXri || Opcode == AArch64::BFMWri) { + const MCOperand &Op0 = MI->getOperand(0); // Op1 == Op0 + const MCOperand &Op2 = MI->getOperand(2); + int ImmR = MI->getOperand(3).getImm(); + int ImmS = MI->getOperand(4).getImm(); + + // BFI alias + if (ImmS < ImmR) { + int BitWidth = Opcode == AArch64::BFMXri ? 
64 : 32; + int LSB = (BitWidth - ImmR) % BitWidth; + int Width = ImmS + 1; + O << "\tbfi\t" << getRegisterName(Op0.getReg()) << ", " + << getRegisterName(Op2.getReg()) << ", #" << LSB << ", #" << Width; + printAnnotation(O, Annot); + return; + } + + int LSB = ImmR; + int Width = ImmS - ImmR + 1; + // Otherwise BFXIL the preferred form + O << "\tbfxil\t" + << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op2.getReg()) + << ", #" << LSB << ", #" << Width; + printAnnotation(O, Annot); + return; + } + + // Symbolic operands for MOVZ, MOVN and MOVK already imply a shift + // (e.g. :gottprel_g1: is always going to be "lsl #16") so it should not be + // printed. + if ((Opcode == AArch64::MOVZXi || Opcode == AArch64::MOVZWi || + Opcode == AArch64::MOVNXi || Opcode == AArch64::MOVNWi) && + MI->getOperand(1).isExpr()) { + if (Opcode == AArch64::MOVZXi || Opcode == AArch64::MOVZWi) + O << "\tmovz\t"; + else + O << "\tmovn\t"; + + O << getRegisterName(MI->getOperand(0).getReg()) << ", #" + << *MI->getOperand(1).getExpr(); + return; + } + + if ((Opcode == AArch64::MOVKXi || Opcode == AArch64::MOVKWi) && + MI->getOperand(2).isExpr()) { + O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #" + << *MI->getOperand(2).getExpr(); + return; + } + + if (!printAliasInstr(MI, O)) + printInstruction(MI, O); + + printAnnotation(O, Annot); +} + +static bool isTblTbxInstruction(unsigned Opcode, StringRef &Layout, + bool &IsTbx) { + switch (Opcode) { + case AArch64::TBXv8i8One: + case AArch64::TBXv8i8Two: + case AArch64::TBXv8i8Three: + case AArch64::TBXv8i8Four: + IsTbx = true; + Layout = ".8b"; + return true; + case AArch64::TBLv8i8One: + case AArch64::TBLv8i8Two: + case AArch64::TBLv8i8Three: + case AArch64::TBLv8i8Four: + IsTbx = false; + Layout = ".8b"; + return true; + case AArch64::TBXv16i8One: + case AArch64::TBXv16i8Two: + case AArch64::TBXv16i8Three: + case AArch64::TBXv16i8Four: + IsTbx = true; + Layout = ".16b"; + return true; + case AArch64::TBLv16i8One: + case AArch64::TBLv16i8Two: + case AArch64::TBLv16i8Three: + case AArch64::TBLv16i8Four: + IsTbx = false; + Layout = ".16b"; + return true; + default: + return false; + } +} + +struct LdStNInstrDesc { + unsigned Opcode; + const char *Mnemonic; + const char *Layout; + int ListOperand; + bool HasLane; + int NaturalOffset; +}; + +static LdStNInstrDesc LdStNInstInfo[] = { + { AArch64::LD1i8, "ld1", ".b", 1, true, 0 }, + { AArch64::LD1i16, "ld1", ".h", 1, true, 0 }, + { AArch64::LD1i32, "ld1", ".s", 1, true, 0 }, + { AArch64::LD1i64, "ld1", ".d", 1, true, 0 }, + { AArch64::LD1i8_POST, "ld1", ".b", 2, true, 1 }, + { AArch64::LD1i16_POST, "ld1", ".h", 2, true, 2 }, + { AArch64::LD1i32_POST, "ld1", ".s", 2, true, 4 }, + { AArch64::LD1i64_POST, "ld1", ".d", 2, true, 8 }, + { AArch64::LD1Rv16b, "ld1r", ".16b", 0, false, 0 }, + { AArch64::LD1Rv8h, "ld1r", ".8h", 0, false, 0 }, + { AArch64::LD1Rv4s, "ld1r", ".4s", 0, false, 0 }, + { AArch64::LD1Rv2d, "ld1r", ".2d", 0, false, 0 }, + { AArch64::LD1Rv8b, "ld1r", ".8b", 0, false, 0 }, + { AArch64::LD1Rv4h, "ld1r", ".4h", 0, false, 0 }, + { AArch64::LD1Rv2s, "ld1r", ".2s", 0, false, 0 }, + { AArch64::LD1Rv1d, "ld1r", ".1d", 0, false, 0 }, + { AArch64::LD1Rv16b_POST, "ld1r", ".16b", 1, false, 1 }, + { AArch64::LD1Rv8h_POST, "ld1r", ".8h", 1, false, 2 }, + { AArch64::LD1Rv4s_POST, "ld1r", ".4s", 1, false, 4 }, + { AArch64::LD1Rv2d_POST, "ld1r", ".2d", 1, false, 8 }, + { AArch64::LD1Rv8b_POST, "ld1r", ".8b", 1, false, 1 }, + { AArch64::LD1Rv4h_POST, "ld1r", ".4h", 1, false, 2 }, + { 
AArch64::LD1Rv2s_POST, "ld1r", ".2s", 1, false, 4 }, + { AArch64::LD1Rv1d_POST, "ld1r", ".1d", 1, false, 8 }, + { AArch64::LD1Onev16b, "ld1", ".16b", 0, false, 0 }, + { AArch64::LD1Onev8h, "ld1", ".8h", 0, false, 0 }, + { AArch64::LD1Onev4s, "ld1", ".4s", 0, false, 0 }, + { AArch64::LD1Onev2d, "ld1", ".2d", 0, false, 0 }, + { AArch64::LD1Onev8b, "ld1", ".8b", 0, false, 0 }, + { AArch64::LD1Onev4h, "ld1", ".4h", 0, false, 0 }, + { AArch64::LD1Onev2s, "ld1", ".2s", 0, false, 0 }, + { AArch64::LD1Onev1d, "ld1", ".1d", 0, false, 0 }, + { AArch64::LD1Onev16b_POST, "ld1", ".16b", 1, false, 16 }, + { AArch64::LD1Onev8h_POST, "ld1", ".8h", 1, false, 16 }, + { AArch64::LD1Onev4s_POST, "ld1", ".4s", 1, false, 16 }, + { AArch64::LD1Onev2d_POST, "ld1", ".2d", 1, false, 16 }, + { AArch64::LD1Onev8b_POST, "ld1", ".8b", 1, false, 8 }, + { AArch64::LD1Onev4h_POST, "ld1", ".4h", 1, false, 8 }, + { AArch64::LD1Onev2s_POST, "ld1", ".2s", 1, false, 8 }, + { AArch64::LD1Onev1d_POST, "ld1", ".1d", 1, false, 8 }, + { AArch64::LD1Twov16b, "ld1", ".16b", 0, false, 0 }, + { AArch64::LD1Twov8h, "ld1", ".8h", 0, false, 0 }, + { AArch64::LD1Twov4s, "ld1", ".4s", 0, false, 0 }, + { AArch64::LD1Twov2d, "ld1", ".2d", 0, false, 0 }, + { AArch64::LD1Twov8b, "ld1", ".8b", 0, false, 0 }, + { AArch64::LD1Twov4h, "ld1", ".4h", 0, false, 0 }, + { AArch64::LD1Twov2s, "ld1", ".2s", 0, false, 0 }, + { AArch64::LD1Twov1d, "ld1", ".1d", 0, false, 0 }, + { AArch64::LD1Twov16b_POST, "ld1", ".16b", 1, false, 32 }, + { AArch64::LD1Twov8h_POST, "ld1", ".8h", 1, false, 32 }, + { AArch64::LD1Twov4s_POST, "ld1", ".4s", 1, false, 32 }, + { AArch64::LD1Twov2d_POST, "ld1", ".2d", 1, false, 32 }, + { AArch64::LD1Twov8b_POST, "ld1", ".8b", 1, false, 16 }, + { AArch64::LD1Twov4h_POST, "ld1", ".4h", 1, false, 16 }, + { AArch64::LD1Twov2s_POST, "ld1", ".2s", 1, false, 16 }, + { AArch64::LD1Twov1d_POST, "ld1", ".1d", 1, false, 16 }, + { AArch64::LD1Threev16b, "ld1", ".16b", 0, false, 0 }, + { AArch64::LD1Threev8h, "ld1", ".8h", 0, false, 0 }, + { AArch64::LD1Threev4s, "ld1", ".4s", 0, false, 0 }, + { AArch64::LD1Threev2d, "ld1", ".2d", 0, false, 0 }, + { AArch64::LD1Threev8b, "ld1", ".8b", 0, false, 0 }, + { AArch64::LD1Threev4h, "ld1", ".4h", 0, false, 0 }, + { AArch64::LD1Threev2s, "ld1", ".2s", 0, false, 0 }, + { AArch64::LD1Threev1d, "ld1", ".1d", 0, false, 0 }, + { AArch64::LD1Threev16b_POST, "ld1", ".16b", 1, false, 48 }, + { AArch64::LD1Threev8h_POST, "ld1", ".8h", 1, false, 48 }, + { AArch64::LD1Threev4s_POST, "ld1", ".4s", 1, false, 48 }, + { AArch64::LD1Threev2d_POST, "ld1", ".2d", 1, false, 48 }, + { AArch64::LD1Threev8b_POST, "ld1", ".8b", 1, false, 24 }, + { AArch64::LD1Threev4h_POST, "ld1", ".4h", 1, false, 24 }, + { AArch64::LD1Threev2s_POST, "ld1", ".2s", 1, false, 24 }, + { AArch64::LD1Threev1d_POST, "ld1", ".1d", 1, false, 24 }, + { AArch64::LD1Fourv16b, "ld1", ".16b", 0, false, 0 }, + { AArch64::LD1Fourv8h, "ld1", ".8h", 0, false, 0 }, + { AArch64::LD1Fourv4s, "ld1", ".4s", 0, false, 0 }, + { AArch64::LD1Fourv2d, "ld1", ".2d", 0, false, 0 }, + { AArch64::LD1Fourv8b, "ld1", ".8b", 0, false, 0 }, + { AArch64::LD1Fourv4h, "ld1", ".4h", 0, false, 0 }, + { AArch64::LD1Fourv2s, "ld1", ".2s", 0, false, 0 }, + { AArch64::LD1Fourv1d, "ld1", ".1d", 0, false, 0 }, + { AArch64::LD1Fourv16b_POST, "ld1", ".16b", 1, false, 64 }, + { AArch64::LD1Fourv8h_POST, "ld1", ".8h", 1, false, 64 }, + { AArch64::LD1Fourv4s_POST, "ld1", ".4s", 1, false, 64 }, + { AArch64::LD1Fourv2d_POST, "ld1", ".2d", 1, false, 64 }, + { AArch64::LD1Fourv8b_POST, "ld1", 
".8b", 1, false, 32 }, + { AArch64::LD1Fourv4h_POST, "ld1", ".4h", 1, false, 32 }, + { AArch64::LD1Fourv2s_POST, "ld1", ".2s", 1, false, 32 }, + { AArch64::LD1Fourv1d_POST, "ld1", ".1d", 1, false, 32 }, + { AArch64::LD2i8, "ld2", ".b", 1, true, 0 }, + { AArch64::LD2i16, "ld2", ".h", 1, true, 0 }, + { AArch64::LD2i32, "ld2", ".s", 1, true, 0 }, + { AArch64::LD2i64, "ld2", ".d", 1, true, 0 }, + { AArch64::LD2i8_POST, "ld2", ".b", 2, true, 2 }, + { AArch64::LD2i16_POST, "ld2", ".h", 2, true, 4 }, + { AArch64::LD2i32_POST, "ld2", ".s", 2, true, 8 }, + { AArch64::LD2i64_POST, "ld2", ".d", 2, true, 16 }, + { AArch64::LD2Rv16b, "ld2r", ".16b", 0, false, 0 }, + { AArch64::LD2Rv8h, "ld2r", ".8h", 0, false, 0 }, + { AArch64::LD2Rv4s, "ld2r", ".4s", 0, false, 0 }, + { AArch64::LD2Rv2d, "ld2r", ".2d", 0, false, 0 }, + { AArch64::LD2Rv8b, "ld2r", ".8b", 0, false, 0 }, + { AArch64::LD2Rv4h, "ld2r", ".4h", 0, false, 0 }, + { AArch64::LD2Rv2s, "ld2r", ".2s", 0, false, 0 }, + { AArch64::LD2Rv1d, "ld2r", ".1d", 0, false, 0 }, + { AArch64::LD2Rv16b_POST, "ld2r", ".16b", 1, false, 2 }, + { AArch64::LD2Rv8h_POST, "ld2r", ".8h", 1, false, 4 }, + { AArch64::LD2Rv4s_POST, "ld2r", ".4s", 1, false, 8 }, + { AArch64::LD2Rv2d_POST, "ld2r", ".2d", 1, false, 16 }, + { AArch64::LD2Rv8b_POST, "ld2r", ".8b", 1, false, 2 }, + { AArch64::LD2Rv4h_POST, "ld2r", ".4h", 1, false, 4 }, + { AArch64::LD2Rv2s_POST, "ld2r", ".2s", 1, false, 8 }, + { AArch64::LD2Rv1d_POST, "ld2r", ".1d", 1, false, 16 }, + { AArch64::LD2Twov16b, "ld2", ".16b", 0, false, 0 }, + { AArch64::LD2Twov8h, "ld2", ".8h", 0, false, 0 }, + { AArch64::LD2Twov4s, "ld2", ".4s", 0, false, 0 }, + { AArch64::LD2Twov2d, "ld2", ".2d", 0, false, 0 }, + { AArch64::LD2Twov8b, "ld2", ".8b", 0, false, 0 }, + { AArch64::LD2Twov4h, "ld2", ".4h", 0, false, 0 }, + { AArch64::LD2Twov2s, "ld2", ".2s", 0, false, 0 }, + { AArch64::LD2Twov16b_POST, "ld2", ".16b", 1, false, 32 }, + { AArch64::LD2Twov8h_POST, "ld2", ".8h", 1, false, 32 }, + { AArch64::LD2Twov4s_POST, "ld2", ".4s", 1, false, 32 }, + { AArch64::LD2Twov2d_POST, "ld2", ".2d", 1, false, 32 }, + { AArch64::LD2Twov8b_POST, "ld2", ".8b", 1, false, 16 }, + { AArch64::LD2Twov4h_POST, "ld2", ".4h", 1, false, 16 }, + { AArch64::LD2Twov2s_POST, "ld2", ".2s", 1, false, 16 }, + { AArch64::LD3i8, "ld3", ".b", 1, true, 0 }, + { AArch64::LD3i16, "ld3", ".h", 1, true, 0 }, + { AArch64::LD3i32, "ld3", ".s", 1, true, 0 }, + { AArch64::LD3i64, "ld3", ".d", 1, true, 0 }, + { AArch64::LD3i8_POST, "ld3", ".b", 2, true, 3 }, + { AArch64::LD3i16_POST, "ld3", ".h", 2, true, 6 }, + { AArch64::LD3i32_POST, "ld3", ".s", 2, true, 12 }, + { AArch64::LD3i64_POST, "ld3", ".d", 2, true, 24 }, + { AArch64::LD3Rv16b, "ld3r", ".16b", 0, false, 0 }, + { AArch64::LD3Rv8h, "ld3r", ".8h", 0, false, 0 }, + { AArch64::LD3Rv4s, "ld3r", ".4s", 0, false, 0 }, + { AArch64::LD3Rv2d, "ld3r", ".2d", 0, false, 0 }, + { AArch64::LD3Rv8b, "ld3r", ".8b", 0, false, 0 }, + { AArch64::LD3Rv4h, "ld3r", ".4h", 0, false, 0 }, + { AArch64::LD3Rv2s, "ld3r", ".2s", 0, false, 0 }, + { AArch64::LD3Rv1d, "ld3r", ".1d", 0, false, 0 }, + { AArch64::LD3Rv16b_POST, "ld3r", ".16b", 1, false, 3 }, + { AArch64::LD3Rv8h_POST, "ld3r", ".8h", 1, false, 6 }, + { AArch64::LD3Rv4s_POST, "ld3r", ".4s", 1, false, 12 }, + { AArch64::LD3Rv2d_POST, "ld3r", ".2d", 1, false, 24 }, + { AArch64::LD3Rv8b_POST, "ld3r", ".8b", 1, false, 3 }, + { AArch64::LD3Rv4h_POST, "ld3r", ".4h", 1, false, 6 }, + { AArch64::LD3Rv2s_POST, "ld3r", ".2s", 1, false, 12 }, + { AArch64::LD3Rv1d_POST, "ld3r", ".1d", 1, false, 24 
}, + { AArch64::LD3Threev16b, "ld3", ".16b", 0, false, 0 }, + { AArch64::LD3Threev8h, "ld3", ".8h", 0, false, 0 }, + { AArch64::LD3Threev4s, "ld3", ".4s", 0, false, 0 }, + { AArch64::LD3Threev2d, "ld3", ".2d", 0, false, 0 }, + { AArch64::LD3Threev8b, "ld3", ".8b", 0, false, 0 }, + { AArch64::LD3Threev4h, "ld3", ".4h", 0, false, 0 }, + { AArch64::LD3Threev2s, "ld3", ".2s", 0, false, 0 }, + { AArch64::LD3Threev16b_POST, "ld3", ".16b", 1, false, 48 }, + { AArch64::LD3Threev8h_POST, "ld3", ".8h", 1, false, 48 }, + { AArch64::LD3Threev4s_POST, "ld3", ".4s", 1, false, 48 }, + { AArch64::LD3Threev2d_POST, "ld3", ".2d", 1, false, 48 }, + { AArch64::LD3Threev8b_POST, "ld3", ".8b", 1, false, 24 }, + { AArch64::LD3Threev4h_POST, "ld3", ".4h", 1, false, 24 }, + { AArch64::LD3Threev2s_POST, "ld3", ".2s", 1, false, 24 }, + { AArch64::LD4i8, "ld4", ".b", 1, true, 0 }, + { AArch64::LD4i16, "ld4", ".h", 1, true, 0 }, + { AArch64::LD4i32, "ld4", ".s", 1, true, 0 }, + { AArch64::LD4i64, "ld4", ".d", 1, true, 0 }, + { AArch64::LD4i8_POST, "ld4", ".b", 2, true, 4 }, + { AArch64::LD4i16_POST, "ld4", ".h", 2, true, 8 }, + { AArch64::LD4i32_POST, "ld4", ".s", 2, true, 16 }, + { AArch64::LD4i64_POST, "ld4", ".d", 2, true, 32 }, + { AArch64::LD4Rv16b, "ld4r", ".16b", 0, false, 0 }, + { AArch64::LD4Rv8h, "ld4r", ".8h", 0, false, 0 }, + { AArch64::LD4Rv4s, "ld4r", ".4s", 0, false, 0 }, + { AArch64::LD4Rv2d, "ld4r", ".2d", 0, false, 0 }, + { AArch64::LD4Rv8b, "ld4r", ".8b", 0, false, 0 }, + { AArch64::LD4Rv4h, "ld4r", ".4h", 0, false, 0 }, + { AArch64::LD4Rv2s, "ld4r", ".2s", 0, false, 0 }, + { AArch64::LD4Rv1d, "ld4r", ".1d", 0, false, 0 }, + { AArch64::LD4Rv16b_POST, "ld4r", ".16b", 1, false, 4 }, + { AArch64::LD4Rv8h_POST, "ld4r", ".8h", 1, false, 8 }, + { AArch64::LD4Rv4s_POST, "ld4r", ".4s", 1, false, 16 }, + { AArch64::LD4Rv2d_POST, "ld4r", ".2d", 1, false, 32 }, + { AArch64::LD4Rv8b_POST, "ld4r", ".8b", 1, false, 4 }, + { AArch64::LD4Rv4h_POST, "ld4r", ".4h", 1, false, 8 }, + { AArch64::LD4Rv2s_POST, "ld4r", ".2s", 1, false, 16 }, + { AArch64::LD4Rv1d_POST, "ld4r", ".1d", 1, false, 32 }, + { AArch64::LD4Fourv16b, "ld4", ".16b", 0, false, 0 }, + { AArch64::LD4Fourv8h, "ld4", ".8h", 0, false, 0 }, + { AArch64::LD4Fourv4s, "ld4", ".4s", 0, false, 0 }, + { AArch64::LD4Fourv2d, "ld4", ".2d", 0, false, 0 }, + { AArch64::LD4Fourv8b, "ld4", ".8b", 0, false, 0 }, + { AArch64::LD4Fourv4h, "ld4", ".4h", 0, false, 0 }, + { AArch64::LD4Fourv2s, "ld4", ".2s", 0, false, 0 }, + { AArch64::LD4Fourv16b_POST, "ld4", ".16b", 1, false, 64 }, + { AArch64::LD4Fourv8h_POST, "ld4", ".8h", 1, false, 64 }, + { AArch64::LD4Fourv4s_POST, "ld4", ".4s", 1, false, 64 }, + { AArch64::LD4Fourv2d_POST, "ld4", ".2d", 1, false, 64 }, + { AArch64::LD4Fourv8b_POST, "ld4", ".8b", 1, false, 32 }, + { AArch64::LD4Fourv4h_POST, "ld4", ".4h", 1, false, 32 }, + { AArch64::LD4Fourv2s_POST, "ld4", ".2s", 1, false, 32 }, + { AArch64::ST1i8, "st1", ".b", 0, true, 0 }, + { AArch64::ST1i16, "st1", ".h", 0, true, 0 }, + { AArch64::ST1i32, "st1", ".s", 0, true, 0 }, + { AArch64::ST1i64, "st1", ".d", 0, true, 0 }, + { AArch64::ST1i8_POST, "st1", ".b", 1, true, 1 }, + { AArch64::ST1i16_POST, "st1", ".h", 1, true, 2 }, + { AArch64::ST1i32_POST, "st1", ".s", 1, true, 4 }, + { AArch64::ST1i64_POST, "st1", ".d", 1, true, 8 }, + { AArch64::ST1Onev16b, "st1", ".16b", 0, false, 0 }, + { AArch64::ST1Onev8h, "st1", ".8h", 0, false, 0 }, + { AArch64::ST1Onev4s, "st1", ".4s", 0, false, 0 }, + { AArch64::ST1Onev2d, "st1", ".2d", 0, false, 0 }, + { AArch64::ST1Onev8b, "st1", 
".8b", 0, false, 0 }, + { AArch64::ST1Onev4h, "st1", ".4h", 0, false, 0 }, + { AArch64::ST1Onev2s, "st1", ".2s", 0, false, 0 }, + { AArch64::ST1Onev1d, "st1", ".1d", 0, false, 0 }, + { AArch64::ST1Onev16b_POST, "st1", ".16b", 1, false, 16 }, + { AArch64::ST1Onev8h_POST, "st1", ".8h", 1, false, 16 }, + { AArch64::ST1Onev4s_POST, "st1", ".4s", 1, false, 16 }, + { AArch64::ST1Onev2d_POST, "st1", ".2d", 1, false, 16 }, + { AArch64::ST1Onev8b_POST, "st1", ".8b", 1, false, 8 }, + { AArch64::ST1Onev4h_POST, "st1", ".4h", 1, false, 8 }, + { AArch64::ST1Onev2s_POST, "st1", ".2s", 1, false, 8 }, + { AArch64::ST1Onev1d_POST, "st1", ".1d", 1, false, 8 }, + { AArch64::ST1Twov16b, "st1", ".16b", 0, false, 0 }, + { AArch64::ST1Twov8h, "st1", ".8h", 0, false, 0 }, + { AArch64::ST1Twov4s, "st1", ".4s", 0, false, 0 }, + { AArch64::ST1Twov2d, "st1", ".2d", 0, false, 0 }, + { AArch64::ST1Twov8b, "st1", ".8b", 0, false, 0 }, + { AArch64::ST1Twov4h, "st1", ".4h", 0, false, 0 }, + { AArch64::ST1Twov2s, "st1", ".2s", 0, false, 0 }, + { AArch64::ST1Twov1d, "st1", ".1d", 0, false, 0 }, + { AArch64::ST1Twov16b_POST, "st1", ".16b", 1, false, 32 }, + { AArch64::ST1Twov8h_POST, "st1", ".8h", 1, false, 32 }, + { AArch64::ST1Twov4s_POST, "st1", ".4s", 1, false, 32 }, + { AArch64::ST1Twov2d_POST, "st1", ".2d", 1, false, 32 }, + { AArch64::ST1Twov8b_POST, "st1", ".8b", 1, false, 16 }, + { AArch64::ST1Twov4h_POST, "st1", ".4h", 1, false, 16 }, + { AArch64::ST1Twov2s_POST, "st1", ".2s", 1, false, 16 }, + { AArch64::ST1Twov1d_POST, "st1", ".1d", 1, false, 16 }, + { AArch64::ST1Threev16b, "st1", ".16b", 0, false, 0 }, + { AArch64::ST1Threev8h, "st1", ".8h", 0, false, 0 }, + { AArch64::ST1Threev4s, "st1", ".4s", 0, false, 0 }, + { AArch64::ST1Threev2d, "st1", ".2d", 0, false, 0 }, + { AArch64::ST1Threev8b, "st1", ".8b", 0, false, 0 }, + { AArch64::ST1Threev4h, "st1", ".4h", 0, false, 0 }, + { AArch64::ST1Threev2s, "st1", ".2s", 0, false, 0 }, + { AArch64::ST1Threev1d, "st1", ".1d", 0, false, 0 }, + { AArch64::ST1Threev16b_POST, "st1", ".16b", 1, false, 48 }, + { AArch64::ST1Threev8h_POST, "st1", ".8h", 1, false, 48 }, + { AArch64::ST1Threev4s_POST, "st1", ".4s", 1, false, 48 }, + { AArch64::ST1Threev2d_POST, "st1", ".2d", 1, false, 48 }, + { AArch64::ST1Threev8b_POST, "st1", ".8b", 1, false, 24 }, + { AArch64::ST1Threev4h_POST, "st1", ".4h", 1, false, 24 }, + { AArch64::ST1Threev2s_POST, "st1", ".2s", 1, false, 24 }, + { AArch64::ST1Threev1d_POST, "st1", ".1d", 1, false, 24 }, + { AArch64::ST1Fourv16b, "st1", ".16b", 0, false, 0 }, + { AArch64::ST1Fourv8h, "st1", ".8h", 0, false, 0 }, + { AArch64::ST1Fourv4s, "st1", ".4s", 0, false, 0 }, + { AArch64::ST1Fourv2d, "st1", ".2d", 0, false, 0 }, + { AArch64::ST1Fourv8b, "st1", ".8b", 0, false, 0 }, + { AArch64::ST1Fourv4h, "st1", ".4h", 0, false, 0 }, + { AArch64::ST1Fourv2s, "st1", ".2s", 0, false, 0 }, + { AArch64::ST1Fourv1d, "st1", ".1d", 0, false, 0 }, + { AArch64::ST1Fourv16b_POST, "st1", ".16b", 1, false, 64 }, + { AArch64::ST1Fourv8h_POST, "st1", ".8h", 1, false, 64 }, + { AArch64::ST1Fourv4s_POST, "st1", ".4s", 1, false, 64 }, + { AArch64::ST1Fourv2d_POST, "st1", ".2d", 1, false, 64 }, + { AArch64::ST1Fourv8b_POST, "st1", ".8b", 1, false, 32 }, + { AArch64::ST1Fourv4h_POST, "st1", ".4h", 1, false, 32 }, + { AArch64::ST1Fourv2s_POST, "st1", ".2s", 1, false, 32 }, + { AArch64::ST1Fourv1d_POST, "st1", ".1d", 1, false, 32 }, + { AArch64::ST2i8, "st2", ".b", 0, true, 0 }, + { AArch64::ST2i16, "st2", ".h", 0, true, 0 }, + { AArch64::ST2i32, "st2", ".s", 0, true, 0 }, + { 
AArch64::ST2i64, "st2", ".d", 0, true, 0 }, + { AArch64::ST2i8_POST, "st2", ".b", 1, true, 2 }, + { AArch64::ST2i16_POST, "st2", ".h", 1, true, 4 }, + { AArch64::ST2i32_POST, "st2", ".s", 1, true, 8 }, + { AArch64::ST2i64_POST, "st2", ".d", 1, true, 16 }, + { AArch64::ST2Twov16b, "st2", ".16b", 0, false, 0 }, + { AArch64::ST2Twov8h, "st2", ".8h", 0, false, 0 }, + { AArch64::ST2Twov4s, "st2", ".4s", 0, false, 0 }, + { AArch64::ST2Twov2d, "st2", ".2d", 0, false, 0 }, + { AArch64::ST2Twov8b, "st2", ".8b", 0, false, 0 }, + { AArch64::ST2Twov4h, "st2", ".4h", 0, false, 0 }, + { AArch64::ST2Twov2s, "st2", ".2s", 0, false, 0 }, + { AArch64::ST2Twov16b_POST, "st2", ".16b", 1, false, 32 }, + { AArch64::ST2Twov8h_POST, "st2", ".8h", 1, false, 32 }, + { AArch64::ST2Twov4s_POST, "st2", ".4s", 1, false, 32 }, + { AArch64::ST2Twov2d_POST, "st2", ".2d", 1, false, 32 }, + { AArch64::ST2Twov8b_POST, "st2", ".8b", 1, false, 16 }, + { AArch64::ST2Twov4h_POST, "st2", ".4h", 1, false, 16 }, + { AArch64::ST2Twov2s_POST, "st2", ".2s", 1, false, 16 }, + { AArch64::ST3i8, "st3", ".b", 0, true, 0 }, + { AArch64::ST3i16, "st3", ".h", 0, true, 0 }, + { AArch64::ST3i32, "st3", ".s", 0, true, 0 }, + { AArch64::ST3i64, "st3", ".d", 0, true, 0 }, + { AArch64::ST3i8_POST, "st3", ".b", 1, true, 3 }, + { AArch64::ST3i16_POST, "st3", ".h", 1, true, 6 }, + { AArch64::ST3i32_POST, "st3", ".s", 1, true, 12 }, + { AArch64::ST3i64_POST, "st3", ".d", 1, true, 24 }, + { AArch64::ST3Threev16b, "st3", ".16b", 0, false, 0 }, + { AArch64::ST3Threev8h, "st3", ".8h", 0, false, 0 }, + { AArch64::ST3Threev4s, "st3", ".4s", 0, false, 0 }, + { AArch64::ST3Threev2d, "st3", ".2d", 0, false, 0 }, + { AArch64::ST3Threev8b, "st3", ".8b", 0, false, 0 }, + { AArch64::ST3Threev4h, "st3", ".4h", 0, false, 0 }, + { AArch64::ST3Threev2s, "st3", ".2s", 0, false, 0 }, + { AArch64::ST3Threev16b_POST, "st3", ".16b", 1, false, 48 }, + { AArch64::ST3Threev8h_POST, "st3", ".8h", 1, false, 48 }, + { AArch64::ST3Threev4s_POST, "st3", ".4s", 1, false, 48 }, + { AArch64::ST3Threev2d_POST, "st3", ".2d", 1, false, 48 }, + { AArch64::ST3Threev8b_POST, "st3", ".8b", 1, false, 24 }, + { AArch64::ST3Threev4h_POST, "st3", ".4h", 1, false, 24 }, + { AArch64::ST3Threev2s_POST, "st3", ".2s", 1, false, 24 }, + { AArch64::ST4i8, "st4", ".b", 0, true, 0 }, + { AArch64::ST4i16, "st4", ".h", 0, true, 0 }, + { AArch64::ST4i32, "st4", ".s", 0, true, 0 }, + { AArch64::ST4i64, "st4", ".d", 0, true, 0 }, + { AArch64::ST4i8_POST, "st4", ".b", 1, true, 4 }, + { AArch64::ST4i16_POST, "st4", ".h", 1, true, 8 }, + { AArch64::ST4i32_POST, "st4", ".s", 1, true, 16 }, + { AArch64::ST4i64_POST, "st4", ".d", 1, true, 32 }, + { AArch64::ST4Fourv16b, "st4", ".16b", 0, false, 0 }, + { AArch64::ST4Fourv8h, "st4", ".8h", 0, false, 0 }, + { AArch64::ST4Fourv4s, "st4", ".4s", 0, false, 0 }, + { AArch64::ST4Fourv2d, "st4", ".2d", 0, false, 0 }, + { AArch64::ST4Fourv8b, "st4", ".8b", 0, false, 0 }, + { AArch64::ST4Fourv4h, "st4", ".4h", 0, false, 0 }, + { AArch64::ST4Fourv2s, "st4", ".2s", 0, false, 0 }, + { AArch64::ST4Fourv16b_POST, "st4", ".16b", 1, false, 64 }, + { AArch64::ST4Fourv8h_POST, "st4", ".8h", 1, false, 64 }, + { AArch64::ST4Fourv4s_POST, "st4", ".4s", 1, false, 64 }, + { AArch64::ST4Fourv2d_POST, "st4", ".2d", 1, false, 64 }, + { AArch64::ST4Fourv8b_POST, "st4", ".8b", 1, false, 32 }, + { AArch64::ST4Fourv4h_POST, "st4", ".4h", 1, false, 32 }, + { AArch64::ST4Fourv2s_POST, "st4", ".2s", 1, false, 32 }, +}; + +static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { + unsigned Idx; + 
for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx) + if (LdStNInstInfo[Idx].Opcode == Opcode) + return &LdStNInstInfo[Idx]; + + return nullptr; +} + +void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + unsigned Opcode = MI->getOpcode(); + StringRef Layout, Mnemonic; + + bool IsTbx; + if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) { + O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t' + << getRegisterName(MI->getOperand(0).getReg(), AArch64::vreg) << ", "; + + unsigned ListOpNum = IsTbx ? 2 : 1; + printVectorList(MI, ListOpNum, O, ""); + + O << ", " + << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg); + printAnnotation(O, Annot); + return; + } + + if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) { + O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t'; + + // Now onto the operands: first a vector list with possible lane + // specifier. E.g. { v0 }[2] + int OpNum = LdStDesc->ListOperand; + printVectorList(MI, OpNum++, O, ""); + + if (LdStDesc->HasLane) + O << '[' << MI->getOperand(OpNum++).getImm() << ']'; + + // Next the address: [xN] + unsigned AddrReg = MI->getOperand(OpNum++).getReg(); + O << ", [" << getRegisterName(AddrReg) << ']'; + + // Finally, there might be a post-indexed offset. + if (LdStDesc->NaturalOffset != 0) { + unsigned Reg = MI->getOperand(OpNum++).getReg(); + if (Reg != AArch64::XZR) + O << ", " << getRegisterName(Reg); + else { + assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?"); + O << ", #" << LdStDesc->NaturalOffset; + } + } + + printAnnotation(O, Annot); + return; + } + + AArch64InstPrinter::printInst(MI, O, Annot); +} + +bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { +#ifndef NDEBUG + unsigned Opcode = MI->getOpcode(); + assert(Opcode == AArch64::SYSxt && "Invalid opcode for SYS alias!"); +#endif + + const char *Asm = nullptr; + const MCOperand &Op1 = MI->getOperand(0); + const MCOperand &Cn = MI->getOperand(1); + const MCOperand &Cm = MI->getOperand(2); + const MCOperand &Op2 = MI->getOperand(3); + + unsigned Op1Val = Op1.getImm(); + unsigned CnVal = Cn.getImm(); + unsigned CmVal = Cm.getImm(); + unsigned Op2Val = Op2.getImm(); + + if (CnVal == 7) { + switch (CmVal) { + default: + break; + + // IC aliases + case 1: + if (Op1Val == 0 && Op2Val == 0) + Asm = "ic\tialluis"; + break; + case 5: + if (Op1Val == 0 && Op2Val == 0) + Asm = "ic\tiallu"; + else if (Op1Val == 3 && Op2Val == 1) + Asm = "ic\tivau"; + break; + + // DC aliases + case 4: + if (Op1Val == 3 && Op2Val == 1) + Asm = "dc\tzva"; + break; + case 6: + if (Op1Val == 0 && Op2Val == 1) + Asm = "dc\tivac"; + if (Op1Val == 0 && Op2Val == 2) + Asm = "dc\tisw"; + break; + case 10: + if (Op1Val == 3 && Op2Val == 1) + Asm = "dc\tcvac"; + else if (Op1Val == 0 && Op2Val == 2) + Asm = "dc\tcsw"; + break; + case 11: + if (Op1Val == 3 && Op2Val == 1) + Asm = "dc\tcvau"; + break; + case 14: + if (Op1Val == 3 && Op2Val == 1) + Asm = "dc\tcivac"; + else if (Op1Val == 0 && Op2Val == 2) + Asm = "dc\tcisw"; + break; + + // AT aliases + case 8: + switch (Op1Val) { + default: + break; + case 0: + switch (Op2Val) { + default: + break; + case 0: Asm = "at\ts1e1r"; break; + case 1: Asm = "at\ts1e1w"; break; + case 2: Asm = "at\ts1e0r"; break; + case 3: Asm = "at\ts1e0w"; break; + } + break; + case 4: + switch (Op2Val) { + default: + break; + case 0: Asm = "at\ts1e2r"; break; + case 1: Asm = "at\ts1e2w"; break; + case 4: Asm = "at\ts12e1r"; break; + case 5: Asm = "at\ts12e1w"; 
break; + case 6: Asm = "at\ts12e0r"; break; + case 7: Asm = "at\ts12e0w"; break; + } + break; + case 6: + switch (Op2Val) { + default: + break; + case 0: Asm = "at\ts1e3r"; break; + case 1: Asm = "at\ts1e3w"; break; + } + break; + } + break; + } + } else if (CnVal == 8) { + // TLBI aliases + switch (CmVal) { + default: + break; + case 3: + switch (Op1Val) { + default: + break; + case 0: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\tvmalle1is"; break; + case 1: Asm = "tlbi\tvae1is"; break; + case 2: Asm = "tlbi\taside1is"; break; + case 3: Asm = "tlbi\tvaae1is"; break; + case 5: Asm = "tlbi\tvale1is"; break; + case 7: Asm = "tlbi\tvaale1is"; break; + } + break; + case 4: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\talle2is"; break; + case 1: Asm = "tlbi\tvae2is"; break; + case 4: Asm = "tlbi\talle1is"; break; + case 5: Asm = "tlbi\tvale2is"; break; + case 6: Asm = "tlbi\tvmalls12e1is"; break; + } + break; + case 6: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\talle3is"; break; + case 1: Asm = "tlbi\tvae3is"; break; + case 5: Asm = "tlbi\tvale3is"; break; + } + break; + } + break; + case 0: + switch (Op1Val) { + default: + break; + case 4: + switch (Op2Val) { + default: + break; + case 1: Asm = "tlbi\tipas2e1is"; break; + case 5: Asm = "tlbi\tipas2le1is"; break; + } + break; + } + break; + case 4: + switch (Op1Val) { + default: + break; + case 4: + switch (Op2Val) { + default: + break; + case 1: Asm = "tlbi\tipas2e1"; break; + case 5: Asm = "tlbi\tipas2le1"; break; + } + break; + } + break; + case 7: + switch (Op1Val) { + default: + break; + case 0: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\tvmalle1"; break; + case 1: Asm = "tlbi\tvae1"; break; + case 2: Asm = "tlbi\taside1"; break; + case 3: Asm = "tlbi\tvaae1"; break; + case 5: Asm = "tlbi\tvale1"; break; + case 7: Asm = "tlbi\tvaale1"; break; + } + break; + case 4: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\talle2"; break; + case 1: Asm = "tlbi\tvae2"; break; + case 4: Asm = "tlbi\talle1"; break; + case 5: Asm = "tlbi\tvale2"; break; + case 6: Asm = "tlbi\tvmalls12e1"; break; + } + break; + case 6: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\talle3"; break; + case 1: Asm = "tlbi\tvae3"; break; + case 5: Asm = "tlbi\tvale3"; break; + } + break; + } + break; + } + } + + if (Asm) { + unsigned Reg = MI->getOperand(4).getReg(); + + O << '\t' << Asm; + if (StringRef(Asm).lower().find("all") == StringRef::npos) + O << ", " << getRegisterName(Reg); + } + + return Asm != nullptr; +} + +void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + O << getRegisterName(Reg); + } else if (Op.isImm()) { + O << '#' << Op.getImm(); + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + O << *Op.getExpr(); + } +} + +void AArch64InstPrinter::printHexImm(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + O << format("#%#llx", Op.getImm()); +} + +void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, + unsigned Imm, raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + if (Reg == AArch64::XZR) + O << "#" << Imm; + else + O << getRegisterName(Reg); + } else + assert(0 && "unknown operand kind in printPostIncOperand64"); +} + +void 
AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + assert(Op.isReg() && "Non-register vreg operand!"); + unsigned Reg = Op.getReg(); + O << getRegisterName(Reg, AArch64::vreg); +} + +void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + assert(Op.isImm() && "System instruction C[nm] operands must be immediates!"); + O << "c" << Op.getImm(); +} + +void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.isImm()) { + unsigned Val = (MO.getImm() & 0xfff); + assert(Val == MO.getImm() && "Add/sub immediate out of range!"); + unsigned Shift = + AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm()); + O << '#' << Val; + if (Shift != 0) + printShifter(MI, OpNum + 1, O); + + if (CommentStream) + *CommentStream << '=' << (Val << Shift) << '\n'; + } else { + assert(MO.isExpr() && "Unexpected operand type!"); + O << *MO.getExpr(); + printShifter(MI, OpNum + 1, O); + } +} + +void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + uint64_t Val = MI->getOperand(OpNum).getImm(); + O << "#0x"; + O.write_hex(AArch64_AM::decodeLogicalImmediate(Val, 32)); +} + +void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + uint64_t Val = MI->getOperand(OpNum).getImm(); + O << "#0x"; + O.write_hex(AArch64_AM::decodeLogicalImmediate(Val, 64)); +} + +void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNum).getImm(); + // LSL #0 should not be printed. + if (AArch64_AM::getShiftType(Val) == AArch64_AM::LSL && + AArch64_AM::getShiftValue(Val) == 0) + return; + O << ", " << AArch64_AM::getShiftExtendName(AArch64_AM::getShiftType(Val)) + << " #" << AArch64_AM::getShiftValue(Val); +} + +void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << getRegisterName(MI->getOperand(OpNum).getReg()); + printShifter(MI, OpNum + 1, O); +} + +void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << getRegisterName(MI->getOperand(OpNum).getReg()); + printArithExtend(MI, OpNum + 1, O); +} + +void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNum).getImm(); + AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getArithExtendType(Val); + unsigned ShiftVal = AArch64_AM::getArithShiftValue(Val); + + // If the destination or first source register operand is [W]SP, print + // UXTW/UXTX as LSL, and if the shift amount is also zero, print nothing at + // all. 
+ if (ExtType == AArch64_AM::UXTW || ExtType == AArch64_AM::UXTX) { + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src1 = MI->getOperand(1).getReg(); + if ( ((Dest == AArch64::SP || Src1 == AArch64::SP) && + ExtType == AArch64_AM::UXTX) || + ((Dest == AArch64::WSP || Src1 == AArch64::WSP) && + ExtType == AArch64_AM::UXTW) ) { + if (ShiftVal != 0) + O << ", lsl #" << ShiftVal; + return; + } + } + O << ", " << AArch64_AM::getShiftExtendName(ExtType); + if (ShiftVal != 0) + O << " #" << ShiftVal; +} + +void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum, + raw_ostream &O, char SrcRegKind, + unsigned Width) { + unsigned SignExtend = MI->getOperand(OpNum).getImm(); + unsigned DoShift = MI->getOperand(OpNum + 1).getImm(); + + // sxtw, sxtx, uxtw or lsl (== uxtx) + bool IsLSL = !SignExtend && SrcRegKind == 'x'; + if (IsLSL) + O << "lsl"; + else + O << (SignExtend ? 's' : 'u') << "xt" << SrcRegKind; + + if (DoShift || IsLSL) + O << " #" << Log2_32(Width / 8); +} + +void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm(); + O << AArch64CC::getCondCodeName(CC); +} + +void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm(); + O << AArch64CC::getCondCodeName(AArch64CC::getInvertedCondCode(CC)); +} + +void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']'; +} + +template +void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << '#' << Scale * MI->getOperand(OpNum).getImm(); +} + +void AArch64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum, + unsigned Scale, raw_ostream &O) { + const MCOperand MO = MI->getOperand(OpNum); + if (MO.isImm()) { + O << "#" << (MO.getImm() * Scale); + } else { + assert(MO.isExpr() && "Unexpected operand type!"); + O << *MO.getExpr(); + } +} + +void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum, + unsigned Scale, raw_ostream &O) { + const MCOperand MO1 = MI->getOperand(OpNum + 1); + O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()); + if (MO1.isImm()) { + O << ", #" << (MO1.getImm() * Scale); + } else { + assert(MO1.isExpr() && "Unexpected operand type!"); + O << ", " << *MO1.getExpr(); + } + O << ']'; +} + +void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned prfop = MI->getOperand(OpNum).getImm(); + bool Valid; + StringRef Name = AArch64PRFM::PRFMMapper().toString(prfop, Valid); + if (Valid) + O << Name; + else + O << '#' << prfop; +} + +void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + float FPImm = + MO.isFPImm() ? MO.getFPImm() : AArch64_AM::getFPImmFloat(MO.getImm()); + + // 8 decimal places are enough to perfectly represent permitted floats. 
+ O << format("#%.8f", FPImm); +} + +static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { + while (Stride--) { + switch (Reg) { + default: + assert(0 && "Vector register expected!"); + case AArch64::Q0: Reg = AArch64::Q1; break; + case AArch64::Q1: Reg = AArch64::Q2; break; + case AArch64::Q2: Reg = AArch64::Q3; break; + case AArch64::Q3: Reg = AArch64::Q4; break; + case AArch64::Q4: Reg = AArch64::Q5; break; + case AArch64::Q5: Reg = AArch64::Q6; break; + case AArch64::Q6: Reg = AArch64::Q7; break; + case AArch64::Q7: Reg = AArch64::Q8; break; + case AArch64::Q8: Reg = AArch64::Q9; break; + case AArch64::Q9: Reg = AArch64::Q10; break; + case AArch64::Q10: Reg = AArch64::Q11; break; + case AArch64::Q11: Reg = AArch64::Q12; break; + case AArch64::Q12: Reg = AArch64::Q13; break; + case AArch64::Q13: Reg = AArch64::Q14; break; + case AArch64::Q14: Reg = AArch64::Q15; break; + case AArch64::Q15: Reg = AArch64::Q16; break; + case AArch64::Q16: Reg = AArch64::Q17; break; + case AArch64::Q17: Reg = AArch64::Q18; break; + case AArch64::Q18: Reg = AArch64::Q19; break; + case AArch64::Q19: Reg = AArch64::Q20; break; + case AArch64::Q20: Reg = AArch64::Q21; break; + case AArch64::Q21: Reg = AArch64::Q22; break; + case AArch64::Q22: Reg = AArch64::Q23; break; + case AArch64::Q23: Reg = AArch64::Q24; break; + case AArch64::Q24: Reg = AArch64::Q25; break; + case AArch64::Q25: Reg = AArch64::Q26; break; + case AArch64::Q26: Reg = AArch64::Q27; break; + case AArch64::Q27: Reg = AArch64::Q28; break; + case AArch64::Q28: Reg = AArch64::Q29; break; + case AArch64::Q29: Reg = AArch64::Q30; break; + case AArch64::Q30: Reg = AArch64::Q31; break; + // Vector lists can wrap around. + case AArch64::Q31: + Reg = AArch64::Q0; + break; + } + } + return Reg; +} + +void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, + raw_ostream &O, + StringRef LayoutSuffix) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + + O << "{ "; + + // Work out how many registers there are in the list (if there is an actual + // list). + unsigned NumRegs = 1; + if (MRI.getRegClass(AArch64::DDRegClassID).contains(Reg) || + MRI.getRegClass(AArch64::QQRegClassID).contains(Reg)) + NumRegs = 2; + else if (MRI.getRegClass(AArch64::DDDRegClassID).contains(Reg) || + MRI.getRegClass(AArch64::QQQRegClassID).contains(Reg)) + NumRegs = 3; + else if (MRI.getRegClass(AArch64::DDDDRegClassID).contains(Reg) || + MRI.getRegClass(AArch64::QQQQRegClassID).contains(Reg)) + NumRegs = 4; + + // Now forget about the list and find out what the first register is. + if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::dsub0)) + Reg = FirstReg; + else if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::qsub0)) + Reg = FirstReg; + + // If it's a D-reg, we need to promote it to the equivalent Q-reg before + // printing (otherwise getRegisterName fails). 
+ if (MRI.getRegClass(AArch64::FPR64RegClassID).contains(Reg)) { + const MCRegisterClass &FPR128RC = + MRI.getRegClass(AArch64::FPR128RegClassID); + Reg = MRI.getMatchingSuperReg(Reg, AArch64::dsub, &FPR128RC); + } + + for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) { + O << getRegisterName(Reg, AArch64::vreg) << LayoutSuffix; + if (i + 1 != NumRegs) + O << ", "; + } + + O << " }"; +} + +void AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + printVectorList(MI, OpNum, O, ""); +} + +template +void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + std::string Suffix("."); + if (NumLanes) + Suffix += itostr(NumLanes) + LaneKind; + else + Suffix += LaneKind; + + printVectorList(MI, OpNum, O, Suffix); +} + +void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "[" << MI->getOperand(OpNum).getImm() << "]"; +} + +void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNum); + + // If the label has already been resolved to an immediate offset (say, when + // we're running the disassembler), just print the immediate. + if (Op.isImm()) { + O << "#" << (Op.getImm() << 2); + return; + } + + // If the branch target is simply an address then print it in hex. + const MCConstantExpr *BranchTarget = + dyn_cast(MI->getOperand(OpNum).getExpr()); + int64_t Address; + if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + O << "0x"; + O.write_hex(Address); + } else { + // Otherwise, just print the expression. + O << *MI->getOperand(OpNum).getExpr(); + } +} + +void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNum); + + // If the label has already been resolved to an immediate offset (say, when + // we're running the disassembler), just print the immediate. + if (Op.isImm()) { + O << "#" << (Op.getImm() << 12); + return; + } + + // Otherwise, just print the expression. 
+ O << *MI->getOperand(OpNum).getExpr(); +} + +void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); + unsigned Opcode = MI->getOpcode(); + + bool Valid; + StringRef Name; + if (Opcode == AArch64::ISB) + Name = AArch64ISB::ISBMapper().toString(Val, Valid); + else + Name = AArch64DB::DBarrierMapper().toString(Val, Valid); + if (Valid) + O << Name; + else + O << "#" << Val; +} + +void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); + + bool Valid; + auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures()); + std::string Name = Mapper.toString(Val, Valid); + + if (Valid) + O << StringRef(Name).upper(); +} + +void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); + + bool Valid; + auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures()); + std::string Name = Mapper.toString(Val, Valid); + + if (Valid) + O << StringRef(Name).upper(); +} + +void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); + + bool Valid; + StringRef Name = AArch64PState::PStateMapper().toString(Val, Valid); + if (Valid) + O << StringRef(Name.str()).upper(); + else + O << "#" << Val; +} + +void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned RawVal = MI->getOperand(OpNo).getImm(); + uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal); + O << format("#%#016llx", Val); +} diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h similarity index 88% rename from lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h rename to lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 0fd6f1007121..fe7666e5cadb 100644 --- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -1,4 +1,4 @@ -//===-- ARM64InstPrinter.h - Convert ARM64 MCInst to assembly syntax ------===// +//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===// // // The LLVM Compiler Infrastructure // @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// // -// This class prints an ARM64 MCInst to a .s file. +// This class prints an AArch64 MCInst to a .s file. 
// //===----------------------------------------------------------------------===// -#ifndef ARM64INSTPRINTER_H -#define ARM64INSTPRINTER_H +#ifndef AArch64INSTPRINTER_H +#define AArch64INSTPRINTER_H -#include "MCTargetDesc/ARM64MCTargetDesc.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -23,10 +23,10 @@ namespace llvm { class MCOperand; -class ARM64InstPrinter : public MCInstPrinter { +class AArch64InstPrinter : public MCInstPrinter { public: - ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; void printRegName(raw_ostream &OS, unsigned RegNo) const override; @@ -40,7 +40,7 @@ class ARM64InstPrinter : public MCInstPrinter { return getRegisterName(RegNo); } static const char *getRegisterName(unsigned RegNo, - unsigned AltIdx = ARM64::NoRegAltName); + unsigned AltIdx = AArch64::NoRegAltName); protected: bool printSysAlias(const MCInst *MI, raw_ostream &O); @@ -118,9 +118,9 @@ class ARM64InstPrinter : public MCInstPrinter { void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); }; -class ARM64AppleInstPrinter : public ARM64InstPrinter { +class AArch64AppleInstPrinter : public AArch64InstPrinter { public: - ARM64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; @@ -133,7 +133,7 @@ class ARM64AppleInstPrinter : public ARM64InstPrinter { return getRegisterName(RegNo); } static const char *getRegisterName(unsigned RegNo, - unsigned AltIdx = ARM64::NoRegAltName); + unsigned AltIdx = AArch64::NoRegAltName); }; } diff --git a/lib/Target/AArch64/InstPrinter/CMakeLists.txt b/lib/Target/AArch64/InstPrinter/CMakeLists.txt new file mode 100644 index 000000000000..363f50258d77 --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) + +add_llvm_library(LLVMAArch64AsmPrinter + AArch64InstPrinter.cpp + ) + +add_dependencies(LLVMAArch64AsmPrinter AArch64CommonTableGen) diff --git a/lib/Target/ARM64/InstPrinter/LLVMBuild.txt b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt similarity index 73% rename from lib/Target/ARM64/InstPrinter/LLVMBuild.txt rename to lib/Target/AArch64/InstPrinter/LLVMBuild.txt index 7ab439249210..a13e842cdd3b 100644 --- a/lib/Target/ARM64/InstPrinter/LLVMBuild.txt +++ b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/ARM64/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===; +;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,8 +17,8 @@ [component_0] type = Library -name = ARM64AsmPrinter -parent = ARM64 -required_libraries = ARM64Utils MC Support -add_to_library_groups = ARM64 +name = AArch64AsmPrinter +parent = AArch64 +required_libraries = AArch64Utils MC Support +add_to_library_groups = AArch64 diff --git a/lib/Target/ARM64/InstPrinter/Makefile b/lib/Target/AArch64/InstPrinter/Makefile similarity index 82% rename from lib/Target/ARM64/InstPrinter/Makefile rename to lib/Target/AArch64/InstPrinter/Makefile index a59efb08465f..b17e8d080119 100644 --- a/lib/Target/ARM64/InstPrinter/Makefile +++ b/lib/Target/AArch64/InstPrinter/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/ARM64/AsmPrinter/Makefile ----------------*- Makefile -*-===## +##===- lib/Target/AArch64/AsmPrinter/Makefile --------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # @@ -7,7 +7,7 @@ # ##===----------------------------------------------------------------------===## LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64AsmPrinter +LIBRARYNAME = LLVMAArch64AsmPrinter # Hack: we need to include 'main' arm target directory to grab private headers CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
diff --git a/lib/Target/ARM64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt similarity index 70% rename from lib/Target/ARM64/LLVMBuild.txt rename to lib/Target/AArch64/LLVMBuild.txt index 3d1e56e7ca65..642c18394a67 100644 --- a/lib/Target/ARM64/LLVMBuild.txt +++ b/lib/Target/AArch64/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/ARM64/LLVMBuild.txt -------------------------*- Conf -*--===; +;===- ./lib/Target/AArch64/LLVMBuild.txt -------------------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -20,7 +20,7 @@ subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Util [component_0] type = TargetGroup -name = ARM64 +name = AArch64 parent = Target has_asmparser = 1 has_asmprinter = 1 @@ -29,7 +29,7 @@ has_jit = 1 [component_1] type = Library -name = ARM64CodeGen -parent = ARM64 -required_libraries = ARM64AsmPrinter ARM64Desc ARM64Info ARM64Utils Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target -add_to_library_groups = ARM64 +name = AArch64CodeGen +parent = AArch64 +required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AArch64Utils Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target +add_to_library_groups = AArch64 diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h similarity index 89% rename from lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h rename to lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h index 53bd3545a594..8b1e44e26e93 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h @@ -1,4 +1,4 @@ -//===- ARM64AddressingModes.h - ARM64 Addressing Modes ----------*- C++ -*-===// +//===- AArch64AddressingModes.h - AArch64 Addressing Modes ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This file contains the ARM64 addressing mode implementation stuff. +// This file contains the AArch64 addressing mode implementation stuff. // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H -#define LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H +#ifndef LLVM_TARGET_AArch64_AArch64ADDRESSINGMODES_H +#define LLVM_TARGET_AArch64_AArch64ADDRESSINGMODES_H #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -22,8 +22,8 @@ namespace llvm { -/// ARM64_AM - ARM64 Addressing Mode Stuff -namespace ARM64_AM { +/// AArch64_AM - AArch64 Addressing Mode Stuff +namespace AArch64_AM { //===----------------------------------------------------------------------===// // Shifts @@ -49,35 +49,35 @@ enum ShiftExtendType { }; /// getShiftName - Get the string encoding for the shift type. 
-static inline const char *getShiftExtendName(ARM64_AM::ShiftExtendType ST) { +static inline const char *getShiftExtendName(AArch64_AM::ShiftExtendType ST) { switch (ST) { default: assert(false && "unhandled shift type!"); - case ARM64_AM::LSL: return "lsl"; - case ARM64_AM::LSR: return "lsr"; - case ARM64_AM::ASR: return "asr"; - case ARM64_AM::ROR: return "ror"; - case ARM64_AM::MSL: return "msl"; - case ARM64_AM::UXTB: return "uxtb"; - case ARM64_AM::UXTH: return "uxth"; - case ARM64_AM::UXTW: return "uxtw"; - case ARM64_AM::UXTX: return "uxtx"; - case ARM64_AM::SXTB: return "sxtb"; - case ARM64_AM::SXTH: return "sxth"; - case ARM64_AM::SXTW: return "sxtw"; - case ARM64_AM::SXTX: return "sxtx"; + case AArch64_AM::LSL: return "lsl"; + case AArch64_AM::LSR: return "lsr"; + case AArch64_AM::ASR: return "asr"; + case AArch64_AM::ROR: return "ror"; + case AArch64_AM::MSL: return "msl"; + case AArch64_AM::UXTB: return "uxtb"; + case AArch64_AM::UXTH: return "uxth"; + case AArch64_AM::UXTW: return "uxtw"; + case AArch64_AM::UXTX: return "uxtx"; + case AArch64_AM::SXTB: return "sxtb"; + case AArch64_AM::SXTH: return "sxth"; + case AArch64_AM::SXTW: return "sxtw"; + case AArch64_AM::SXTX: return "sxtx"; } return nullptr; } /// getShiftType - Extract the shift type. -static inline ARM64_AM::ShiftExtendType getShiftType(unsigned Imm) { +static inline AArch64_AM::ShiftExtendType getShiftType(unsigned Imm) { switch ((Imm >> 6) & 0x7) { - default: return ARM64_AM::InvalidShiftExtend; - case 0: return ARM64_AM::LSL; - case 1: return ARM64_AM::LSR; - case 2: return ARM64_AM::ASR; - case 3: return ARM64_AM::ROR; - case 4: return ARM64_AM::MSL; + default: return AArch64_AM::InvalidShiftExtend; + case 0: return AArch64_AM::LSL; + case 1: return AArch64_AM::LSR; + case 2: return AArch64_AM::ASR; + case 3: return AArch64_AM::ROR; + case 4: return AArch64_AM::MSL; } } @@ -95,17 +95,17 @@ static inline unsigned getShiftValue(unsigned Imm) { /// 100 ==> msl /// {8-6} = shifter /// {5-0} = imm -static inline unsigned getShifterImm(ARM64_AM::ShiftExtendType ST, +static inline unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm) { assert((Imm & 0x3f) == Imm && "Illegal shifted immedate value!"); unsigned STEnc = 0; switch (ST) { default: llvm_unreachable("Invalid shift requested"); - case ARM64_AM::LSL: STEnc = 0; break; - case ARM64_AM::LSR: STEnc = 1; break; - case ARM64_AM::ASR: STEnc = 2; break; - case ARM64_AM::ROR: STEnc = 3; break; - case ARM64_AM::MSL: STEnc = 4; break; + case AArch64_AM::LSL: STEnc = 0; break; + case AArch64_AM::LSR: STEnc = 1; break; + case AArch64_AM::ASR: STEnc = 2; break; + case AArch64_AM::ROR: STEnc = 3; break; + case AArch64_AM::MSL: STEnc = 4; break; } return (STEnc << 6) | (Imm & 0x3f); } @@ -120,22 +120,22 @@ static inline unsigned getArithShiftValue(unsigned Imm) { } /// getExtendType - Extract the extend type for operands of arithmetic ops. 
-static inline ARM64_AM::ShiftExtendType getExtendType(unsigned Imm) { +static inline AArch64_AM::ShiftExtendType getExtendType(unsigned Imm) { assert((Imm & 0x7) == Imm && "invalid immediate!"); switch (Imm) { default: llvm_unreachable("Compiler bug!"); - case 0: return ARM64_AM::UXTB; - case 1: return ARM64_AM::UXTH; - case 2: return ARM64_AM::UXTW; - case 3: return ARM64_AM::UXTX; - case 4: return ARM64_AM::SXTB; - case 5: return ARM64_AM::SXTH; - case 6: return ARM64_AM::SXTW; - case 7: return ARM64_AM::SXTX; + case 0: return AArch64_AM::UXTB; + case 1: return AArch64_AM::UXTH; + case 2: return AArch64_AM::UXTW; + case 3: return AArch64_AM::UXTX; + case 4: return AArch64_AM::SXTB; + case 5: return AArch64_AM::SXTH; + case 6: return AArch64_AM::SXTW; + case 7: return AArch64_AM::SXTX; } } -static inline ARM64_AM::ShiftExtendType getArithExtendType(unsigned Imm) { +static inline AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm) { return getExtendType((Imm >> 3) & 0x7); } @@ -148,17 +148,17 @@ static inline ARM64_AM::ShiftExtendType getArithExtendType(unsigned Imm) { /// 101 ==> sxth /// 110 ==> sxtw /// 111 ==> sxtx -inline unsigned getExtendEncoding(ARM64_AM::ShiftExtendType ET) { +inline unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET) { switch (ET) { default: llvm_unreachable("Invalid extend type requested"); - case ARM64_AM::UXTB: return 0; break; - case ARM64_AM::UXTH: return 1; break; - case ARM64_AM::UXTW: return 2; break; - case ARM64_AM::UXTX: return 3; break; - case ARM64_AM::SXTB: return 4; break; - case ARM64_AM::SXTH: return 5; break; - case ARM64_AM::SXTW: return 6; break; - case ARM64_AM::SXTX: return 7; break; + case AArch64_AM::UXTB: return 0; break; + case AArch64_AM::UXTH: return 1; break; + case AArch64_AM::UXTW: return 2; break; + case AArch64_AM::UXTX: return 3; break; + case AArch64_AM::SXTB: return 4; break; + case AArch64_AM::SXTH: return 5; break; + case AArch64_AM::SXTW: return 6; break; + case AArch64_AM::SXTX: return 7; break; } } @@ -167,7 +167,7 @@ inline unsigned getExtendEncoding(ARM64_AM::ShiftExtendType ET) { /// imm: 3-bit extend amount /// {5-3} = shifter /// {2-0} = imm3 -static inline unsigned getArithExtendImm(ARM64_AM::ShiftExtendType ET, +static inline unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm) { assert((Imm & 0x7) == Imm && "Illegal shifted immedate value!"); return (getExtendEncoding(ET) << 3) | (Imm & 0x7); @@ -181,7 +181,7 @@ static inline bool getMemDoShift(unsigned Imm) { /// getExtendType - Extract the extend type for the offset operand of /// loads/stores. 
-static inline ARM64_AM::ShiftExtendType getMemExtendType(unsigned Imm) { +static inline AArch64_AM::ShiftExtendType getMemExtendType(unsigned Imm) { return getExtendType((Imm >> 1) & 0x7); } @@ -197,7 +197,7 @@ static inline ARM64_AM::ShiftExtendType getMemExtendType(unsigned Imm) { /// 111 ==> sxtx /// {3-1} = shifter /// {0} = doshift -static inline unsigned getMemExtendImm(ARM64_AM::ShiftExtendType ET, +static inline unsigned getMemExtendImm(AArch64_AM::ShiftExtendType ET, bool DoShift) { return (getExtendEncoding(ET) << 1) | unsigned(DoShift); } @@ -731,7 +731,7 @@ static inline uint64_t decodeAdvSIMDModImmType12(uint8_t Imm) { return (EncVal << 32) | EncVal; } -} // end namespace ARM64_AM +} // end namespace AArch64_AM } // end namespace llvm diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp similarity index 66% rename from lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp rename to lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index ba5025ab620c..d8900d4fceb2 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -1,4 +1,4 @@ -//===-- ARM64AsmBackend.cpp - ARM64 Assembler Backend ---------------------===// +//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===// // // The LLVM Compiler Infrastructure // @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64RegisterInfo.h" -#include "MCTargetDesc/ARM64FixupKinds.h" +#include "AArch64.h" +#include "AArch64RegisterInfo.h" +#include "MCTargetDesc/AArch64FixupKinds.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCDirectives.h" @@ -23,38 +23,38 @@ using namespace llvm; namespace { -class ARM64AsmBackend : public MCAsmBackend { +class AArch64AsmBackend : public MCAsmBackend { static const unsigned PCRelFlagVal = MCFixupKindInfo::FKF_IsAlignedDownTo32Bits | MCFixupKindInfo::FKF_IsPCRel; public: - ARM64AsmBackend(const Target &T) : MCAsmBackend() {} + AArch64AsmBackend(const Target &T) : MCAsmBackend() {} unsigned getNumFixupKinds() const override { - return ARM64::NumTargetFixupKinds; + return AArch64::NumTargetFixupKinds; } const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { - const static MCFixupKindInfo Infos[ARM64::NumTargetFixupKinds] = { + const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = { // This table *must* be in the order that the fixup_* kinds are defined in - // ARM64FixupKinds.h. + // AArch64FixupKinds.h. 
// // Name Offset (bits) Size (bits) Flags - { "fixup_arm64_pcrel_adr_imm21", 0, 32, PCRelFlagVal }, - { "fixup_arm64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal }, - { "fixup_arm64_add_imm12", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale1", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale2", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale4", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale8", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale16", 10, 12, 0 }, - { "fixup_arm64_ldr_pcrel_imm19", 5, 19, PCRelFlagVal }, - { "fixup_arm64_movw", 5, 16, 0 }, - { "fixup_arm64_pcrel_branch14", 5, 14, PCRelFlagVal }, - { "fixup_arm64_pcrel_branch19", 5, 19, PCRelFlagVal }, - { "fixup_arm64_pcrel_branch26", 0, 26, PCRelFlagVal }, - { "fixup_arm64_pcrel_call26", 0, 26, PCRelFlagVal }, - { "fixup_arm64_tlsdesc_call", 0, 0, 0 } + { "fixup_aarch64_pcrel_adr_imm21", 0, 32, PCRelFlagVal }, + { "fixup_aarch64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal }, + { "fixup_aarch64_add_imm12", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale1", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale2", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale4", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale8", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale16", 10, 12, 0 }, + { "fixup_aarch64_ldr_pcrel_imm19", 5, 19, PCRelFlagVal }, + { "fixup_aarch64_movw", 5, 16, 0 }, + { "fixup_aarch64_pcrel_branch14", 5, 14, PCRelFlagVal }, + { "fixup_aarch64_pcrel_branch19", 5, 19, PCRelFlagVal }, + { "fixup_aarch64_pcrel_branch26", 0, 26, PCRelFlagVal }, + { "fixup_aarch64_pcrel_call26", 0, 26, PCRelFlagVal }, + { "fixup_aarch64_tlsdesc_call", 0, 0, 0 } }; if (Kind < FirstTargetFixupKind) @@ -88,31 +88,31 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { default: assert(0 && "Unknown fixup kind!"); - case ARM64::fixup_arm64_tlsdesc_call: + case AArch64::fixup_aarch64_tlsdesc_call: return 0; case FK_Data_1: return 1; case FK_Data_2: - case ARM64::fixup_arm64_movw: + case AArch64::fixup_aarch64_movw: return 2; - case ARM64::fixup_arm64_pcrel_branch14: - case ARM64::fixup_arm64_add_imm12: - case ARM64::fixup_arm64_ldst_imm12_scale1: - case ARM64::fixup_arm64_ldst_imm12_scale2: - case ARM64::fixup_arm64_ldst_imm12_scale4: - case ARM64::fixup_arm64_ldst_imm12_scale8: - case ARM64::fixup_arm64_ldst_imm12_scale16: - case ARM64::fixup_arm64_ldr_pcrel_imm19: - case ARM64::fixup_arm64_pcrel_branch19: + case AArch64::fixup_aarch64_pcrel_branch14: + case AArch64::fixup_aarch64_add_imm12: + case AArch64::fixup_aarch64_ldst_imm12_scale1: + case AArch64::fixup_aarch64_ldst_imm12_scale2: + case AArch64::fixup_aarch64_ldst_imm12_scale4: + case AArch64::fixup_aarch64_ldst_imm12_scale8: + case AArch64::fixup_aarch64_ldst_imm12_scale16: + case AArch64::fixup_aarch64_ldr_pcrel_imm19: + case AArch64::fixup_aarch64_pcrel_branch19: return 3; - case ARM64::fixup_arm64_pcrel_adr_imm21: - case ARM64::fixup_arm64_pcrel_adrp_imm21: - case ARM64::fixup_arm64_pcrel_branch26: - case ARM64::fixup_arm64_pcrel_call26: + case AArch64::fixup_aarch64_pcrel_adr_imm21: + case AArch64::fixup_aarch64_pcrel_adrp_imm21: + case AArch64::fixup_aarch64_pcrel_branch26: + case AArch64::fixup_aarch64_pcrel_call26: case FK_Data_4: return 4; @@ -132,49 +132,49 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { switch (Kind) { default: assert(false && "Unknown fixup kind!"); - case ARM64::fixup_arm64_pcrel_adr_imm21: + case AArch64::fixup_aarch64_pcrel_adr_imm21: if (SignedValue > 2097151 || SignedValue < -2097152) report_fatal_error("fixup value out of range"); return 
AdrImmBits(Value & 0x1fffffULL); - case ARM64::fixup_arm64_pcrel_adrp_imm21: + case AArch64::fixup_aarch64_pcrel_adrp_imm21: return AdrImmBits((Value & 0x1fffff000ULL) >> 12); - case ARM64::fixup_arm64_ldr_pcrel_imm19: - case ARM64::fixup_arm64_pcrel_branch19: + case AArch64::fixup_aarch64_ldr_pcrel_imm19: + case AArch64::fixup_aarch64_pcrel_branch19: // Signed 21-bit immediate if (SignedValue > 2097151 || SignedValue < -2097152) report_fatal_error("fixup value out of range"); // Low two bits are not encoded. return (Value >> 2) & 0x7ffff; - case ARM64::fixup_arm64_add_imm12: - case ARM64::fixup_arm64_ldst_imm12_scale1: + case AArch64::fixup_aarch64_add_imm12: + case AArch64::fixup_aarch64_ldst_imm12_scale1: // Unsigned 12-bit immediate if (Value >= 0x1000) report_fatal_error("invalid imm12 fixup value"); return Value; - case ARM64::fixup_arm64_ldst_imm12_scale2: + case AArch64::fixup_aarch64_ldst_imm12_scale2: // Unsigned 12-bit immediate which gets multiplied by 2 if (Value & 1 || Value >= 0x2000) report_fatal_error("invalid imm12 fixup value"); return Value >> 1; - case ARM64::fixup_arm64_ldst_imm12_scale4: + case AArch64::fixup_aarch64_ldst_imm12_scale4: // Unsigned 12-bit immediate which gets multiplied by 4 if (Value & 3 || Value >= 0x4000) report_fatal_error("invalid imm12 fixup value"); return Value >> 2; - case ARM64::fixup_arm64_ldst_imm12_scale8: + case AArch64::fixup_aarch64_ldst_imm12_scale8: // Unsigned 12-bit immediate which gets multiplied by 8 if (Value & 7 || Value >= 0x8000) report_fatal_error("invalid imm12 fixup value"); return Value >> 3; - case ARM64::fixup_arm64_ldst_imm12_scale16: + case AArch64::fixup_aarch64_ldst_imm12_scale16: // Unsigned 12-bit immediate which gets multiplied by 16 if (Value & 15 || Value >= 0x10000) report_fatal_error("invalid imm12 fixup value"); return Value >> 4; - case ARM64::fixup_arm64_movw: + case AArch64::fixup_aarch64_movw: report_fatal_error("no resolvable MOVZ/MOVK fixups supported yet"); return Value; - case ARM64::fixup_arm64_pcrel_branch14: + case AArch64::fixup_aarch64_pcrel_branch14: // Signed 16-bit immediate if (SignedValue > 32767 || SignedValue < -32768) report_fatal_error("fixup value out of range"); @@ -182,8 +182,8 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { if (Value & 0x3) report_fatal_error("fixup not sufficiently aligned"); return (Value >> 2) & 0x3fff; - case ARM64::fixup_arm64_pcrel_branch26: - case ARM64::fixup_arm64_pcrel_call26: + case AArch64::fixup_aarch64_pcrel_branch26: + case AArch64::fixup_aarch64_pcrel_call26: // Signed 28-bit immediate if (SignedValue > 134217727 || SignedValue < -134217728) report_fatal_error("fixup value out of range"); @@ -199,9 +199,9 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { } } -void ARM64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel) const { +void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value, + bool IsPCRel) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); if (!Value) return; // Doesn't change encoding. 
@@ -221,25 +221,27 @@ void ARM64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); } -bool ARM64AsmBackend::mayNeedRelaxation(const MCInst &Inst) const { +bool AArch64AsmBackend::mayNeedRelaxation(const MCInst &Inst) const { return false; } -bool ARM64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const { - // FIXME: This isn't correct for ARM64. Just moving the "generic" logic +bool AArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const { + // FIXME: This isn't correct for AArch64. Just moving the "generic" logic // into the targets for now. // // Relax if the value is too big for a (signed) i8. return int64_t(Value) != int64_t(int8_t(Value)); } -void ARM64AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { - assert(false && "ARM64AsmBackend::relaxInstruction() unimplemented"); +void AArch64AsmBackend::relaxInstruction(const MCInst &Inst, + MCInst &Res) const { + assert(false && "AArch64AsmBackend::relaxInstruction() unimplemented"); } -bool ARM64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { +bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { // If the count is not 4-byte aligned, we must be writing data into the text // section (otherwise we have unaligned instructions, and thus have far // bigger problems), so just write zeros instead. @@ -263,14 +265,14 @@ namespace CU { enum CompactUnwindEncodings { /// \brief A "frameless" leaf function, where no non-volatile registers are /// saved. The return remains in LR throughout the function. - UNWIND_ARM64_MODE_FRAMELESS = 0x02000000, + UNWIND_AArch64_MODE_FRAMELESS = 0x02000000, /// \brief No compact unwind encoding available. Instead the low 23-bits of /// the compact unwind encoding is the offset of the DWARF FDE in the /// __eh_frame section. This mode is never used in object files. It is only /// generated by the linker in final linked images, which have only DWARF info /// for a function. - UNWIND_ARM64_MODE_DWARF = 0x03000000, + UNWIND_AArch64_MODE_DWARF = 0x03000000, /// \brief This is a standard arm64 prologue where FP/LR are immediately /// pushed on the stack, then SP is copied to FP. If there are any @@ -278,40 +280,40 @@ enum CompactUnwindEncodings { /// in a contiguous ranger right below the saved FP/LR pair. Any subset of the /// five X pairs and four D pairs can be saved, but the memory layout must be /// in register number order. - UNWIND_ARM64_MODE_FRAME = 0x04000000, + UNWIND_AArch64_MODE_FRAME = 0x04000000, /// \brief Frame register pair encodings. 
- UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001, - UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002, - UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004, - UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008, - UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010, - UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100, - UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200, - UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400, - UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800 + UNWIND_AArch64_FRAME_X19_X20_PAIR = 0x00000001, + UNWIND_AArch64_FRAME_X21_X22_PAIR = 0x00000002, + UNWIND_AArch64_FRAME_X23_X24_PAIR = 0x00000004, + UNWIND_AArch64_FRAME_X25_X26_PAIR = 0x00000008, + UNWIND_AArch64_FRAME_X27_X28_PAIR = 0x00000010, + UNWIND_AArch64_FRAME_D8_D9_PAIR = 0x00000100, + UNWIND_AArch64_FRAME_D10_D11_PAIR = 0x00000200, + UNWIND_AArch64_FRAME_D12_D13_PAIR = 0x00000400, + UNWIND_AArch64_FRAME_D14_D15_PAIR = 0x00000800 }; } // end CU namespace // FIXME: This should be in a separate file. -class DarwinARM64AsmBackend : public ARM64AsmBackend { +class DarwinAArch64AsmBackend : public AArch64AsmBackend { const MCRegisterInfo &MRI; /// \brief Encode compact unwind stack adjustment for frameless functions. - /// See UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h. + /// See UNWIND_AArch64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h. /// The stack size always needs to be 16 byte aligned. uint32_t encodeStackAdjustment(uint32_t StackSize) const { return (StackSize / 16) << 12; } public: - DarwinARM64AsmBackend(const Target &T, const MCRegisterInfo &MRI) - : ARM64AsmBackend(T), MRI(MRI) {} + DarwinAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI) + : AArch64AsmBackend(T), MRI(MRI) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createARM64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64, - MachO::CPU_SUBTYPE_ARM64_ALL); + return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64, + MachO::CPU_SUBTYPE_ARM64_ALL); } bool doesSectionRequireSymbols(const MCSection &Section) const override { @@ -354,7 +356,7 @@ class DarwinARM64AsmBackend : public ARM64AsmBackend { uint32_t generateCompactUnwindEncoding( ArrayRef Instrs) const override { if (Instrs.empty()) - return CU::UNWIND_ARM64_MODE_FRAMELESS; + return CU::UNWIND_AArch64_MODE_FRAMELESS; bool HasFP = false; unsigned StackSize = 0; @@ -366,11 +368,11 @@ class DarwinARM64AsmBackend : public ARM64AsmBackend { switch (Inst.getOperation()) { default: // Cannot handle this directive: bail out. - return CU::UNWIND_ARM64_MODE_DWARF; + return CU::UNWIND_AArch64_MODE_DWARF; case MCCFIInstruction::OpDefCfa: { // Defines a frame pointer. assert(getXRegFromWReg(MRI.getLLVMRegNum(Inst.getRegister(), true)) == - ARM64::FP && + AArch64::FP && "Invalid frame pointer!"); assert(i + 2 < e && "Insufficient CFI instructions to define a frame!"); @@ -387,11 +389,11 @@ class DarwinARM64AsmBackend : public ARM64AsmBackend { LRReg = getXRegFromWReg(LRReg); FPReg = getXRegFromWReg(FPReg); - assert(LRReg == ARM64::LR && FPReg == ARM64::FP && + assert(LRReg == AArch64::LR && FPReg == AArch64::FP && "Pushing invalid registers for frame!"); // Indicate that the function has a frame. - CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAME; + CompactUnwindEncoding |= CU::UNWIND_AArch64_MODE_FRAME; HasFP = true; break; } @@ -405,11 +407,11 @@ class DarwinARM64AsmBackend : public ARM64AsmBackend { // `.cfi_offset' instructions with the appropriate registers specified. 
unsigned Reg1 = MRI.getLLVMRegNum(Inst.getRegister(), true); if (i + 1 == e) - return CU::UNWIND_ARM64_MODE_DWARF; + return CU::UNWIND_AArch64_MODE_DWARF; const MCCFIInstruction &Inst2 = Instrs[++i]; if (Inst2.getOperation() != MCCFIInstruction::OpOffset) - return CU::UNWIND_ARM64_MODE_DWARF; + return CU::UNWIND_AArch64_MODE_DWARF; unsigned Reg2 = MRI.getLLVMRegNum(Inst2.getRegister(), true); // N.B. The encodings must be in register number order, and the X @@ -423,21 +425,21 @@ class DarwinARM64AsmBackend : public ARM64AsmBackend { Reg1 = getXRegFromWReg(Reg1); Reg2 = getXRegFromWReg(Reg2); - if (Reg1 == ARM64::X19 && Reg2 == ARM64::X20 && + if (Reg1 == AArch64::X19 && Reg2 == AArch64::X20 && (CompactUnwindEncoding & 0xF1E) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X19_X20_PAIR; - else if (Reg1 == ARM64::X21 && Reg2 == ARM64::X22 && + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X19_X20_PAIR; + else if (Reg1 == AArch64::X21 && Reg2 == AArch64::X22 && (CompactUnwindEncoding & 0xF1C) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X21_X22_PAIR; - else if (Reg1 == ARM64::X23 && Reg2 == ARM64::X24 && + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X21_X22_PAIR; + else if (Reg1 == AArch64::X23 && Reg2 == AArch64::X24 && (CompactUnwindEncoding & 0xF18) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X23_X24_PAIR; - else if (Reg1 == ARM64::X25 && Reg2 == ARM64::X26 && + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X23_X24_PAIR; + else if (Reg1 == AArch64::X25 && Reg2 == AArch64::X26 && (CompactUnwindEncoding & 0xF10) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X25_X26_PAIR; - else if (Reg1 == ARM64::X27 && Reg2 == ARM64::X28 && + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X25_X26_PAIR; + else if (Reg1 == AArch64::X27 && Reg2 == AArch64::X28 && (CompactUnwindEncoding & 0xF00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X27_X28_PAIR; + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X27_X28_PAIR; else { Reg1 = getDRegFromBReg(Reg1); Reg2 = getDRegFromBReg(Reg2); @@ -446,20 +448,20 @@ class DarwinARM64AsmBackend : public ARM64AsmBackend { // D10/D11 pair = 0x00000200, // D12/D13 pair = 0x00000400, // D14/D15 pair = 0x00000800 - if (Reg1 == ARM64::D8 && Reg2 == ARM64::D9 && + if (Reg1 == AArch64::D8 && Reg2 == AArch64::D9 && (CompactUnwindEncoding & 0xE00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D8_D9_PAIR; - else if (Reg1 == ARM64::D10 && Reg2 == ARM64::D11 && + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D8_D9_PAIR; + else if (Reg1 == AArch64::D10 && Reg2 == AArch64::D11 && (CompactUnwindEncoding & 0xC00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D10_D11_PAIR; - else if (Reg1 == ARM64::D12 && Reg2 == ARM64::D13 && + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D10_D11_PAIR; + else if (Reg1 == AArch64::D12 && Reg2 == AArch64::D13 && (CompactUnwindEncoding & 0x800) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D12_D13_PAIR; - else if (Reg1 == ARM64::D14 && Reg2 == ARM64::D15) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D14_D15_PAIR; + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D12_D13_PAIR; + else if (Reg1 == AArch64::D14 && Reg2 == AArch64::D15) + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D14_D15_PAIR; else // A pair was pushed which we cannot handle. 
- return CU::UNWIND_ARM64_MODE_DWARF; + return CU::UNWIND_AArch64_MODE_DWARF; } break; @@ -471,9 +473,9 @@ class DarwinARM64AsmBackend : public ARM64AsmBackend { // With compact unwind info we can only represent stack adjustments of up // to 65520 bytes. if (StackSize > 65520) - return CU::UNWIND_ARM64_MODE_DWARF; + return CU::UNWIND_AArch64_MODE_DWARF; - CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAMELESS; + CompactUnwindEncoding |= CU::UNWIND_AArch64_MODE_FRAMELESS; CompactUnwindEncoding |= encodeStackAdjustment(StackSize); } @@ -485,16 +487,16 @@ class DarwinARM64AsmBackend : public ARM64AsmBackend { namespace { -class ELFARM64AsmBackend : public ARM64AsmBackend { +class ELFAArch64AsmBackend : public AArch64AsmBackend { public: uint8_t OSABI; bool IsLittleEndian; - ELFARM64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian) - : ARM64AsmBackend(T), OSABI(OSABI), IsLittleEndian(IsLittleEndian) {} + ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian) + : AArch64AsmBackend(T), OSABI(OSABI), IsLittleEndian(IsLittleEndian) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createARM64ELFObjectWriter(OS, OSABI, IsLittleEndian); + return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian); } void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, @@ -506,12 +508,10 @@ class ELFARM64AsmBackend : public ARM64AsmBackend { uint64_t Value, bool IsPCRel) const override; }; -void ELFARM64AsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, - uint64_t &Value, bool &IsResolved) { +void ELFAArch64AsmBackend::processFixupValue( + const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup, + const MCFragment *DF, const MCValue &Target, uint64_t &Value, + bool &IsResolved) { // The ADRP instruction adds some multiple of 0x1000 to the current PC & // ~0xfff. This means that the required offset to reach a symbol can vary by // up to one step depending on where the ADRP is in memory. For example: @@ -524,13 +524,13 @@ void ELFARM64AsmBackend::processFixupValue(const MCAssembler &Asm, // same page as the ADRP and the instruction should encode 0x0. Assuming the // section isn't 0x1000-aligned, we therefore need to delegate this decision // to the linker -- a relocation! 
- if ((uint32_t)Fixup.getKind() == ARM64::fixup_arm64_pcrel_adrp_imm21) + if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21) IsResolved = false; } -void ELFARM64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel) const { +void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value, + bool IsPCRel) const { // store fixups in .eh_frame section in big endian order if (!IsLittleEndian && Fixup.getKind() == FK_Data_4) { const MCSection *Sec = Fixup.getValue()->FindAssociatedSection(); @@ -538,27 +538,29 @@ void ELFARM64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, if (SecELF->getSectionName() == ".eh_frame") Value = ByteSwap_32(unsigned(Value)); } - ARM64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel); + AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel); } } -MCAsmBackend *llvm::createARM64leAsmBackend(const Target &T, +MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) - return new DarwinARM64AsmBackend(T, MRI); + return new DarwinAArch64AsmBackend(T, MRI); assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target"); - return new ELFARM64AsmBackend(T, TheTriple.getOS(), /*IsLittleEndian=*/true); + return new ELFAArch64AsmBackend(T, TheTriple.getOS(), /*IsLittleEndian=*/true); } -MCAsmBackend *llvm::createARM64beAsmBackend(const Target &T, +MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU) { Triple TheTriple(TT); - assert(TheTriple.isOSBinFormatELF() && "Big endian is only supported for ELF targets!"); - return new ELFARM64AsmBackend(T, TheTriple.getOS(), /*IsLittleEndian=*/false); + assert(TheTriple.isOSBinFormatELF() && + "Big endian is only supported for ELF targets!"); + return new ELFAArch64AsmBackend(T, TheTriple.getOS(), + /*IsLittleEndian=*/false); } diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp similarity index 56% rename from lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp rename to lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 0990a701bc87..e05191eaf3e0 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -1,4 +1,4 @@ -//===-- ARM64ELFObjectWriter.cpp - ARM64 ELF Writer -----------------------===// +//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===// // // The LLVM Compiler Infrastructure // @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "MCTargetDesc/ARM64MCTargetDesc.h" +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" @@ -22,11 +22,11 @@ using namespace llvm; namespace { -class ARM64ELFObjectWriter : public MCELFObjectTargetWriter { +class AArch64ELFObjectWriter : public MCELFObjectTargetWriter { public: - ARM64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian); + AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian); - virtual ~ARM64ELFObjectWriter(); + 
virtual ~AArch64ELFObjectWriter(); protected: unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, @@ -36,19 +36,20 @@ class ARM64ELFObjectWriter : public MCELFObjectTargetWriter { }; } -ARM64ELFObjectWriter::ARM64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian) +AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, + bool IsLittleEndian) : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64, /*HasRelocationAddend*/ true) {} -ARM64ELFObjectWriter::~ARM64ELFObjectWriter() {} +AArch64ELFObjectWriter::~AArch64ELFObjectWriter() {} -unsigned ARM64ELFObjectWriter::GetRelocType(const MCValue &Target, +unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - ARM64MCExpr::VariantKind RefKind = - static_cast(Target.getRefKind()); - ARM64MCExpr::VariantKind SymLoc = ARM64MCExpr::getSymbolLoc(RefKind); - bool IsNC = ARM64MCExpr::isNotChecked(RefKind); + AArch64MCExpr::VariantKind RefKind = + static_cast(Target.getRefKind()); + AArch64MCExpr::VariantKind SymLoc = AArch64MCExpr::getSymbolLoc(RefKind); + bool IsNC = AArch64MCExpr::isNotChecked(RefKind); assert((!Target.getSymA() || Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None) && @@ -66,30 +67,30 @@ unsigned ARM64ELFObjectWriter::GetRelocType(const MCValue &Target, return ELF::R_AARCH64_PREL32; case FK_Data_8: return ELF::R_AARCH64_PREL64; - case ARM64::fixup_arm64_pcrel_adr_imm21: - assert(SymLoc == ARM64MCExpr::VK_NONE && "unexpected ADR relocation"); + case AArch64::fixup_aarch64_pcrel_adr_imm21: + assert(SymLoc == AArch64MCExpr::VK_NONE && "unexpected ADR relocation"); return ELF::R_AARCH64_ADR_PREL_LO21; - case ARM64::fixup_arm64_pcrel_adrp_imm21: - if (SymLoc == ARM64MCExpr::VK_ABS && !IsNC) + case AArch64::fixup_aarch64_pcrel_adrp_imm21: + if (SymLoc == AArch64MCExpr::VK_ABS && !IsNC) return ELF::R_AARCH64_ADR_PREL_PG_HI21; - if (SymLoc == ARM64MCExpr::VK_GOT && !IsNC) + if (SymLoc == AArch64MCExpr::VK_GOT && !IsNC) return ELF::R_AARCH64_ADR_GOT_PAGE; - if (SymLoc == ARM64MCExpr::VK_GOTTPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_GOTTPREL && !IsNC) return ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21; - if (SymLoc == ARM64MCExpr::VK_TLSDESC && !IsNC) + if (SymLoc == AArch64MCExpr::VK_TLSDESC && !IsNC) return ELF::R_AARCH64_TLSDESC_ADR_PAGE; llvm_unreachable("invalid symbol kind for ADRP relocation"); - case ARM64::fixup_arm64_pcrel_branch26: + case AArch64::fixup_aarch64_pcrel_branch26: return ELF::R_AARCH64_JUMP26; - case ARM64::fixup_arm64_pcrel_call26: + case AArch64::fixup_aarch64_pcrel_call26: return ELF::R_AARCH64_CALL26; - case ARM64::fixup_arm64_ldr_pcrel_imm19: - if (SymLoc == ARM64MCExpr::VK_GOTTPREL) + case AArch64::fixup_aarch64_ldr_pcrel_imm19: + if (SymLoc == AArch64MCExpr::VK_GOTTPREL) return ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19; return ELF::R_AARCH64_LD_PREL_LO19; - case ARM64::fixup_arm64_pcrel_branch14: + case AArch64::fixup_aarch64_pcrel_branch14: return ELF::R_AARCH64_TSTBR14; - case ARM64::fixup_arm64_pcrel_branch19: + case AArch64::fixup_aarch64_pcrel_branch19: return ELF::R_AARCH64_CONDBR19; default: llvm_unreachable("Unsupported pc-relative fixup kind"); @@ -102,142 +103,142 @@ unsigned ARM64ELFObjectWriter::GetRelocType(const MCValue &Target, return ELF::R_AARCH64_ABS32; case FK_Data_8: return ELF::R_AARCH64_ABS64; - case ARM64::fixup_arm64_add_imm12: - if (RefKind == ARM64MCExpr::VK_DTPREL_HI12) + case AArch64::fixup_aarch64_add_imm12: + if (RefKind == AArch64MCExpr::VK_DTPREL_HI12) return 
ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12; - if (RefKind == ARM64MCExpr::VK_TPREL_HI12) + if (RefKind == AArch64MCExpr::VK_TPREL_HI12) return ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12; - if (RefKind == ARM64MCExpr::VK_DTPREL_LO12_NC) + if (RefKind == AArch64MCExpr::VK_DTPREL_LO12_NC) return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC; - if (RefKind == ARM64MCExpr::VK_DTPREL_LO12) + if (RefKind == AArch64MCExpr::VK_DTPREL_LO12) return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12; - if (RefKind == ARM64MCExpr::VK_TPREL_LO12_NC) + if (RefKind == AArch64MCExpr::VK_TPREL_LO12_NC) return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; - if (RefKind == ARM64MCExpr::VK_TPREL_LO12) + if (RefKind == AArch64MCExpr::VK_TPREL_LO12) return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12; - if (RefKind == ARM64MCExpr::VK_TLSDESC_LO12) + if (RefKind == AArch64MCExpr::VK_TLSDESC_LO12) return ELF::R_AARCH64_TLSDESC_ADD_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return ELF::R_AARCH64_ADD_ABS_LO12_NC; report_fatal_error("invalid fixup for add (uimm12) instruction"); return 0; - case ARM64::fixup_arm64_ldst_imm12_scale1: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) + case AArch64::fixup_aarch64_ldst_imm12_scale1: + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return ELF::R_AARCH64_LDST8_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC) return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; report_fatal_error("invalid fixup for 8-bit load/store instruction"); return 0; - case ARM64::fixup_arm64_ldst_imm12_scale2: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) + case AArch64::fixup_aarch64_ldst_imm12_scale2: + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return ELF::R_AARCH64_LDST16_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC) return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; report_fatal_error("invalid fixup for 16-bit load/store instruction"); return 0; - case ARM64::fixup_arm64_ldst_imm12_scale4: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) + case AArch64::fixup_aarch64_ldst_imm12_scale4: + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return ELF::R_AARCH64_LDST32_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC) return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12; - if 
(SymLoc == ARM64MCExpr::VK_TPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; report_fatal_error("invalid fixup for 32-bit load/store instruction"); return 0; - case ARM64::fixup_arm64_ldst_imm12_scale8: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) + case AArch64::fixup_aarch64_ldst_imm12_scale8: + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return ELF::R_AARCH64_LDST64_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_GOT && IsNC) + if (SymLoc == AArch64MCExpr::VK_GOT && IsNC) return ELF::R_AARCH64_LD64_GOT_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC) return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_GOTTPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_GOTTPREL && IsNC) return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TLSDESC && IsNC) + if (SymLoc == AArch64MCExpr::VK_TLSDESC && IsNC) return ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; report_fatal_error("invalid fixup for 64-bit load/store instruction"); return 0; - case ARM64::fixup_arm64_ldst_imm12_scale16: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) + case AArch64::fixup_aarch64_ldst_imm12_scale16: + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return ELF::R_AARCH64_LDST128_ABS_LO12_NC; report_fatal_error("invalid fixup for 128-bit load/store instruction"); return 0; - case ARM64::fixup_arm64_movw: - if (RefKind == ARM64MCExpr::VK_ABS_G3) + case AArch64::fixup_aarch64_movw: + if (RefKind == AArch64MCExpr::VK_ABS_G3) return ELF::R_AARCH64_MOVW_UABS_G3; - if (RefKind == ARM64MCExpr::VK_ABS_G2) + if (RefKind == AArch64MCExpr::VK_ABS_G2) return ELF::R_AARCH64_MOVW_UABS_G2; - if (RefKind == ARM64MCExpr::VK_ABS_G2_S) + if (RefKind == AArch64MCExpr::VK_ABS_G2_S) return ELF::R_AARCH64_MOVW_SABS_G2; - if (RefKind == ARM64MCExpr::VK_ABS_G2_NC) + if (RefKind == AArch64MCExpr::VK_ABS_G2_NC) return ELF::R_AARCH64_MOVW_UABS_G2_NC; - if (RefKind == ARM64MCExpr::VK_ABS_G1) + if (RefKind == AArch64MCExpr::VK_ABS_G1) return ELF::R_AARCH64_MOVW_UABS_G1; - if (RefKind == ARM64MCExpr::VK_ABS_G1_S) + if (RefKind == AArch64MCExpr::VK_ABS_G1_S) return ELF::R_AARCH64_MOVW_SABS_G1; - if (RefKind == ARM64MCExpr::VK_ABS_G1_NC) + if (RefKind == AArch64MCExpr::VK_ABS_G1_NC) return ELF::R_AARCH64_MOVW_UABS_G1_NC; - if (RefKind == ARM64MCExpr::VK_ABS_G0) + if (RefKind == AArch64MCExpr::VK_ABS_G0) return ELF::R_AARCH64_MOVW_UABS_G0; - if (RefKind == ARM64MCExpr::VK_ABS_G0_S) + if (RefKind == AArch64MCExpr::VK_ABS_G0_S) return ELF::R_AARCH64_MOVW_SABS_G0; - if (RefKind == ARM64MCExpr::VK_ABS_G0_NC) + if (RefKind == AArch64MCExpr::VK_ABS_G0_NC) return ELF::R_AARCH64_MOVW_UABS_G0_NC; - if (RefKind == ARM64MCExpr::VK_DTPREL_G2) + if (RefKind == AArch64MCExpr::VK_DTPREL_G2) return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; - if (RefKind == ARM64MCExpr::VK_DTPREL_G1) + if (RefKind == AArch64MCExpr::VK_DTPREL_G1) return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1; - if (RefKind == ARM64MCExpr::VK_DTPREL_G1_NC) + if (RefKind == AArch64MCExpr::VK_DTPREL_G1_NC) return 
ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC; - if (RefKind == ARM64MCExpr::VK_DTPREL_G0) + if (RefKind == AArch64MCExpr::VK_DTPREL_G0) return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0; - if (RefKind == ARM64MCExpr::VK_DTPREL_G0_NC) + if (RefKind == AArch64MCExpr::VK_DTPREL_G0_NC) return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC; - if (RefKind == ARM64MCExpr::VK_TPREL_G2) + if (RefKind == AArch64MCExpr::VK_TPREL_G2) return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2; - if (RefKind == ARM64MCExpr::VK_TPREL_G1) + if (RefKind == AArch64MCExpr::VK_TPREL_G1) return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1; - if (RefKind == ARM64MCExpr::VK_TPREL_G1_NC) + if (RefKind == AArch64MCExpr::VK_TPREL_G1_NC) return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC; - if (RefKind == ARM64MCExpr::VK_TPREL_G0) + if (RefKind == AArch64MCExpr::VK_TPREL_G0) return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0; - if (RefKind == ARM64MCExpr::VK_TPREL_G0_NC) + if (RefKind == AArch64MCExpr::VK_TPREL_G0_NC) return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC; - if (RefKind == ARM64MCExpr::VK_GOTTPREL_G1) + if (RefKind == AArch64MCExpr::VK_GOTTPREL_G1) return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1; - if (RefKind == ARM64MCExpr::VK_GOTTPREL_G0_NC) + if (RefKind == AArch64MCExpr::VK_GOTTPREL_G0_NC) return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC; report_fatal_error("invalid fixup for movz/movk instruction"); return 0; - case ARM64::fixup_arm64_tlsdesc_call: + case AArch64::fixup_aarch64_tlsdesc_call: return ELF::R_AARCH64_TLSDESC_CALL; default: llvm_unreachable("Unknown ELF relocation type"); @@ -247,9 +248,10 @@ unsigned ARM64ELFObjectWriter::GetRelocType(const MCValue &Target, llvm_unreachable("Unimplemented fixup -> relocation"); } -MCObjectWriter *llvm::createARM64ELFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI, bool IsLittleEndian) { - MCELFObjectTargetWriter *MOTW = new ARM64ELFObjectWriter(OSABI, IsLittleEndian); + MCELFObjectTargetWriter *MOTW = + new AArch64ELFObjectWriter(OSABI, IsLittleEndian); return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp similarity index 86% rename from lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp rename to lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index adbf83079725..a79406d9d1fe 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -1,4 +1,4 @@ -//===- lib/MC/ARM64ELFStreamer.cpp - ELF Object Output for ARM64 ----------===// +//===- lib/MC/AArch64ELFStreamer.cpp - ELF Object Output for AArch64 ------===// // // The LLVM Compiler Infrastructure // @@ -53,14 +53,14 @@ namespace { /// /// As a result this system is orthogonal to the DataRegion infrastructure used /// by MachO. Beware! 
-class ARM64ELFStreamer : public MCELFStreamer { +class AArch64ELFStreamer : public MCELFStreamer { public: - ARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, + AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter) : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0), LastEMS(EMS_None) {} - ~ARM64ELFStreamer() {} + ~AArch64ELFStreamer() {} void ChangeSection(const MCSection *Section, const MCExpr *Subsection) override { @@ -83,7 +83,7 @@ class ARM64ELFStreamer : public MCELFStreamer { } /// This is one of the functions used to emit data into an ELF section, so the - /// ARM64 streamer overrides it to add the appropriate mapping symbol ($d) + /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) /// if necessary. void EmitBytes(StringRef Data) override { EmitDataMappingSymbol(); @@ -91,7 +91,7 @@ class ARM64ELFStreamer : public MCELFStreamer { } /// This is one of the functions used to emit data into an ELF section, so the - /// ARM64 streamer overrides it to add the appropriate mapping symbol ($d) + /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) /// if necessary. void EmitValueImpl(const MCExpr *Value, unsigned Size, const SMLoc &Loc) override { @@ -147,10 +147,10 @@ class ARM64ELFStreamer : public MCELFStreamer { } namespace llvm { -MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack) { - ARM64ELFStreamer *S = new ARM64ELFStreamer(Context, TAB, OS, Emitter); +MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack) { + AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); if (RelaxAll) S->getAssembler().setRelaxAll(true); if (NoExecStack) diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h similarity index 55% rename from lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h rename to lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h index 72dadbc50aa9..bc6973bd5f8b 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h @@ -1,4 +1,4 @@ -//===-- ARM64ELFStreamer.h - ELF Streamer for ARM64 -------------*- C++ -*-===// +//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements ELF streamer information for the ARM64 backend. +// This file implements ELF streamer information for the AArch64 backend. 
// //===----------------------------------------------------------------------===// @@ -18,9 +18,9 @@ namespace llvm { -MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack); +MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack); } -#endif // ARM64_ELF_STREAMER_H +#endif // AArch64_ELF_STREAMER_H diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h new file mode 100644 index 000000000000..bf405fbac77b --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h @@ -0,0 +1,76 @@ +//===-- AArch64FixupKinds.h - AArch64 Specific Fixup Entries ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AArch64FIXUPKINDS_H +#define LLVM_AArch64FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace AArch64 { + +enum Fixups { + // fixup_aarch64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into + // an ADR instruction. + fixup_aarch64_pcrel_adr_imm21 = FirstTargetFixupKind, + + // fixup_aarch64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into + // an ADRP instruction. + fixup_aarch64_pcrel_adrp_imm21, + + // fixup_aarch64_imm12 - 12-bit fixup for add/sub instructions. + // No alignment adjustment. All value bits are encoded. + fixup_aarch64_add_imm12, + + // fixup_aarch64_ldst_imm12_* - unsigned 12-bit fixups for load and + // store instructions. + fixup_aarch64_ldst_imm12_scale1, + fixup_aarch64_ldst_imm12_scale2, + fixup_aarch64_ldst_imm12_scale4, + fixup_aarch64_ldst_imm12_scale8, + fixup_aarch64_ldst_imm12_scale16, + + // fixup_aarch64_ldr_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative + // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is used by + // pc-relative loads and generates relocations directly when necessary. + fixup_aarch64_ldr_pcrel_imm19, + + // FIXME: comment + fixup_aarch64_movw, + + // fixup_aarch64_pcrel_imm14 - The high 14 bits of a 21-bit pc-relative + // immediate. + fixup_aarch64_pcrel_branch14, + + // fixup_aarch64_pcrel_branch19 - The high 19 bits of a 21-bit pc-relative + // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is use by + // b.cc and generates relocations directly when necessary. + fixup_aarch64_pcrel_branch19, + + // fixup_aarch64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative + // immediate. + fixup_aarch64_pcrel_branch26, + + // fixup_aarch64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative + // immediate. Distinguished from branch26 only on ELF. + fixup_aarch64_pcrel_call26, + + // fixup_aarch64_tlsdesc_call - zero-space placeholder for the ELF + // R_AARCH64_TLSDESC_CALL relocation. 
+ fixup_aarch64_tlsdesc_call, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; + +} // end namespace AArch64 +} // end namespace llvm + +#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp similarity index 82% rename from lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp rename to lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index e211d3428bf6..dc4a8bf6c9a9 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARM64MCAsmInfo.cpp - ARM64 asm properties -----------------------===// +//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===// // // The LLVM Compiler Infrastructure // @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// // -// This file contains the declarations of the ARM64MCAsmInfo properties. +// This file contains the declarations of the AArch64MCAsmInfo properties. // //===----------------------------------------------------------------------===// -#include "ARM64MCAsmInfo.h" +#include "AArch64MCAsmInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCContext.h" @@ -26,13 +26,13 @@ enum AsmWriterVariantTy { }; static cl::opt AsmWriterVariant( - "arm64-neon-syntax", cl::init(Default), - cl::desc("Choose style of NEON code to emit from ARM64 backend:"), + "aarch64-neon-syntax", cl::init(Default), + cl::desc("Choose style of NEON code to emit from AArch64 backend:"), cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"), clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"), clEnumValEnd)); -ARM64MCAsmInfoDarwin::ARM64MCAsmInfoDarwin() { +AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() { // We prefer NEON instructions to be printed in the short form. AssemblerDialect = AsmWriterVariant == Default ? 1 : AsmWriterVariant; @@ -49,7 +49,7 @@ ARM64MCAsmInfoDarwin::ARM64MCAsmInfoDarwin() { ExceptionsType = ExceptionHandling::DwarfCFI; } -const MCExpr *ARM64MCAsmInfoDarwin::getExprForPersonalitySymbol( +const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol( const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const { // On Darwin, we can reference dwarf symbols with foo@GOT-., which // is an indirect pc-relative reference. The default implementation @@ -64,9 +64,9 @@ const MCExpr *ARM64MCAsmInfoDarwin::getExprForPersonalitySymbol( return MCBinaryExpr::CreateSub(Res, PC, Context); } -ARM64MCAsmInfoELF::ARM64MCAsmInfoELF(StringRef TT) { +AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) { Triple T(TT); - if (T.getArch() == Triple::arm64_be) + if (T.getArch() == Triple::arm64_be || T.getArch() == Triple::aarch64_be) IsLittleEndian = false; // We prefer NEON instructions to be printed in the short form. 
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h similarity index 61% rename from lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h rename to lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 324bc39560f0..42a031d7c2ca 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -1,4 +1,4 @@ -//=====-- ARM64MCAsmInfo.h - ARM64 asm properties -----------*- C++ -*--====// +//=====-- AArch64MCAsmInfo.h - AArch64 asm properties ---------*- C++ -*--====// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This file contains the declaration of the ARM64MCAsmInfo class. +// This file contains the declaration of the AArch64MCAsmInfo class. // //===----------------------------------------------------------------------===// -#ifndef ARM64TARGETASMINFO_H -#define ARM64TARGETASMINFO_H +#ifndef AArch64TARGETASMINFO_H +#define AArch64TARGETASMINFO_H #include "llvm/MC/MCAsmInfoDarwin.h" @@ -20,15 +20,15 @@ namespace llvm { class Target; class StringRef; class MCStreamer; -struct ARM64MCAsmInfoDarwin : public MCAsmInfoDarwin { - explicit ARM64MCAsmInfoDarwin(); +struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin { + explicit AArch64MCAsmInfoDarwin(); const MCExpr * getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const override; }; -struct ARM64MCAsmInfoELF : public MCAsmInfo { - explicit ARM64MCAsmInfoELF(StringRef TT); +struct AArch64MCAsmInfoELF : public MCAsmInfo { + explicit AArch64MCAsmInfoELF(StringRef TT); }; } // namespace llvm diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp similarity index 73% rename from lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp rename to lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 0db08f422e41..464a18cdbc04 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -1,4 +1,4 @@ -//===-- ARM64/ARM64MCCodeEmitter.cpp - Convert ARM64 code to machine code -===// +//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code-=// // // The LLVM Compiler Infrastructure // @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// // -// This file implements the ARM64MCCodeEmitter class. +// This file implements the AArch64MCCodeEmitter class. 
// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "Utils/ARM64BaseInfo.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" @@ -32,17 +32,17 @@ STATISTIC(MCNumFixups, "Number of MC fixups created."); namespace { -class ARM64MCCodeEmitter : public MCCodeEmitter { +class AArch64MCCodeEmitter : public MCCodeEmitter { MCContext &Ctx; - ARM64MCCodeEmitter(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT + AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT public: - ARM64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, + AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, MCContext &ctx) : Ctx(ctx) {} - ~ARM64MCCodeEmitter() {} + ~AArch64MCCodeEmitter() {} // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. @@ -203,19 +203,19 @@ class ARM64MCCodeEmitter : public MCCodeEmitter { } // end anonymous namespace -MCCodeEmitter *llvm::createARM64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new ARM64MCCodeEmitter(MCII, STI, Ctx); +MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new AArch64MCCodeEmitter(MCII, STI, Ctx); } /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. unsigned -ARM64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { if (MO.isReg()) return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); else { @@ -228,9 +228,9 @@ ARM64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, } template uint32_t -ARM64MCCodeEmitter::getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); uint32_t ImmVal = 0; @@ -249,9 +249,9 @@ ARM64MCCodeEmitter::getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx, /// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label /// target. uint32_t -ARM64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); // If the destination is an immediate, we have nothing to do. 
@@ -260,9 +260,9 @@ ARM64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, assert(MO.isExpr() && "Unexpected target type!"); const MCExpr *Expr = MO.getExpr(); - MCFixupKind Kind = MI.getOpcode() == ARM64::ADR - ? MCFixupKind(ARM64::fixup_arm64_pcrel_adr_imm21) - : MCFixupKind(ARM64::fixup_arm64_pcrel_adrp_imm21); + MCFixupKind Kind = MI.getOpcode() == AArch64::ADR + ? MCFixupKind(AArch64::fixup_aarch64_pcrel_adr_imm21) + : MCFixupKind(AArch64::fixup_aarch64_pcrel_adrp_imm21); Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); MCNumFixups += 1; @@ -275,15 +275,15 @@ ARM64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, /// the 2-bit shift field. The shift field is stored in bits 13-14 of the /// return value. uint32_t -ARM64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { // Suboperands are [imm, shifter]. const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - assert(ARM64_AM::getShiftType(MO1.getImm()) == ARM64_AM::LSL && + assert(AArch64_AM::getShiftType(MO1.getImm()) == AArch64_AM::LSL && "unexpected shift type for add/sub immediate"); - unsigned ShiftVal = ARM64_AM::getShiftValue(MO1.getImm()); + unsigned ShiftVal = AArch64_AM::getShiftValue(MO1.getImm()); assert((ShiftVal == 0 || ShiftVal == 12) && "unexpected shift value for add/sub immediate"); if (MO.isImm()) @@ -292,7 +292,7 @@ ARM64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, const MCExpr *Expr = MO.getExpr(); // Encode the 12 bits of the fixup. - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_add_imm12); + MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_add_imm12); Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumFixups; @@ -302,7 +302,7 @@ ARM64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, /// getCondBranchTargetOpValue - Return the encoded value for a conditional /// branch target. -uint32_t ARM64MCCodeEmitter::getCondBranchTargetOpValue( +uint32_t AArch64MCCodeEmitter::getCondBranchTargetOpValue( const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); @@ -312,7 +312,7 @@ uint32_t ARM64MCCodeEmitter::getCondBranchTargetOpValue( return MO.getImm(); assert(MO.isExpr() && "Unexpected target type!"); - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_branch19); + MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch19); Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); ++MCNumFixups; @@ -324,9 +324,9 @@ uint32_t ARM64MCCodeEmitter::getCondBranchTargetOpValue( /// getLoadLiteralOpValue - Return the encoded value for a load-literal /// pc-relative address. uint32_t -ARM64MCCodeEmitter::getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); // If the destination is an immediate, we have nothing to do. 
@@ -334,7 +334,7 @@ ARM64MCCodeEmitter::getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx, return MO.getImm(); assert(MO.isExpr() && "Unexpected target type!"); - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_ldr_pcrel_imm19); + MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_ldr_pcrel_imm19); Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); ++MCNumFixups; @@ -344,18 +344,18 @@ ARM64MCCodeEmitter::getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx, } uint32_t -ARM64MCCodeEmitter::getMemExtendOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getMemExtendOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { unsigned SignExtend = MI.getOperand(OpIdx).getImm(); unsigned DoShift = MI.getOperand(OpIdx + 1).getImm(); return (SignExtend << 1) | DoShift; } uint32_t -ARM64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); if (MO.isImm()) @@ -363,7 +363,7 @@ ARM64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, assert(MO.isExpr() && "Unexpected movz/movk immediate"); Fixups.push_back(MCFixup::Create( - 0, MO.getExpr(), MCFixupKind(ARM64::fixup_arm64_movw), MI.getLoc())); + 0, MO.getExpr(), MCFixupKind(AArch64::fixup_aarch64_movw), MI.getLoc())); ++MCNumFixups; @@ -372,7 +372,7 @@ ARM64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, /// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and- /// branch target. -uint32_t ARM64MCCodeEmitter::getTestBranchTargetOpValue( +uint32_t AArch64MCCodeEmitter::getTestBranchTargetOpValue( const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); @@ -382,7 +382,7 @@ uint32_t ARM64MCCodeEmitter::getTestBranchTargetOpValue( return MO.getImm(); assert(MO.isExpr() && "Unexpected ADR target type!"); - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_branch14); + MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch14); Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); ++MCNumFixups; @@ -394,9 +394,9 @@ uint32_t ARM64MCCodeEmitter::getTestBranchTargetOpValue( /// getBranchTargetOpValue - Return the encoded value for an unconditional /// branch target. uint32_t -ARM64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); // If the destination is an immediate, we have nothing to do. @@ -404,9 +404,9 @@ ARM64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, return MO.getImm(); assert(MO.isExpr() && "Unexpected ADR target type!"); - MCFixupKind Kind = MI.getOpcode() == ARM64::BL - ? MCFixupKind(ARM64::fixup_arm64_pcrel_call26) - : MCFixupKind(ARM64::fixup_arm64_pcrel_branch26); + MCFixupKind Kind = MI.getOpcode() == AArch64::BL + ? 
MCFixupKind(AArch64::fixup_aarch64_pcrel_call26) + : MCFixupKind(AArch64::fixup_aarch64_pcrel_branch26); Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); ++MCNumFixups; @@ -422,9 +422,9 @@ ARM64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, /// 10 -> 16 /// 11 -> 24 uint32_t -ARM64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the shift amount!"); @@ -446,36 +446,35 @@ ARM64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx, } uint32_t -ARM64MCCodeEmitter::getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the shift amount!"); return 64 - (MO.getImm()); } -uint32_t -ARM64MCCodeEmitter::getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +uint32_t AArch64MCCodeEmitter::getSIMDShift64_32OpValue( + const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the shift amount!"); return 64 - (MO.getImm() | 32); } uint32_t -ARM64MCCodeEmitter::getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the shift amount!"); return 32 - (MO.getImm() | 16); } uint32_t -ARM64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the shift amount!"); return 16 - (MO.getImm() | 8); @@ -483,7 +482,7 @@ ARM64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx, /// getFixedPointScaleOpValue - Return the encoded value for the // FP-to-fixed-point scale factor. 
-uint32_t ARM64MCCodeEmitter::getFixedPointScaleOpValue( +uint32_t AArch64MCCodeEmitter::getFixedPointScaleOpValue( const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); @@ -492,72 +491,72 @@ uint32_t ARM64MCCodeEmitter::getFixedPointScaleOpValue( } uint32_t -ARM64MCCodeEmitter::getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return 64 - MO.getImm(); } uint32_t -ARM64MCCodeEmitter::getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return 32 - MO.getImm(); } uint32_t -ARM64MCCodeEmitter::getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return 16 - MO.getImm(); } uint32_t -ARM64MCCodeEmitter::getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return 8 - MO.getImm(); } uint32_t -ARM64MCCodeEmitter::getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return MO.getImm() - 64; } uint32_t -ARM64MCCodeEmitter::getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return MO.getImm() - 32; } uint32_t -ARM64MCCodeEmitter::getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return MO.getImm() - 16; } uint32_t -ARM64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo 
&STI) const { +AArch64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return MO.getImm() - 8; @@ -565,20 +564,19 @@ ARM64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx, /// getMoveVecShifterOpValue - Return the encoded value for the vector move /// shifter (MSL). -uint32_t -ARM64MCCodeEmitter::getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +uint32_t AArch64MCCodeEmitter::getMoveVecShifterOpValue( + const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the move shift amount!"); - unsigned ShiftVal = ARM64_AM::getShiftValue(MO.getImm()); + unsigned ShiftVal = AArch64_AM::getShiftValue(MO.getImm()); assert((ShiftVal == 8 || ShiftVal == 16) && "Invalid shift amount!"); return ShiftVal == 8 ? 0 : 1; } -unsigned ARM64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const { +unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, + const MCSubtargetInfo &STI) const { // If one of the signed fixup kinds is applied to a MOVZ instruction, the // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's // job to ensure that any bits possibly affected by this are 0. This means we @@ -589,15 +587,15 @@ unsigned ARM64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, if (UImm16MO.isImm()) return EncodedValue; - const ARM64MCExpr *A64E = cast(UImm16MO.getExpr()); + const AArch64MCExpr *A64E = cast(UImm16MO.getExpr()); switch (A64E->getKind()) { - case ARM64MCExpr::VK_DTPREL_G2: - case ARM64MCExpr::VK_DTPREL_G1: - case ARM64MCExpr::VK_DTPREL_G0: - case ARM64MCExpr::VK_GOTTPREL_G1: - case ARM64MCExpr::VK_TPREL_G2: - case ARM64MCExpr::VK_TPREL_G1: - case ARM64MCExpr::VK_TPREL_G0: + case AArch64MCExpr::VK_DTPREL_G2: + case AArch64MCExpr::VK_DTPREL_G1: + case AArch64MCExpr::VK_DTPREL_G0: + case AArch64MCExpr::VK_GOTTPREL_G1: + case AArch64MCExpr::VK_TPREL_G2: + case AArch64MCExpr::VK_TPREL_G1: + case AArch64MCExpr::VK_TPREL_G0: return EncodedValue & ~(1u << 30); default: // Nothing to do for an unsigned fixup. @@ -608,14 +606,14 @@ unsigned ARM64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, return EncodedValue & ~(1u << 30); } -void ARM64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - if (MI.getOpcode() == ARM64::TLSDESCCALL) { +void AArch64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + if (MI.getOpcode() == AArch64::TLSDESCCALL) { // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the // following (BLR) instruction. It doesn't emit any code itself so it // doesn't go through the normal TableGenerated channels. 
- MCFixupKind Fixup = MCFixupKind(ARM64::fixup_arm64_tlsdesc_call); + MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call); Fixups.push_back(MCFixup::Create(0, MI.getOperand(0).getExpr(), Fixup)); return; } @@ -626,9 +624,9 @@ void ARM64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, } unsigned -ARM64MCCodeEmitter::fixMulHigh(const MCInst &MI, - unsigned EncodedValue, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI, + unsigned EncodedValue, + const MCSubtargetInfo &STI) const { // The Ra field of SMULH and UMULH is unused: it should be assembled as 31 // (i.e. all bits 1) but is ignored by the processor. EncodedValue |= 0x1f << 10; @@ -636,23 +634,21 @@ ARM64MCCodeEmitter::fixMulHigh(const MCInst &MI, } template unsigned -ARM64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI, - unsigned EncodedValue, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI, + unsigned EncodedValue, + const MCSubtargetInfo &STI) const { if (!hasRs) EncodedValue |= 0x001F0000; if (!hasRt2) EncodedValue |= 0x00007C00; return EncodedValue; } -unsigned -ARM64MCCodeEmitter::fixOneOperandFPComparison(const MCInst &MI, - unsigned EncodedValue, - const MCSubtargetInfo &STI) const { +unsigned AArch64MCCodeEmitter::fixOneOperandFPComparison( + const MCInst &MI, unsigned EncodedValue, const MCSubtargetInfo &STI) const { // The Rm field of FCMP and friends is unused - it should be assembled // as 0, but is ignored by the processor. EncodedValue &= ~(0x1f << 16); return EncodedValue; } -#include "ARM64GenMCCodeEmitter.inc" +#include "AArch64GenMCCodeEmitter.inc" diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp similarity index 89% rename from lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp rename to lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index efa820b097f1..85c3ec7a55f1 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -1,4 +1,4 @@ -//===-- ARM64MCExpr.cpp - ARM64 specific MC expression classes --------===// +//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===// // // The LLVM Compiler Infrastructure // @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "ARM64MCExpr.h" +#include "AArch64MCExpr.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELF.h" @@ -25,12 +25,12 @@ using namespace llvm; #define DEBUG_TYPE "aarch64symbolrefexpr" -const ARM64MCExpr *ARM64MCExpr::Create(const MCExpr *Expr, VariantKind Kind, +const AArch64MCExpr *AArch64MCExpr::Create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx) { - return new (Ctx) ARM64MCExpr(Expr, Kind); + return new (Ctx) AArch64MCExpr(Expr, Kind); } -StringRef ARM64MCExpr::getVariantKindName() const { +StringRef AArch64MCExpr::getVariantKindName() const { switch (static_cast(getKind())) { case VK_CALL: return ""; case VK_LO12: return ":lo12:"; @@ -75,7 +75,7 @@ StringRef ARM64MCExpr::getVariantKindName() const { } } -void ARM64MCExpr::PrintImpl(raw_ostream &OS) const { +void AArch64MCExpr::PrintImpl(raw_ostream &OS) const { if (getKind() != VK_NONE) OS << getVariantKindName(); OS << *Expr; @@ -110,15 +110,15 @@ static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { } } -void ARM64MCExpr::AddValueSymbols(MCAssembler *Asm) const { +void 
AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const { AddValueSymbolsImpl(getSubExpr(), Asm); } -const MCSection *ARM64MCExpr::FindAssociatedSection() const { +const MCSection *AArch64MCExpr::FindAssociatedSection() const { llvm_unreachable("FIXME: what goes here?"); } -bool ARM64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, +bool AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const { if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout)) return false; @@ -159,7 +159,7 @@ static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { } } -void ARM64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { +void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { switch (getSymbolLoc(Kind)) { default: return; diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h similarity index 92% rename from lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h rename to lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index d8325465178e..e869ed0a26a4 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -1,4 +1,4 @@ -//=---- ARM64MCExpr.h - ARM64 specific MC expression classes ------*- C++ -*-=// +//=--- AArch64MCExpr.h - AArch64 specific MC expression classes ---*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -7,20 +7,20 @@ // //===----------------------------------------------------------------------===// // -// This file describes ARM64-specific MCExprs, used for modifiers like +// This file describes AArch64-specific MCExprs, used for modifiers like // ":lo12:" or ":gottprel_g1:". // //===----------------------------------------------------------------------===// -#ifndef LLVM_ARM64MCEXPR_H -#define LLVM_ARM64MCEXPR_H +#ifndef LLVM_AArch64MCEXPR_H +#define LLVM_AArch64MCEXPR_H #include "llvm/MC/MCExpr.h" #include "llvm/Support/ErrorHandling.h" namespace llvm { -class ARM64MCExpr : public MCTargetExpr { +class AArch64MCExpr : public MCTargetExpr { public: enum VariantKind { VK_NONE = 0x000, @@ -105,14 +105,14 @@ class ARM64MCExpr : public MCTargetExpr { const MCExpr *Expr; const VariantKind Kind; - explicit ARM64MCExpr(const MCExpr *Expr, VariantKind Kind) + explicit AArch64MCExpr(const MCExpr *Expr, VariantKind Kind) : Expr(Expr), Kind(Kind) {} public: /// @name Construction /// @{ - static const ARM64MCExpr *Create(const MCExpr *Expr, VariantKind Kind, + static const AArch64MCExpr *Create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx); /// @} @@ -160,7 +160,7 @@ class ARM64MCExpr : public MCTargetExpr { return E->getKind() == MCExpr::Target; } - static bool classof(const ARM64MCExpr *) { return true; } + static bool classof(const AArch64MCExpr *) { return true; } }; } // end namespace llvm diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp new file mode 100644 index 000000000000..ae698c59f6ce --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -0,0 +1,225 @@ +//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides AArch64 specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "AArch64MCTargetDesc.h" +#include "AArch64ELFStreamer.h" +#include "AArch64MCAsmInfo.h" +#include "InstPrinter/AArch64InstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#include "AArch64GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "AArch64GenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "AArch64GenRegisterInfo.inc" + +static MCInstrInfo *createAArch64MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitAArch64MCInstrInfo(X); + return X; +} + +static MCSubtargetInfo * +createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + + if (CPU.empty()) + CPU = "generic"; + + InitAArch64MCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitAArch64MCRegisterInfo(X, AArch64::LR); + return X; +} + +static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { + Triple TheTriple(TT); + + MCAsmInfo *MAI; + if (TheTriple.isOSDarwin()) + MAI = new AArch64MCAsmInfoDarwin(); + else { + assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF"); + MAI = new AArch64MCAsmInfoELF(TT); + } + + // Initial state of the frame pointer is SP. + unsigned Reg = MRI.getDwarfRegNum(AArch64::SP, true); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0); + MAI->addInitialFrameState(Inst); + + return MAI; +} + +static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + Triple TheTriple(TT); + assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) && + "Only expect Darwin and ELF targets"); + + if (CM == CodeModel::Default) + CM = CodeModel::Small; + // The default MCJIT memory managers make no guarantees about where they can + // find an executable page; JITed code needs to be able to refer to globals + // no matter how far away they are. + else if (CM == CodeModel::JITDefault) + CM = CodeModel::Large; + else if (CM != CodeModel::Small && CM != CodeModel::Large) + report_fatal_error( + "Only small and large code models are allowed on AArch64"); + + // AArch64 Darwin is always PIC. + if (TheTriple.isOSDarwin()) + RM = Reloc::PIC_; + // On ELF platforms the default static relocation model has a smart enough + // linker to cope with referencing external symbols defined in a shared + // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. 
+ else if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) + RM = Reloc::Static; + + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return new AArch64InstPrinter(MAI, MII, MRI, STI); + if (SyntaxVariant == 1) + return new AArch64AppleInstPrinter(MAI, MII, MRI, STI); + + return nullptr; +} + +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + const MCSubtargetInfo &STI, bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) + return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, + /*LabelSections*/ true); + + return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); +} + +// Force static initialization. +extern "C" void LLVMInitializeAArch64TargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn X(TheAArch64leTarget, createAArch64MCAsmInfo); + RegisterMCAsmInfoFn Y(TheAArch64beTarget, createAArch64MCAsmInfo); + RegisterMCAsmInfoFn Z(TheARM64leTarget, createAArch64MCAsmInfo); + RegisterMCAsmInfoFn W(TheARM64beTarget, createAArch64MCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget, + createAArch64MCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget, + createAArch64MCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheARM64leTarget, + createAArch64MCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheARM64beTarget, + createAArch64MCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget, + createAArch64MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget, + createAArch64MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheARM64leTarget, + createAArch64MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheARM64beTarget, + createAArch64MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget, + createAArch64MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget, + createAArch64MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheARM64leTarget, + createAArch64MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheARM64beTarget, + createAArch64MCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget, + createAArch64MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget, + createAArch64MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheARM64leTarget, + createAArch64MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheARM64beTarget, + createAArch64MCSubtargetInfo); + + // Register the asm backend. 
+ TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget, + createAArch64leAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget, + createAArch64beAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheARM64leTarget, + createAArch64leAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheARM64beTarget, + createAArch64beAsmBackend); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget, + createAArch64MCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget, + createAArch64MCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheARM64leTarget, + createAArch64MCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheARM64beTarget, + createAArch64MCCodeEmitter); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget, + createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget, + createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheARM64leTarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheARM64beTarget, createMCStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget, + createAArch64MCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget, + createAArch64MCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheARM64leTarget, + createAArch64MCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheARM64beTarget, + createAArch64MCInstPrinter); +} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h similarity index 51% rename from lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h rename to lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index f2e9c17a3789..d886ea23c13e 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -1,4 +1,4 @@ -//===-- ARM64MCTargetDesc.h - ARM64 Target Descriptions ---------*- C++ -*-===// +//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This file provides ARM64 specific target descriptions. +// This file provides AArch64 specific target descriptions. 
// //===----------------------------------------------------------------------===// -#ifndef ARM64MCTARGETDESC_H -#define ARM64MCTARGETDESC_H +#ifndef AArch64MCTARGETDESC_H +#define AArch64MCTARGETDESC_H #include "llvm/Support/DataTypes.h" #include @@ -29,40 +29,42 @@ class StringRef; class Target; class raw_ostream; -extern Target TheARM64leTarget; -extern Target TheARM64beTarget; extern Target TheAArch64leTarget; extern Target TheAArch64beTarget; +extern Target TheARM64leTarget; +extern Target TheARM64beTarget; -MCCodeEmitter *createARM64MCCodeEmitter(const MCInstrInfo &MCII, +MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createARM64leAsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); -MCAsmBackend *createARM64beAsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); +MCAsmBackend *createAArch64leAsmBackend(const Target &T, + const MCRegisterInfo &MRI, StringRef TT, + StringRef CPU); +MCAsmBackend *createAArch64beAsmBackend(const Target &T, + const MCRegisterInfo &MRI, StringRef TT, + StringRef CPU); - MCObjectWriter *createARM64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI, - bool IsLittleEndian); +MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI, + bool IsLittleEndian); -MCObjectWriter *createARM64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, +MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype); } // End llvm namespace -// Defines symbolic names for ARM64 registers. This defines a mapping from +// Defines symbolic names for AArch64 registers. This defines a mapping from // register name to register number. // #define GET_REGINFO_ENUM -#include "ARM64GenRegisterInfo.inc" +#include "AArch64GenRegisterInfo.inc" -// Defines symbolic names for the ARM64 instructions. +// Defines symbolic names for the AArch64 instructions. 
// #define GET_INSTRINFO_ENUM -#include "ARM64GenInstrInfo.inc" +#include "AArch64GenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM -#include "ARM64GenSubtargetInfo.inc" +#include "AArch64GenSubtargetInfo.inc" #endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp similarity index 88% rename from lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp rename to lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 1c48159bbe95..5c86189a6ef5 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -1,4 +1,4 @@ -//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===// +//===-- AArch64MachObjectWriter.cpp - ARM Mach Object Writer --------------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "MCTargetDesc/ARM64MCTargetDesc.h" +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCContext.h" @@ -23,13 +23,13 @@ using namespace llvm; namespace { -class ARM64MachObjectWriter : public MCMachObjectTargetWriter { - bool getARM64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType, +class AArch64MachObjectWriter : public MCMachObjectTargetWriter { + bool getAArch64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym, unsigned &Log2Size, const MCAssembler &Asm); public: - ARM64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype) + AArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype) : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype, /*UseAggressiveSymbolFolding=*/true) {} @@ -40,7 +40,7 @@ class ARM64MachObjectWriter : public MCMachObjectTargetWriter { }; } -bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo( +bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo( const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym, unsigned &Log2Size, const MCAssembler &Asm) { RelocType = unsigned(MachO::ARM64_RELOC_UNSIGNED); @@ -66,12 +66,12 @@ bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo( if (Sym->getKind() == MCSymbolRefExpr::VK_GOT) RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT); return true; - case ARM64::fixup_arm64_add_imm12: - case ARM64::fixup_arm64_ldst_imm12_scale1: - case ARM64::fixup_arm64_ldst_imm12_scale2: - case ARM64::fixup_arm64_ldst_imm12_scale4: - case ARM64::fixup_arm64_ldst_imm12_scale8: - case ARM64::fixup_arm64_ldst_imm12_scale16: + case AArch64::fixup_aarch64_add_imm12: + case AArch64::fixup_aarch64_ldst_imm12_scale1: + case AArch64::fixup_aarch64_ldst_imm12_scale2: + case AArch64::fixup_aarch64_ldst_imm12_scale4: + case AArch64::fixup_aarch64_ldst_imm12_scale8: + case AArch64::fixup_aarch64_ldst_imm12_scale16: Log2Size = llvm::Log2_32(4); switch (Sym->getKind()) { default: @@ -86,7 +86,7 @@ bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo( RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12); return true; } - case ARM64::fixup_arm64_pcrel_adrp_imm21: + case AArch64::fixup_aarch64_pcrel_adrp_imm21: Log2Size = llvm::Log2_32(4); // This encompasses the relocation for the whole 21-bit value. 
switch (Sym->getKind()) { @@ -104,15 +104,15 @@ bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo( return true; } return true; - case ARM64::fixup_arm64_pcrel_branch26: - case ARM64::fixup_arm64_pcrel_call26: + case AArch64::fixup_aarch64_pcrel_branch26: + case AArch64::fixup_aarch64_pcrel_call26: Log2Size = llvm::Log2_32(4); RelocType = unsigned(MachO::ARM64_RELOC_BRANCH26); return true; } } -void ARM64MachObjectWriter::RecordRelocation( +void AArch64MachObjectWriter::RecordRelocation( MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { @@ -129,20 +129,20 @@ void ARM64MachObjectWriter::RecordRelocation( FixupOffset += Fixup.getOffset(); - // ARM64 pcrel relocation addends do not include the section offset. + // AArch64 pcrel relocation addends do not include the section offset. if (IsPCRel) FixedValue += FixupOffset; // ADRP fixups use relocations for the whole symbol value and only // put the addend in the instruction itself. Clear out any value the // generic code figured out from the sybmol definition. - if (Kind == ARM64::fixup_arm64_pcrel_adrp_imm21) + if (Kind == AArch64::fixup_aarch64_pcrel_adrp_imm21) FixedValue = 0; // imm19 relocations are for conditional branches, which require // assembler local symbols. If we got here, that's not what we have, // so complain loudly. - if (Kind == ARM64::fixup_arm64_pcrel_branch19) { + if (Kind == AArch64::fixup_aarch64_pcrel_branch19) { Asm.getContext().FatalError(Fixup.getLoc(), "conditional branch requires assembler-local" " label. '" + @@ -153,15 +153,15 @@ void ARM64MachObjectWriter::RecordRelocation( // 14-bit branch relocations should only target internal labels, and so // should never get here. - if (Kind == ARM64::fixup_arm64_pcrel_branch14) { + if (Kind == AArch64::fixup_aarch64_pcrel_branch14) { Asm.getContext().FatalError(Fixup.getLoc(), "Invalid relocation on conditional branch!"); return; } - if (!getARM64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size, + if (!getAArch64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size, Asm)) { - Asm.getContext().FatalError(Fixup.getLoc(), "unknown ARM64 fixup kind!"); + Asm.getContext().FatalError(Fixup.getLoc(), "unknown AArch64 fixup kind!"); return; } @@ -220,7 +220,7 @@ void ARM64MachObjectWriter::RecordRelocation( "unsupported pc-relative relocation of " "difference"); - // ARM64 always uses external relocations. If there is no symbol to use as + // AArch64 always uses external relocations. If there is no symbol to use as // a base address (a local symbol with no preceding non-local symbol), // error out. // @@ -305,9 +305,9 @@ void ARM64MachObjectWriter::RecordRelocation( Base = nullptr; } - // ARM64 uses external relocations as much as possible. For debug sections, - // and for pointer-sized relocations (.quad), we allow section relocations. - // It's code sections that run into trouble. + // AArch64 uses external relocations as much as possible. For debug + // sections, and for pointer-sized relocations (.quad), we allow section + // relocations. It's code sections that run into trouble. 
if (Base) { Index = Base->getIndex(); IsExtern = 1; @@ -387,9 +387,10 @@ void ARM64MachObjectWriter::RecordRelocation( Writer->addRelocation(Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createARM64MachObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype) { - return createMachObjectWriter(new ARM64MachObjectWriter(CPUType, CPUSubtype), - OS, /*IsLittleEndian=*/true); + return createMachObjectWriter( + new AArch64MachObjectWriter(CPUType, CPUSubtype), OS, + /*IsLittleEndian=*/true); } diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..7d5bced17a6a --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,14 @@ +add_llvm_library(LLVMAArch64Desc + AArch64AsmBackend.cpp + AArch64ELFObjectWriter.cpp + AArch64ELFStreamer.cpp + AArch64MCAsmInfo.cpp + AArch64MCCodeEmitter.cpp + AArch64MCExpr.cpp + AArch64MCTargetDesc.cpp + AArch64MachObjectWriter.cpp +) +add_dependencies(LLVMAArch64Desc AArch64CommonTableGen) + +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt similarity index 72% rename from lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt rename to lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt index e4c74d285d48..70cff0b704f7 100644 --- a/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; +;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,8 +17,8 @@ [component_0] type = Library -name = ARM64Desc -parent = ARM64 -required_libraries = ARM64AsmPrinter ARM64Info MC Support -add_to_library_groups = ARM64 +name = AArch64Desc +parent = AArch64 +required_libraries = AArch64AsmPrinter AArch64Info MC Support +add_to_library_groups = AArch64 diff --git a/lib/Target/ARM64/MCTargetDesc/Makefile b/lib/Target/AArch64/MCTargetDesc/Makefile similarity index 82% rename from lib/Target/ARM64/MCTargetDesc/Makefile rename to lib/Target/AArch64/MCTargetDesc/Makefile index 013cc633f664..5779ac5ac60a 100644 --- a/lib/Target/ARM64/MCTargetDesc/Makefile +++ b/lib/Target/AArch64/MCTargetDesc/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/ARM64/TargetDesc/Makefile ----------------*- Makefile -*-===## +##===- lib/Target/AArch64/TargetDesc/Makefile --------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # @@ -8,7 +8,7 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Desc +LIBRARYNAME = LLVMAArch64Desc # Hack: we need to include 'main' target directory to grab private headers CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/AArch64/Makefile b/lib/Target/AArch64/Makefile new file mode 100644 index 000000000000..f356c5850413 --- /dev/null +++ b/lib/Target/AArch64/Makefile @@ -0,0 +1,25 @@ +##===- lib/Target/AArch64/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMAArch64CodeGen +TARGET = AArch64 + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = AArch64GenRegisterInfo.inc AArch64GenInstrInfo.inc \ + AArch64GenAsmWriter.inc AArch64GenAsmWriter1.inc \ + AArch64GenDAGISel.inc \ + AArch64GenCallingConv.inc AArch64GenAsmMatcher.inc \ + AArch64GenSubtargetInfo.inc AArch64GenMCCodeEmitter.inc \ + AArch64GenFastISel.inc AArch64GenDisassemblerTables.inc \ + AArch64GenMCPseudoLowering.inc + +DIRS = TargetInfo InstPrinter AsmParser Disassembler MCTargetDesc Utils + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp similarity index 64% rename from lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp rename to lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp index 247566825ab3..3a382c165e7c 100644 --- a/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp +++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARM64TargetInfo.cpp - ARM64 Target Implementation -----------------===// +//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -----------------===// // // The LLVM Compiler Infrastructure // @@ -12,20 +12,20 @@ using namespace llvm; namespace llvm { -Target TheARM64leTarget; -Target TheARM64beTarget; Target TheAArch64leTarget; Target TheAArch64beTarget; +Target TheARM64leTarget; +Target TheARM64beTarget; } // end namespace llvm -extern "C" void LLVMInitializeARM64TargetInfo() { +extern "C" void LLVMInitializeAArch64TargetInfo() { RegisterTarget X(TheARM64leTarget, "arm64", - "ARM64 (little endian)"); + "AArch64 (little endian)"); RegisterTarget Y(TheARM64beTarget, "arm64_be", - "ARM64 (big endian)"); + "AArch64 (big endian)"); RegisterTarget Z( - TheAArch64leTarget, "aarch64", "ARM64 (little endian)"); + TheAArch64leTarget, "aarch64", "AArch64 (little endian)"); RegisterTarget W( - TheAArch64beTarget, "aarch64_be", "ARM64 (big endian)"); + TheAArch64beTarget, "aarch64_be", "AArch64 (big endian)"); } diff --git a/lib/Target/AArch64/TargetInfo/CMakeLists.txt b/lib/Target/AArch64/TargetInfo/CMakeLists.txt new file mode 100644 index 000000000000..e236eed00be1 --- /dev/null +++ b/lib/Target/AArch64/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) + +add_llvm_library(LLVMAArch64Info + AArch64TargetInfo.cpp + ) + +add_dependencies(LLVMAArch64Info AArch64CommonTableGen) diff --git a/lib/Target/ARM64/Utils/LLVMBuild.txt b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt similarity index 79% rename from lib/Target/ARM64/Utils/LLVMBuild.txt rename to lib/Target/AArch64/TargetInfo/LLVMBuild.txt index 232dca29f407..93c5407bb1f1 100644 --- a/lib/Target/ARM64/Utils/LLVMBuild.txt +++ b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/ARM64/Utils/LLVMBuild.txt ----------------*- Conf -*--===; +;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,7 +17,7 @@ [component_0] type = Library -name = ARM64Utils -parent = ARM64 +name = AArch64Info +parent = AArch64 required_libraries = Support -add_to_library_groups = ARM64 +add_to_library_groups = AArch64 diff --git a/lib/Target/ARM64/TargetInfo/Makefile b/lib/Target/AArch64/TargetInfo/Makefile similarity index 82% rename from lib/Target/ARM64/TargetInfo/Makefile rename to lib/Target/AArch64/TargetInfo/Makefile index 2d5a1a087a57..9dc9aa4bccf7 100644 --- a/lib/Target/ARM64/TargetInfo/Makefile +++ b/lib/Target/AArch64/TargetInfo/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/ARM64/TargetInfo/Makefile ----------------*- Makefile -*-===## +##===- lib/Target/AArch64/TargetInfo/Makefile --------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # @@ -7,7 +7,7 @@ # ##===----------------------------------------------------------------------===## LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Info +LIBRARYNAME = LLVMAArch64Info # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/ARM64/Utils/ARM64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp similarity index 89% rename from lib/Target/ARM64/Utils/ARM64BaseInfo.cpp rename to lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 5142d18c23cc..3c24bb30a26d 100644 --- a/lib/Target/ARM64/Utils/ARM64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARM64BaseInfo.cpp - ARM64 Base encoding information------------===// +//===-- AArch64BaseInfo.cpp - AArch64 Base encoding information------------===// // // The LLVM Compiler Infrastructure // @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// // -// This file provides basic encoding and assembly information for ARM64. +// This file provides basic encoding and assembly information for AArch64. 
// //===----------------------------------------------------------------------===// -#include "ARM64BaseInfo.h" +#include "AArch64BaseInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -18,7 +18,7 @@ using namespace llvm; -StringRef ARM64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { +StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { for (unsigned i = 0; i < NumPairs; ++i) { if (Pairs[i].Value == Value) { Valid = true; @@ -30,7 +30,7 @@ StringRef ARM64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { return StringRef(); } -uint32_t ARM64NamedImmMapper::fromString(StringRef Name, bool &Valid) const { +uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const { std::string LowerCaseName = Name.lower(); for (unsigned i = 0; i < NumPairs; ++i) { if (Pairs[i].Name == LowerCaseName) { @@ -43,11 +43,11 @@ uint32_t ARM64NamedImmMapper::fromString(StringRef Name, bool &Valid) const { return -1; } -bool ARM64NamedImmMapper::validImm(uint32_t Value) const { +bool AArch64NamedImmMapper::validImm(uint32_t Value) const { return Value < TooBigImm; } -const ARM64NamedImmMapper::Mapping ARM64AT::ATMapper::ATPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = { {"s1e1r", S1E1R}, {"s1e2r", S1E2R}, {"s1e3r", S1E3R}, @@ -62,10 +62,10 @@ const ARM64NamedImmMapper::Mapping ARM64AT::ATMapper::ATPairs[] = { {"s12e0w", S12E0W}, }; -ARM64AT::ATMapper::ATMapper() - : ARM64NamedImmMapper(ATPairs, 0) {} +AArch64AT::ATMapper::ATMapper() + : AArch64NamedImmMapper(ATPairs, 0) {} -const ARM64NamedImmMapper::Mapping ARM64DB::DBarrierMapper::DBarrierPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] = { {"oshld", OSHLD}, {"oshst", OSHST}, {"osh", OSH}, @@ -80,10 +80,10 @@ const ARM64NamedImmMapper::Mapping ARM64DB::DBarrierMapper::DBarrierPairs[] = { {"sy", SY} }; -ARM64DB::DBarrierMapper::DBarrierMapper() - : ARM64NamedImmMapper(DBarrierPairs, 16u) {} +AArch64DB::DBarrierMapper::DBarrierMapper() + : AArch64NamedImmMapper(DBarrierPairs, 16u) {} -const ARM64NamedImmMapper::Mapping ARM64DC::DCMapper::DCPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = { {"zva", ZVA}, {"ivac", IVAC}, {"isw", ISW}, @@ -94,26 +94,26 @@ const ARM64NamedImmMapper::Mapping ARM64DC::DCMapper::DCPairs[] = { {"cisw", CISW} }; -ARM64DC::DCMapper::DCMapper() - : ARM64NamedImmMapper(DCPairs, 0) {} +AArch64DC::DCMapper::DCMapper() + : AArch64NamedImmMapper(DCPairs, 0) {} -const ARM64NamedImmMapper::Mapping ARM64IC::ICMapper::ICPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICPairs[] = { {"ialluis", IALLUIS}, {"iallu", IALLU}, {"ivau", IVAU} }; -ARM64IC::ICMapper::ICMapper() - : ARM64NamedImmMapper(ICPairs, 0) {} +AArch64IC::ICMapper::ICMapper() + : AArch64NamedImmMapper(ICPairs, 0) {} -const ARM64NamedImmMapper::Mapping ARM64ISB::ISBMapper::ISBPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBPairs[] = { {"sy", SY}, }; -ARM64ISB::ISBMapper::ISBMapper() - : ARM64NamedImmMapper(ISBPairs, 16) {} +AArch64ISB::ISBMapper::ISBMapper() + : AArch64NamedImmMapper(ISBPairs, 16) {} -const ARM64NamedImmMapper::Mapping ARM64PRFM::PRFMMapper::PRFMPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = { {"pldl1keep", PLDL1KEEP}, {"pldl1strm", PLDL1STRM}, {"pldl2keep", PLDL2KEEP}, @@ -134,19 +134,19 @@ const ARM64NamedImmMapper::Mapping 
ARM64PRFM::PRFMMapper::PRFMPairs[] = { {"pstl3strm", PSTL3STRM} }; -ARM64PRFM::PRFMMapper::PRFMMapper() - : ARM64NamedImmMapper(PRFMPairs, 32) {} +AArch64PRFM::PRFMMapper::PRFMMapper() + : AArch64NamedImmMapper(PRFMPairs, 32) {} -const ARM64NamedImmMapper::Mapping ARM64PState::PStateMapper::PStatePairs[] = { +const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStatePairs[] = { {"spsel", SPSel}, {"daifset", DAIFSet}, {"daifclr", DAIFClr} }; -ARM64PState::PStateMapper::PStateMapper() - : ARM64NamedImmMapper(PStatePairs, 0) {} +AArch64PState::PStateMapper::PStateMapper() + : AArch64NamedImmMapper(PStatePairs, 0) {} -const ARM64NamedImmMapper::Mapping ARM64SysReg::MRSMapper::MRSPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = { {"mdccsr_el0", MDCCSR_EL0}, {"dbgdtrrx_el0", DBGDTRRX_EL0}, {"mdrar_el1", MDRAR_EL1}, @@ -176,16 +176,16 @@ const ARM64NamedImmMapper::Mapping ARM64SysReg::MRSMapper::MRSPairs[] = { {"id_isar3_el1", ID_ISAR3_EL1}, {"id_isar4_el1", ID_ISAR4_EL1}, {"id_isar5_el1", ID_ISAR5_EL1}, - {"id_aa64pfr0_el1", ID_AARM64PFR0_EL1}, - {"id_aa64pfr1_el1", ID_AARM64PFR1_EL1}, - {"id_aa64dfr0_el1", ID_AARM64DFR0_EL1}, - {"id_aa64dfr1_el1", ID_AARM64DFR1_EL1}, - {"id_aa64afr0_el1", ID_AARM64AFR0_EL1}, - {"id_aa64afr1_el1", ID_AARM64AFR1_EL1}, - {"id_aa64isar0_el1", ID_AARM64ISAR0_EL1}, - {"id_aa64isar1_el1", ID_AARM64ISAR1_EL1}, - {"id_aa64mmfr0_el1", ID_AARM64MMFR0_EL1}, - {"id_aa64mmfr1_el1", ID_AARM64MMFR1_EL1}, + {"id_aa64pfr0_el1", ID_A64PFR0_EL1}, + {"id_aa64pfr1_el1", ID_A64PFR1_EL1}, + {"id_aa64dfr0_el1", ID_A64DFR0_EL1}, + {"id_aa64dfr1_el1", ID_A64DFR1_EL1}, + {"id_aa64afr0_el1", ID_A64AFR0_EL1}, + {"id_aa64afr1_el1", ID_A64AFR1_EL1}, + {"id_aa64isar0_el1", ID_A64ISAR0_EL1}, + {"id_aa64isar1_el1", ID_A64ISAR1_EL1}, + {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1}, + {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1}, {"mvfr0_el1", MVFR0_EL1}, {"mvfr1_el1", MVFR1_EL1}, {"mvfr2_el1", MVFR2_EL1}, @@ -245,13 +245,13 @@ const ARM64NamedImmMapper::Mapping ARM64SysReg::MRSMapper::MRSPairs[] = { {"ich_elsr_el2", ICH_ELSR_EL2} }; -ARM64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits) +AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits) : SysRegMapper(FeatureBits) { InstPairs = &MRSPairs[0]; NumInstPairs = llvm::array_lengthof(MRSPairs); } -const ARM64NamedImmMapper::Mapping ARM64SysReg::MSRMapper::MSRPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = { {"dbgdtrtx_el0", DBGDTRTX_EL0}, {"oslar_el1", OSLAR_EL1}, {"pmswinc_el0", PMSWINC_EL0}, @@ -269,14 +269,14 @@ const ARM64NamedImmMapper::Mapping ARM64SysReg::MSRMapper::MSRPairs[] = { {"icc_sgi0r_el1", ICC_SGI0R_EL1} }; -ARM64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits) +AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits) : SysRegMapper(FeatureBits) { InstPairs = &MSRPairs[0]; NumInstPairs = llvm::array_lengthof(MSRPairs); } -const ARM64NamedImmMapper::Mapping ARM64SysReg::SysRegMapper::SysRegPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[] = { {"osdtrrx_el1", OSDTRRX_EL1}, {"osdtrtx_el1", OSDTRTX_EL1}, {"teecr32_el1", TEECR32_EL1}, @@ -755,13 +755,13 @@ const ARM64NamedImmMapper::Mapping ARM64SysReg::SysRegMapper::SysRegPairs[] = { {"ich_lr15_el2", ICH_LR15_EL2} }; -const ARM64NamedImmMapper::Mapping -ARM64SysReg::SysRegMapper::CycloneSysRegPairs[] = { +const AArch64NamedImmMapper::Mapping +AArch64SysReg::SysRegMapper::CycloneSysRegPairs[] = { {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3} }; uint32_t 
-ARM64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { +AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { std::string NameLower = Name.lower(); // First search the registers shared by all @@ -773,7 +773,7 @@ ARM64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { } // Next search for target specific registers - if (FeatureBits & ARM64::ProcCyclone) { + if (FeatureBits & AArch64::ProcCyclone) { for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) { if (CycloneSysRegPairs[i].Name == NameLower) { Valid = true; @@ -814,7 +814,7 @@ ARM64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { } std::string -ARM64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { +AArch64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { // First search the registers shared by all for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { if (SysRegPairs[i].Value == Bits) { @@ -824,7 +824,7 @@ ARM64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { } // Next search for target specific registers - if (FeatureBits & ARM64::ProcCyclone) { + if (FeatureBits & AArch64::ProcCyclone) { for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) { if (CycloneSysRegPairs[i].Value == Bits) { Valid = true; @@ -862,7 +862,7 @@ ARM64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { + "_c" + utostr(CRm) + "_" + utostr(Op2); } -const ARM64NamedImmMapper::Mapping ARM64TLBI::TLBIMapper::TLBIPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = { {"ipas2e1is", IPAS2E1IS}, {"ipas2le1is", IPAS2LE1IS}, {"vmalle1is", VMALLE1IS}, @@ -897,5 +897,5 @@ const ARM64NamedImmMapper::Mapping ARM64TLBI::TLBIMapper::TLBIPairs[] = { {"vaale1", VAALE1} }; -ARM64TLBI::TLBIMapper::TLBIMapper() - : ARM64NamedImmMapper(TLBIPairs, 0) {} +AArch64TLBI::TLBIMapper::TLBIMapper() + : AArch64NamedImmMapper(TLBIPairs, 0) {} diff --git a/lib/Target/ARM64/Utils/ARM64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h similarity index 84% rename from lib/Target/ARM64/Utils/ARM64BaseInfo.h rename to lib/Target/AArch64/Utils/AArch64BaseInfo.h index 8075d6b37c9a..9e4c389cc2e9 100644 --- a/lib/Target/ARM64/Utils/ARM64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -1,4 +1,4 @@ -//===-- ARM64BaseInfo.h - Top level definitions for ARM64 -------*- C++ -*-===// +//===-- AArch64BaseInfo.h - Top level definitions for AArch64 ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -8,18 +8,18 @@ //===----------------------------------------------------------------------===// // // This file contains small standalone helper functions and enum definitions for -// the ARM64 target useful for the compiler back-end and the MC libraries. +// the AArch64 target useful for the compiler back-end and the MC libraries. // As such, it deliberately does not include references to LLVM core // code gen types, passes, etc.. // //===----------------------------------------------------------------------===// -#ifndef ARM64BASEINFO_H -#define ARM64BASEINFO_H +#ifndef AArch64BASEINFO_H +#define AArch64BASEINFO_H // FIXME: Is it easiest to fix this layering violation by moving the .inc -// #includes from ARM64MCTargetDesc.h to here? -#include "MCTargetDesc/ARM64MCTargetDesc.h" // For ARM64::X0 and friends. +// #includes from AArch64MCTargetDesc.h to here? +#include "MCTargetDesc/AArch64MCTargetDesc.h" // For AArch64::X0 and friends. 
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" @@ -28,39 +28,39 @@ namespace llvm { inline static unsigned getWRegFromXReg(unsigned Reg) { switch (Reg) { - case ARM64::X0: return ARM64::W0; - case ARM64::X1: return ARM64::W1; - case ARM64::X2: return ARM64::W2; - case ARM64::X3: return ARM64::W3; - case ARM64::X4: return ARM64::W4; - case ARM64::X5: return ARM64::W5; - case ARM64::X6: return ARM64::W6; - case ARM64::X7: return ARM64::W7; - case ARM64::X8: return ARM64::W8; - case ARM64::X9: return ARM64::W9; - case ARM64::X10: return ARM64::W10; - case ARM64::X11: return ARM64::W11; - case ARM64::X12: return ARM64::W12; - case ARM64::X13: return ARM64::W13; - case ARM64::X14: return ARM64::W14; - case ARM64::X15: return ARM64::W15; - case ARM64::X16: return ARM64::W16; - case ARM64::X17: return ARM64::W17; - case ARM64::X18: return ARM64::W18; - case ARM64::X19: return ARM64::W19; - case ARM64::X20: return ARM64::W20; - case ARM64::X21: return ARM64::W21; - case ARM64::X22: return ARM64::W22; - case ARM64::X23: return ARM64::W23; - case ARM64::X24: return ARM64::W24; - case ARM64::X25: return ARM64::W25; - case ARM64::X26: return ARM64::W26; - case ARM64::X27: return ARM64::W27; - case ARM64::X28: return ARM64::W28; - case ARM64::FP: return ARM64::W29; - case ARM64::LR: return ARM64::W30; - case ARM64::SP: return ARM64::WSP; - case ARM64::XZR: return ARM64::WZR; + case AArch64::X0: return AArch64::W0; + case AArch64::X1: return AArch64::W1; + case AArch64::X2: return AArch64::W2; + case AArch64::X3: return AArch64::W3; + case AArch64::X4: return AArch64::W4; + case AArch64::X5: return AArch64::W5; + case AArch64::X6: return AArch64::W6; + case AArch64::X7: return AArch64::W7; + case AArch64::X8: return AArch64::W8; + case AArch64::X9: return AArch64::W9; + case AArch64::X10: return AArch64::W10; + case AArch64::X11: return AArch64::W11; + case AArch64::X12: return AArch64::W12; + case AArch64::X13: return AArch64::W13; + case AArch64::X14: return AArch64::W14; + case AArch64::X15: return AArch64::W15; + case AArch64::X16: return AArch64::W16; + case AArch64::X17: return AArch64::W17; + case AArch64::X18: return AArch64::W18; + case AArch64::X19: return AArch64::W19; + case AArch64::X20: return AArch64::W20; + case AArch64::X21: return AArch64::W21; + case AArch64::X22: return AArch64::W22; + case AArch64::X23: return AArch64::W23; + case AArch64::X24: return AArch64::W24; + case AArch64::X25: return AArch64::W25; + case AArch64::X26: return AArch64::W26; + case AArch64::X27: return AArch64::W27; + case AArch64::X28: return AArch64::W28; + case AArch64::FP: return AArch64::W29; + case AArch64::LR: return AArch64::W30; + case AArch64::SP: return AArch64::WSP; + case AArch64::XZR: return AArch64::WZR; } // For anything else, return it unchanged. 
return Reg; @@ -68,39 +68,39 @@ inline static unsigned getWRegFromXReg(unsigned Reg) { inline static unsigned getXRegFromWReg(unsigned Reg) { switch (Reg) { - case ARM64::W0: return ARM64::X0; - case ARM64::W1: return ARM64::X1; - case ARM64::W2: return ARM64::X2; - case ARM64::W3: return ARM64::X3; - case ARM64::W4: return ARM64::X4; - case ARM64::W5: return ARM64::X5; - case ARM64::W6: return ARM64::X6; - case ARM64::W7: return ARM64::X7; - case ARM64::W8: return ARM64::X8; - case ARM64::W9: return ARM64::X9; - case ARM64::W10: return ARM64::X10; - case ARM64::W11: return ARM64::X11; - case ARM64::W12: return ARM64::X12; - case ARM64::W13: return ARM64::X13; - case ARM64::W14: return ARM64::X14; - case ARM64::W15: return ARM64::X15; - case ARM64::W16: return ARM64::X16; - case ARM64::W17: return ARM64::X17; - case ARM64::W18: return ARM64::X18; - case ARM64::W19: return ARM64::X19; - case ARM64::W20: return ARM64::X20; - case ARM64::W21: return ARM64::X21; - case ARM64::W22: return ARM64::X22; - case ARM64::W23: return ARM64::X23; - case ARM64::W24: return ARM64::X24; - case ARM64::W25: return ARM64::X25; - case ARM64::W26: return ARM64::X26; - case ARM64::W27: return ARM64::X27; - case ARM64::W28: return ARM64::X28; - case ARM64::W29: return ARM64::FP; - case ARM64::W30: return ARM64::LR; - case ARM64::WSP: return ARM64::SP; - case ARM64::WZR: return ARM64::XZR; + case AArch64::W0: return AArch64::X0; + case AArch64::W1: return AArch64::X1; + case AArch64::W2: return AArch64::X2; + case AArch64::W3: return AArch64::X3; + case AArch64::W4: return AArch64::X4; + case AArch64::W5: return AArch64::X5; + case AArch64::W6: return AArch64::X6; + case AArch64::W7: return AArch64::X7; + case AArch64::W8: return AArch64::X8; + case AArch64::W9: return AArch64::X9; + case AArch64::W10: return AArch64::X10; + case AArch64::W11: return AArch64::X11; + case AArch64::W12: return AArch64::X12; + case AArch64::W13: return AArch64::X13; + case AArch64::W14: return AArch64::X14; + case AArch64::W15: return AArch64::X15; + case AArch64::W16: return AArch64::X16; + case AArch64::W17: return AArch64::X17; + case AArch64::W18: return AArch64::X18; + case AArch64::W19: return AArch64::X19; + case AArch64::W20: return AArch64::X20; + case AArch64::W21: return AArch64::X21; + case AArch64::W22: return AArch64::X22; + case AArch64::W23: return AArch64::X23; + case AArch64::W24: return AArch64::X24; + case AArch64::W25: return AArch64::X25; + case AArch64::W26: return AArch64::X26; + case AArch64::W27: return AArch64::X27; + case AArch64::W28: return AArch64::X28; + case AArch64::W29: return AArch64::FP; + case AArch64::W30: return AArch64::LR; + case AArch64::WSP: return AArch64::SP; + case AArch64::WZR: return AArch64::XZR; } // For anything else, return it unchanged. 
return Reg; @@ -108,38 +108,38 @@ inline static unsigned getXRegFromWReg(unsigned Reg) { static inline unsigned getBRegFromDReg(unsigned Reg) { switch (Reg) { - case ARM64::D0: return ARM64::B0; - case ARM64::D1: return ARM64::B1; - case ARM64::D2: return ARM64::B2; - case ARM64::D3: return ARM64::B3; - case ARM64::D4: return ARM64::B4; - case ARM64::D5: return ARM64::B5; - case ARM64::D6: return ARM64::B6; - case ARM64::D7: return ARM64::B7; - case ARM64::D8: return ARM64::B8; - case ARM64::D9: return ARM64::B9; - case ARM64::D10: return ARM64::B10; - case ARM64::D11: return ARM64::B11; - case ARM64::D12: return ARM64::B12; - case ARM64::D13: return ARM64::B13; - case ARM64::D14: return ARM64::B14; - case ARM64::D15: return ARM64::B15; - case ARM64::D16: return ARM64::B16; - case ARM64::D17: return ARM64::B17; - case ARM64::D18: return ARM64::B18; - case ARM64::D19: return ARM64::B19; - case ARM64::D20: return ARM64::B20; - case ARM64::D21: return ARM64::B21; - case ARM64::D22: return ARM64::B22; - case ARM64::D23: return ARM64::B23; - case ARM64::D24: return ARM64::B24; - case ARM64::D25: return ARM64::B25; - case ARM64::D26: return ARM64::B26; - case ARM64::D27: return ARM64::B27; - case ARM64::D28: return ARM64::B28; - case ARM64::D29: return ARM64::B29; - case ARM64::D30: return ARM64::B30; - case ARM64::D31: return ARM64::B31; + case AArch64::D0: return AArch64::B0; + case AArch64::D1: return AArch64::B1; + case AArch64::D2: return AArch64::B2; + case AArch64::D3: return AArch64::B3; + case AArch64::D4: return AArch64::B4; + case AArch64::D5: return AArch64::B5; + case AArch64::D6: return AArch64::B6; + case AArch64::D7: return AArch64::B7; + case AArch64::D8: return AArch64::B8; + case AArch64::D9: return AArch64::B9; + case AArch64::D10: return AArch64::B10; + case AArch64::D11: return AArch64::B11; + case AArch64::D12: return AArch64::B12; + case AArch64::D13: return AArch64::B13; + case AArch64::D14: return AArch64::B14; + case AArch64::D15: return AArch64::B15; + case AArch64::D16: return AArch64::B16; + case AArch64::D17: return AArch64::B17; + case AArch64::D18: return AArch64::B18; + case AArch64::D19: return AArch64::B19; + case AArch64::D20: return AArch64::B20; + case AArch64::D21: return AArch64::B21; + case AArch64::D22: return AArch64::B22; + case AArch64::D23: return AArch64::B23; + case AArch64::D24: return AArch64::B24; + case AArch64::D25: return AArch64::B25; + case AArch64::D26: return AArch64::B26; + case AArch64::D27: return AArch64::B27; + case AArch64::D28: return AArch64::B28; + case AArch64::D29: return AArch64::B29; + case AArch64::D30: return AArch64::B30; + case AArch64::D31: return AArch64::B31; } // For anything else, return it unchanged. 
return Reg; @@ -148,44 +148,44 @@ static inline unsigned getBRegFromDReg(unsigned Reg) { static inline unsigned getDRegFromBReg(unsigned Reg) { switch (Reg) { - case ARM64::B0: return ARM64::D0; - case ARM64::B1: return ARM64::D1; - case ARM64::B2: return ARM64::D2; - case ARM64::B3: return ARM64::D3; - case ARM64::B4: return ARM64::D4; - case ARM64::B5: return ARM64::D5; - case ARM64::B6: return ARM64::D6; - case ARM64::B7: return ARM64::D7; - case ARM64::B8: return ARM64::D8; - case ARM64::B9: return ARM64::D9; - case ARM64::B10: return ARM64::D10; - case ARM64::B11: return ARM64::D11; - case ARM64::B12: return ARM64::D12; - case ARM64::B13: return ARM64::D13; - case ARM64::B14: return ARM64::D14; - case ARM64::B15: return ARM64::D15; - case ARM64::B16: return ARM64::D16; - case ARM64::B17: return ARM64::D17; - case ARM64::B18: return ARM64::D18; - case ARM64::B19: return ARM64::D19; - case ARM64::B20: return ARM64::D20; - case ARM64::B21: return ARM64::D21; - case ARM64::B22: return ARM64::D22; - case ARM64::B23: return ARM64::D23; - case ARM64::B24: return ARM64::D24; - case ARM64::B25: return ARM64::D25; - case ARM64::B26: return ARM64::D26; - case ARM64::B27: return ARM64::D27; - case ARM64::B28: return ARM64::D28; - case ARM64::B29: return ARM64::D29; - case ARM64::B30: return ARM64::D30; - case ARM64::B31: return ARM64::D31; + case AArch64::B0: return AArch64::D0; + case AArch64::B1: return AArch64::D1; + case AArch64::B2: return AArch64::D2; + case AArch64::B3: return AArch64::D3; + case AArch64::B4: return AArch64::D4; + case AArch64::B5: return AArch64::D5; + case AArch64::B6: return AArch64::D6; + case AArch64::B7: return AArch64::D7; + case AArch64::B8: return AArch64::D8; + case AArch64::B9: return AArch64::D9; + case AArch64::B10: return AArch64::D10; + case AArch64::B11: return AArch64::D11; + case AArch64::B12: return AArch64::D12; + case AArch64::B13: return AArch64::D13; + case AArch64::B14: return AArch64::D14; + case AArch64::B15: return AArch64::D15; + case AArch64::B16: return AArch64::D16; + case AArch64::B17: return AArch64::D17; + case AArch64::B18: return AArch64::D18; + case AArch64::B19: return AArch64::D19; + case AArch64::B20: return AArch64::D20; + case AArch64::B21: return AArch64::D21; + case AArch64::B22: return AArch64::D22; + case AArch64::B23: return AArch64::D23; + case AArch64::B24: return AArch64::D24; + case AArch64::B25: return AArch64::D25; + case AArch64::B26: return AArch64::D26; + case AArch64::B27: return AArch64::D27; + case AArch64::B28: return AArch64::D28; + case AArch64::B29: return AArch64::D29; + case AArch64::B30: return AArch64::D30; + case AArch64::B31: return AArch64::D31; } // For anything else, return it unchanged. return Reg; } -namespace ARM64CC { +namespace AArch64CC { // The CondCodes constants map directly to the 4-bit encoding of the condition // field for predicated instructions. @@ -277,7 +277,7 @@ inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) { case LE: return Z; // Z == 1 || N != V } } -} // end namespace ARM64CC +} // end namespace AArch64CC /// Instances of this class can perform bidirectional mapping from random /// identifier strings to operand encodings. For example "MSR" takes a named @@ -290,14 +290,14 @@ inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) { /// out just how often these instructions are emitted before working on it. It /// might even be optimal to just reorder the tables for the common instructions /// rather than changing the algorithm. 
-struct ARM64NamedImmMapper { +struct AArch64NamedImmMapper { struct Mapping { const char *Name; uint32_t Value; }; template - ARM64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) + AArch64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {} StringRef toString(uint32_t Value, bool &Valid) const; @@ -313,7 +313,7 @@ struct ARM64NamedImmMapper { uint32_t TooBigImm; }; -namespace ARM64AT { +namespace AArch64AT { enum ATValues { Invalid = -1, // Op0 Op1 CRn CRm Op2 S1E1R = 0x43c0, // 01 000 0111 1000 000 @@ -330,14 +330,14 @@ namespace ARM64AT { S12E0W = 0x63c7 // 01 100 0111 1000 111 }; - struct ATMapper : ARM64NamedImmMapper { + struct ATMapper : AArch64NamedImmMapper { const static Mapping ATPairs[]; ATMapper(); }; } -namespace ARM64DB { +namespace AArch64DB { enum DBValues { Invalid = -1, OSHLD = 0x1, @@ -354,14 +354,14 @@ namespace ARM64DB { SY = 0xf }; - struct DBarrierMapper : ARM64NamedImmMapper { + struct DBarrierMapper : AArch64NamedImmMapper { const static Mapping DBarrierPairs[]; DBarrierMapper(); }; } -namespace ARM64DC { +namespace AArch64DC { enum DCValues { Invalid = -1, // Op1 CRn CRm Op2 ZVA = 0x5ba1, // 01 011 0111 0100 001 @@ -374,7 +374,7 @@ namespace ARM64DC { CISW = 0x43f2 // 01 000 0111 1110 010 }; - struct DCMapper : ARM64NamedImmMapper { + struct DCMapper : AArch64NamedImmMapper { const static Mapping DCPairs[]; DCMapper(); @@ -382,7 +382,7 @@ namespace ARM64DC { } -namespace ARM64IC { +namespace AArch64IC { enum ICValues { Invalid = -1, // Op1 CRn CRm Op2 IALLUIS = 0x0388, // 000 0111 0001 000 @@ -391,7 +391,7 @@ namespace ARM64IC { }; - struct ICMapper : ARM64NamedImmMapper { + struct ICMapper : AArch64NamedImmMapper { const static Mapping ICPairs[]; ICMapper(); @@ -402,19 +402,19 @@ namespace ARM64IC { } } -namespace ARM64ISB { +namespace AArch64ISB { enum ISBValues { Invalid = -1, SY = 0xf }; - struct ISBMapper : ARM64NamedImmMapper { + struct ISBMapper : AArch64NamedImmMapper { const static Mapping ISBPairs[]; ISBMapper(); }; } -namespace ARM64PRFM { +namespace AArch64PRFM { enum PRFMValues { Invalid = -1, PLDL1KEEP = 0x00, @@ -437,14 +437,14 @@ namespace ARM64PRFM { PSTL3STRM = 0x15 }; - struct PRFMMapper : ARM64NamedImmMapper { + struct PRFMMapper : AArch64NamedImmMapper { const static Mapping PRFMPairs[]; PRFMMapper(); }; } -namespace ARM64PState { +namespace AArch64PState { enum PStateValues { Invalid = -1, SPSel = 0x05, @@ -452,7 +452,7 @@ namespace ARM64PState { DAIFClr = 0x1f }; - struct PStateMapper : ARM64NamedImmMapper { + struct PStateMapper : AArch64NamedImmMapper { const static Mapping PStatePairs[]; PStateMapper(); @@ -460,7 +460,7 @@ namespace ARM64PState { } -namespace ARM64SE { +namespace AArch64SE { enum ShiftExtSpecifiers { Invalid = -1, LSL, @@ -481,7 +481,7 @@ namespace ARM64SE { }; } -namespace ARM64Layout { +namespace AArch64Layout { enum VectorLayout { Invalid = -1, VL_8B, @@ -504,43 +504,43 @@ namespace ARM64Layout { } inline static const char * -ARM64VectorLayoutToString(ARM64Layout::VectorLayout Layout) { +AArch64VectorLayoutToString(AArch64Layout::VectorLayout Layout) { switch (Layout) { - case ARM64Layout::VL_8B: return ".8b"; - case ARM64Layout::VL_4H: return ".4h"; - case ARM64Layout::VL_2S: return ".2s"; - case ARM64Layout::VL_1D: return ".1d"; - case ARM64Layout::VL_16B: return ".16b"; - case ARM64Layout::VL_8H: return ".8h"; - case ARM64Layout::VL_4S: return ".4s"; - case ARM64Layout::VL_2D: return ".2d"; - case ARM64Layout::VL_B: return ".b"; - case 
ARM64Layout::VL_H: return ".h"; - case ARM64Layout::VL_S: return ".s"; - case ARM64Layout::VL_D: return ".d"; + case AArch64Layout::VL_8B: return ".8b"; + case AArch64Layout::VL_4H: return ".4h"; + case AArch64Layout::VL_2S: return ".2s"; + case AArch64Layout::VL_1D: return ".1d"; + case AArch64Layout::VL_16B: return ".16b"; + case AArch64Layout::VL_8H: return ".8h"; + case AArch64Layout::VL_4S: return ".4s"; + case AArch64Layout::VL_2D: return ".2d"; + case AArch64Layout::VL_B: return ".b"; + case AArch64Layout::VL_H: return ".h"; + case AArch64Layout::VL_S: return ".s"; + case AArch64Layout::VL_D: return ".d"; default: llvm_unreachable("Unknown Vector Layout"); } } -inline static ARM64Layout::VectorLayout -ARM64StringToVectorLayout(StringRef LayoutStr) { - return StringSwitch(LayoutStr) - .Case(".8b", ARM64Layout::VL_8B) - .Case(".4h", ARM64Layout::VL_4H) - .Case(".2s", ARM64Layout::VL_2S) - .Case(".1d", ARM64Layout::VL_1D) - .Case(".16b", ARM64Layout::VL_16B) - .Case(".8h", ARM64Layout::VL_8H) - .Case(".4s", ARM64Layout::VL_4S) - .Case(".2d", ARM64Layout::VL_2D) - .Case(".b", ARM64Layout::VL_B) - .Case(".h", ARM64Layout::VL_H) - .Case(".s", ARM64Layout::VL_S) - .Case(".d", ARM64Layout::VL_D) - .Default(ARM64Layout::Invalid); +inline static AArch64Layout::VectorLayout +AArch64StringToVectorLayout(StringRef LayoutStr) { + return StringSwitch(LayoutStr) + .Case(".8b", AArch64Layout::VL_8B) + .Case(".4h", AArch64Layout::VL_4H) + .Case(".2s", AArch64Layout::VL_2S) + .Case(".1d", AArch64Layout::VL_1D) + .Case(".16b", AArch64Layout::VL_16B) + .Case(".8h", AArch64Layout::VL_8H) + .Case(".4s", AArch64Layout::VL_4S) + .Case(".2d", AArch64Layout::VL_2D) + .Case(".b", AArch64Layout::VL_B) + .Case(".h", AArch64Layout::VL_H) + .Case(".s", AArch64Layout::VL_S) + .Case(".d", AArch64Layout::VL_D) + .Default(AArch64Layout::Invalid); } -namespace ARM64SysReg { +namespace AArch64SysReg { enum SysRegROValues { MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000 DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000 @@ -571,16 +571,16 @@ namespace ARM64SysReg { ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011 ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100 ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101 - ID_AARM64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000 - ID_AARM64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001 - ID_AARM64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000 - ID_AARM64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001 - ID_AARM64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100 - ID_AARM64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101 - ID_AARM64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000 - ID_AARM64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 - ID_AARM64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 - ID_AARM64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 + ID_A64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000 + ID_A64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001 + ID_A64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000 + ID_A64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001 + ID_A64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100 + ID_A64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101 + ID_A64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000 + ID_A64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 + ID_A64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 + ID_A64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000 MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001 MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010 @@ -1143,15 +1143,15 @@ namespace ARM64SysReg { CPM_IOACC_CTL_EL3 = 0xff90 }; - // Note that these do not inherit from ARM64NamedImmMapper. 
This class is + // Note that these do not inherit from AArch64NamedImmMapper. This class is // sufficiently different in its behaviour that I don't believe it's worth - // burdening the common ARM64NamedImmMapper with abstractions only needed in + // burdening the common AArch64NamedImmMapper with abstractions only needed in // this one case. struct SysRegMapper { - static const ARM64NamedImmMapper::Mapping SysRegPairs[]; - static const ARM64NamedImmMapper::Mapping CycloneSysRegPairs[]; + static const AArch64NamedImmMapper::Mapping SysRegPairs[]; + static const AArch64NamedImmMapper::Mapping CycloneSysRegPairs[]; - const ARM64NamedImmMapper::Mapping *InstPairs; + const AArch64NamedImmMapper::Mapping *InstPairs; size_t NumInstPairs; uint64_t FeatureBits; @@ -1161,19 +1161,19 @@ namespace ARM64SysReg { }; struct MSRMapper : SysRegMapper { - static const ARM64NamedImmMapper::Mapping MSRPairs[]; + static const AArch64NamedImmMapper::Mapping MSRPairs[]; MSRMapper(uint64_t FeatureBits); }; struct MRSMapper : SysRegMapper { - static const ARM64NamedImmMapper::Mapping MRSPairs[]; + static const AArch64NamedImmMapper::Mapping MRSPairs[]; MRSMapper(uint64_t FeatureBits); }; uint32_t ParseGenericRegister(StringRef Name, bool &Valid); } -namespace ARM64TLBI { +namespace AArch64TLBI { enum TLBIValues { Invalid = -1, // Op0 Op1 CRn CRm Op2 IPAS2E1IS = 0x6401, // 01 100 1000 0000 001 @@ -1210,7 +1210,7 @@ namespace ARM64TLBI { VAALE1 = 0x443f // 01 000 1000 0111 111 }; - struct TLBIMapper : ARM64NamedImmMapper { + struct TLBIMapper : AArch64NamedImmMapper { const static Mapping TLBIPairs[]; TLBIMapper(); @@ -1235,11 +1235,11 @@ namespace ARM64TLBI { } } -namespace ARM64II { +namespace AArch64II { /// Target Operand Flag enum. enum TOF { //===------------------------------------------------------------------===// - // ARM64 Specific MachineOperand flags. + // AArch64 Specific MachineOperand flags. MO_NO_FLAG, @@ -1287,7 +1287,7 @@ namespace ARM64II { /// referee will affect interpretation. 
MO_TLS = 0x20 }; -} // end namespace ARM64II +} // end namespace AArch64II } // end namespace llvm diff --git a/lib/Target/AArch64/Utils/CMakeLists.txt b/lib/Target/AArch64/Utils/CMakeLists.txt new file mode 100644 index 000000000000..8ee03a7571b4 --- /dev/null +++ b/lib/Target/AArch64/Utils/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMAArch64Utils + AArch64BaseInfo.cpp + ) diff --git a/lib/Target/ARM64/TargetInfo/LLVMBuild.txt b/lib/Target/AArch64/Utils/LLVMBuild.txt similarity index 79% rename from lib/Target/ARM64/TargetInfo/LLVMBuild.txt rename to lib/Target/AArch64/Utils/LLVMBuild.txt index b9ecb7069523..bcefeb672f76 100644 --- a/lib/Target/ARM64/TargetInfo/LLVMBuild.txt +++ b/lib/Target/AArch64/Utils/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/ARM64/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; +;===- ./lib/Target/AArch64/Utils/LLVMBuild.txt ----------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,7 +17,7 @@ [component_0] type = Library -name = ARM64Info -parent = ARM64 +name = AArch64Utils +parent = AArch64 required_libraries = Support -add_to_library_groups = ARM64 +add_to_library_groups = AArch64 diff --git a/lib/Target/ARM64/Utils/Makefile b/lib/Target/AArch64/Utils/Makefile similarity index 64% rename from lib/Target/ARM64/Utils/Makefile rename to lib/Target/AArch64/Utils/Makefile index 6491ad9a07bd..0b80f82f2b99 100644 --- a/lib/Target/ARM64/Utils/Makefile +++ b/lib/Target/AArch64/Utils/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/ARM64/Utils/Makefile -------------------*- Makefile -*-===## +##===- lib/Target/AArch64/Utils/Makefile -------------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # @@ -7,9 +7,10 @@ # ##===----------------------------------------------------------------------===## LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Utils +LIBRARYNAME = LLVMAArch64Utils -# Hack: we need to include 'main' ARM64 target directory to grab private headers +# Hack: we need to include 'main' AArch64 target directory to grab private +# headers CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/ARM64.h b/lib/Target/ARM64/ARM64.h deleted file mode 100644 index debb9002eb4b..000000000000 --- a/lib/Target/ARM64/ARM64.h +++ /dev/null @@ -1,48 +0,0 @@ -//===-- ARM64.h - Top-level interface for ARM64 representation --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in the LLVM -// ARM64 back-end. 
-// -//===----------------------------------------------------------------------===// - -#ifndef TARGET_ARM64_H -#define TARGET_ARM64_H - -#include "Utils/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64MCTargetDesc.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/DataTypes.h" - -namespace llvm { - -class ARM64TargetMachine; -class FunctionPass; -class MachineFunctionPass; - -FunctionPass *createARM64DeadRegisterDefinitions(); -FunctionPass *createARM64ConditionalCompares(); -FunctionPass *createARM64AdvSIMDScalar(); -FunctionPass *createARM64BranchRelaxation(); -FunctionPass *createARM64ISelDag(ARM64TargetMachine &TM, - CodeGenOpt::Level OptLevel); -FunctionPass *createARM64StorePairSuppressPass(); -FunctionPass *createARM64ExpandPseudoPass(); -FunctionPass *createARM64LoadStoreOptimizationPass(); -ModulePass *createARM64PromoteConstantPass(); -FunctionPass *createARM64AddressTypePromotionPass(); -/// \brief Creates an ARM-specific Target Transformation Info pass. -ImmutablePass *createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM); - -FunctionPass *createARM64CleanupLocalDynamicTLSPass(); - -FunctionPass *createARM64CollectLOHPass(); -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/ARM64RegisterInfo.td b/lib/Target/ARM64/ARM64RegisterInfo.td deleted file mode 100644 index 28d01809739a..000000000000 --- a/lib/Target/ARM64/ARM64RegisterInfo.td +++ /dev/null @@ -1,593 +0,0 @@ -//===- ARM64RegisterInfo.td - Describe the ARM64 Regisers --*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - - -class ARM64Reg enc, string n, list subregs = [], - list altNames = []> - : Register { - let HWEncoding = enc; - let Namespace = "ARM64"; - let SubRegs = subregs; -} - -let Namespace = "ARM64" in { - def sub_32 : SubRegIndex<32>; - - def bsub : SubRegIndex<8>; - def hsub : SubRegIndex<16>; - def ssub : SubRegIndex<32>; - def dsub : SubRegIndex<32>; - def qhisub : SubRegIndex<64>; - def qsub : SubRegIndex<64>; - // Note: Code depends on these having consecutive numbers - def dsub0 : SubRegIndex<64>; - def dsub1 : SubRegIndex<64>; - def dsub2 : SubRegIndex<64>; - def dsub3 : SubRegIndex<64>; - // Note: Code depends on these having consecutive numbers - def qsub0 : SubRegIndex<128>; - def qsub1 : SubRegIndex<128>; - def qsub2 : SubRegIndex<128>; - def qsub3 : SubRegIndex<128>; -} - -let Namespace = "ARM64" in { - def vreg : RegAltNameIndex; - def vlist1 : RegAltNameIndex; -} - -//===----------------------------------------------------------------------===// -// Registers -//===----------------------------------------------------------------------===// -def W0 : ARM64Reg<0, "w0" >, DwarfRegNum<[0]>; -def W1 : ARM64Reg<1, "w1" >, DwarfRegNum<[1]>; -def W2 : ARM64Reg<2, "w2" >, DwarfRegNum<[2]>; -def W3 : ARM64Reg<3, "w3" >, DwarfRegNum<[3]>; -def W4 : ARM64Reg<4, "w4" >, DwarfRegNum<[4]>; -def W5 : ARM64Reg<5, "w5" >, DwarfRegNum<[5]>; -def W6 : ARM64Reg<6, "w6" >, DwarfRegNum<[6]>; -def W7 : ARM64Reg<7, "w7" >, DwarfRegNum<[7]>; -def W8 : ARM64Reg<8, "w8" >, DwarfRegNum<[8]>; -def W9 : ARM64Reg<9, "w9" >, DwarfRegNum<[9]>; -def W10 : ARM64Reg<10, "w10">, DwarfRegNum<[10]>; -def W11 : ARM64Reg<11, "w11">, DwarfRegNum<[11]>; -def W12 : ARM64Reg<12, "w12">, 
DwarfRegNum<[12]>; -def W13 : ARM64Reg<13, "w13">, DwarfRegNum<[13]>; -def W14 : ARM64Reg<14, "w14">, DwarfRegNum<[14]>; -def W15 : ARM64Reg<15, "w15">, DwarfRegNum<[15]>; -def W16 : ARM64Reg<16, "w16">, DwarfRegNum<[16]>; -def W17 : ARM64Reg<17, "w17">, DwarfRegNum<[17]>; -def W18 : ARM64Reg<18, "w18">, DwarfRegNum<[18]>; -def W19 : ARM64Reg<19, "w19">, DwarfRegNum<[19]>; -def W20 : ARM64Reg<20, "w20">, DwarfRegNum<[20]>; -def W21 : ARM64Reg<21, "w21">, DwarfRegNum<[21]>; -def W22 : ARM64Reg<22, "w22">, DwarfRegNum<[22]>; -def W23 : ARM64Reg<23, "w23">, DwarfRegNum<[23]>; -def W24 : ARM64Reg<24, "w24">, DwarfRegNum<[24]>; -def W25 : ARM64Reg<25, "w25">, DwarfRegNum<[25]>; -def W26 : ARM64Reg<26, "w26">, DwarfRegNum<[26]>; -def W27 : ARM64Reg<27, "w27">, DwarfRegNum<[27]>; -def W28 : ARM64Reg<28, "w28">, DwarfRegNum<[28]>; -def W29 : ARM64Reg<29, "w29">, DwarfRegNum<[29]>; -def W30 : ARM64Reg<30, "w30">, DwarfRegNum<[30]>; -def WSP : ARM64Reg<31, "wsp">, DwarfRegNum<[31]>; -def WZR : ARM64Reg<31, "wzr">, DwarfRegAlias; - -let SubRegIndices = [sub_32] in { -def X0 : ARM64Reg<0, "x0", [W0]>, DwarfRegAlias; -def X1 : ARM64Reg<1, "x1", [W1]>, DwarfRegAlias; -def X2 : ARM64Reg<2, "x2", [W2]>, DwarfRegAlias; -def X3 : ARM64Reg<3, "x3", [W3]>, DwarfRegAlias; -def X4 : ARM64Reg<4, "x4", [W4]>, DwarfRegAlias; -def X5 : ARM64Reg<5, "x5", [W5]>, DwarfRegAlias; -def X6 : ARM64Reg<6, "x6", [W6]>, DwarfRegAlias; -def X7 : ARM64Reg<7, "x7", [W7]>, DwarfRegAlias; -def X8 : ARM64Reg<8, "x8", [W8]>, DwarfRegAlias; -def X9 : ARM64Reg<9, "x9", [W9]>, DwarfRegAlias; -def X10 : ARM64Reg<10, "x10", [W10]>, DwarfRegAlias; -def X11 : ARM64Reg<11, "x11", [W11]>, DwarfRegAlias; -def X12 : ARM64Reg<12, "x12", [W12]>, DwarfRegAlias; -def X13 : ARM64Reg<13, "x13", [W13]>, DwarfRegAlias; -def X14 : ARM64Reg<14, "x14", [W14]>, DwarfRegAlias; -def X15 : ARM64Reg<15, "x15", [W15]>, DwarfRegAlias; -def X16 : ARM64Reg<16, "x16", [W16]>, DwarfRegAlias; -def X17 : ARM64Reg<17, "x17", [W17]>, DwarfRegAlias; -def X18 : ARM64Reg<18, "x18", [W18]>, DwarfRegAlias; -def X19 : ARM64Reg<19, "x19", [W19]>, DwarfRegAlias; -def X20 : ARM64Reg<20, "x20", [W20]>, DwarfRegAlias; -def X21 : ARM64Reg<21, "x21", [W21]>, DwarfRegAlias; -def X22 : ARM64Reg<22, "x22", [W22]>, DwarfRegAlias; -def X23 : ARM64Reg<23, "x23", [W23]>, DwarfRegAlias; -def X24 : ARM64Reg<24, "x24", [W24]>, DwarfRegAlias; -def X25 : ARM64Reg<25, "x25", [W25]>, DwarfRegAlias; -def X26 : ARM64Reg<26, "x26", [W26]>, DwarfRegAlias; -def X27 : ARM64Reg<27, "x27", [W27]>, DwarfRegAlias; -def X28 : ARM64Reg<28, "x28", [W28]>, DwarfRegAlias; -def FP : ARM64Reg<29, "x29", [W29]>, DwarfRegAlias; -def LR : ARM64Reg<30, "x30", [W30]>, DwarfRegAlias; -def SP : ARM64Reg<31, "sp", [WSP]>, DwarfRegAlias; -def XZR : ARM64Reg<31, "xzr", [WZR]>, DwarfRegAlias; -} - -// Condition code register. -def NZCV : ARM64Reg<0, "nzcv">; - -// GPR register classes with the intersections of GPR32/GPR32sp and -// GPR64/GPR64sp for use by the coalescer. -def GPR32common : RegisterClass<"ARM64", [i32], 32, (sequence "W%u", 0, 30)> { - let AltOrders = [(rotl GPR32common, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64common : RegisterClass<"ARM64", [i64], 64, - (add (sequence "X%u", 0, 28), FP, LR)> { - let AltOrders = [(rotl GPR64common, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -// GPR register classes which exclude SP/WSP. 
-def GPR32 : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR)> { - let AltOrders = [(rotl GPR32, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64 : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR)> { - let AltOrders = [(rotl GPR64, 8)]; - let AltOrderSelect = [{ return 1; }]; -} - -// GPR register classes which include SP/WSP. -def GPR32sp : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WSP)> { - let AltOrders = [(rotl GPR32sp, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64sp : RegisterClass<"ARM64", [i64], 64, (add GPR64common, SP)> { - let AltOrders = [(rotl GPR64sp, 8)]; - let AltOrderSelect = [{ return 1; }]; -} - -def GPR32sponly : RegisterClass<"ARM64", [i32], 32, (add WSP)>; -def GPR64sponly : RegisterClass<"ARM64", [i64], 64, (add SP)>; - -def GPR64spPlus0Operand : AsmOperandClass { - let Name = "GPR64sp0"; - let RenderMethod = "addRegOperands"; - let ParserMethod = "tryParseGPR64sp0Operand"; -} - -def GPR64sp0 : RegisterOperand { - let ParserMatchClass = GPR64spPlus0Operand; -} - -// GPR register classes which include WZR/XZR AND SP/WSP. This is not a -// constraint used by any instructions, it is used as a common super-class. -def GPR32all : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR, WSP)>; -def GPR64all : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR, SP)>; - -// For tail calls, we can't use callee-saved registers, as they are restored -// to the saved value before the tail call, which would clobber a call address. -// This is for indirect tail calls to store the address of the destination. -def tcGPR64 : RegisterClass<"ARM64", [i64], 64, (sub GPR64common, X19, X20, X21, - X22, X23, X24, X25, X26, - X27, X28)>; - -// GPR register classes for post increment amount of vector load/store that -// has alternate printing when Rm=31 and prints a constant immediate value -// equal to the total number of bytes transferred. - -// FIXME: TableGen *should* be able to do these itself now. There appears to be -// a bug in counting how many operands a Post-indexed MCInst should have which -// means the aliases don't trigger. -def GPR64pi1 : RegisterOperand">; -def GPR64pi2 : RegisterOperand">; -def GPR64pi3 : RegisterOperand">; -def GPR64pi4 : RegisterOperand">; -def GPR64pi6 : RegisterOperand">; -def GPR64pi8 : RegisterOperand">; -def GPR64pi12 : RegisterOperand">; -def GPR64pi16 : RegisterOperand">; -def GPR64pi24 : RegisterOperand">; -def GPR64pi32 : RegisterOperand">; -def GPR64pi48 : RegisterOperand">; -def GPR64pi64 : RegisterOperand">; - -// Condition code regclass. -def CCR : RegisterClass<"ARM64", [i32], 32, (add NZCV)> { - let CopyCost = -1; // Don't allow copying of status registers. - - // CCR is not allocatable. 
- let isAllocatable = 0; -} - -//===----------------------------------------------------------------------===// -// Floating Point Scalar Registers -//===----------------------------------------------------------------------===// - -def B0 : ARM64Reg<0, "b0">, DwarfRegNum<[64]>; -def B1 : ARM64Reg<1, "b1">, DwarfRegNum<[65]>; -def B2 : ARM64Reg<2, "b2">, DwarfRegNum<[66]>; -def B3 : ARM64Reg<3, "b3">, DwarfRegNum<[67]>; -def B4 : ARM64Reg<4, "b4">, DwarfRegNum<[68]>; -def B5 : ARM64Reg<5, "b5">, DwarfRegNum<[69]>; -def B6 : ARM64Reg<6, "b6">, DwarfRegNum<[70]>; -def B7 : ARM64Reg<7, "b7">, DwarfRegNum<[71]>; -def B8 : ARM64Reg<8, "b8">, DwarfRegNum<[72]>; -def B9 : ARM64Reg<9, "b9">, DwarfRegNum<[73]>; -def B10 : ARM64Reg<10, "b10">, DwarfRegNum<[74]>; -def B11 : ARM64Reg<11, "b11">, DwarfRegNum<[75]>; -def B12 : ARM64Reg<12, "b12">, DwarfRegNum<[76]>; -def B13 : ARM64Reg<13, "b13">, DwarfRegNum<[77]>; -def B14 : ARM64Reg<14, "b14">, DwarfRegNum<[78]>; -def B15 : ARM64Reg<15, "b15">, DwarfRegNum<[79]>; -def B16 : ARM64Reg<16, "b16">, DwarfRegNum<[80]>; -def B17 : ARM64Reg<17, "b17">, DwarfRegNum<[81]>; -def B18 : ARM64Reg<18, "b18">, DwarfRegNum<[82]>; -def B19 : ARM64Reg<19, "b19">, DwarfRegNum<[83]>; -def B20 : ARM64Reg<20, "b20">, DwarfRegNum<[84]>; -def B21 : ARM64Reg<21, "b21">, DwarfRegNum<[85]>; -def B22 : ARM64Reg<22, "b22">, DwarfRegNum<[86]>; -def B23 : ARM64Reg<23, "b23">, DwarfRegNum<[87]>; -def B24 : ARM64Reg<24, "b24">, DwarfRegNum<[88]>; -def B25 : ARM64Reg<25, "b25">, DwarfRegNum<[89]>; -def B26 : ARM64Reg<26, "b26">, DwarfRegNum<[90]>; -def B27 : ARM64Reg<27, "b27">, DwarfRegNum<[91]>; -def B28 : ARM64Reg<28, "b28">, DwarfRegNum<[92]>; -def B29 : ARM64Reg<29, "b29">, DwarfRegNum<[93]>; -def B30 : ARM64Reg<30, "b30">, DwarfRegNum<[94]>; -def B31 : ARM64Reg<31, "b31">, DwarfRegNum<[95]>; - -let SubRegIndices = [bsub] in { -def H0 : ARM64Reg<0, "h0", [B0]>, DwarfRegAlias; -def H1 : ARM64Reg<1, "h1", [B1]>, DwarfRegAlias; -def H2 : ARM64Reg<2, "h2", [B2]>, DwarfRegAlias; -def H3 : ARM64Reg<3, "h3", [B3]>, DwarfRegAlias; -def H4 : ARM64Reg<4, "h4", [B4]>, DwarfRegAlias; -def H5 : ARM64Reg<5, "h5", [B5]>, DwarfRegAlias; -def H6 : ARM64Reg<6, "h6", [B6]>, DwarfRegAlias; -def H7 : ARM64Reg<7, "h7", [B7]>, DwarfRegAlias; -def H8 : ARM64Reg<8, "h8", [B8]>, DwarfRegAlias; -def H9 : ARM64Reg<9, "h9", [B9]>, DwarfRegAlias; -def H10 : ARM64Reg<10, "h10", [B10]>, DwarfRegAlias; -def H11 : ARM64Reg<11, "h11", [B11]>, DwarfRegAlias; -def H12 : ARM64Reg<12, "h12", [B12]>, DwarfRegAlias; -def H13 : ARM64Reg<13, "h13", [B13]>, DwarfRegAlias; -def H14 : ARM64Reg<14, "h14", [B14]>, DwarfRegAlias; -def H15 : ARM64Reg<15, "h15", [B15]>, DwarfRegAlias; -def H16 : ARM64Reg<16, "h16", [B16]>, DwarfRegAlias; -def H17 : ARM64Reg<17, "h17", [B17]>, DwarfRegAlias; -def H18 : ARM64Reg<18, "h18", [B18]>, DwarfRegAlias; -def H19 : ARM64Reg<19, "h19", [B19]>, DwarfRegAlias; -def H20 : ARM64Reg<20, "h20", [B20]>, DwarfRegAlias; -def H21 : ARM64Reg<21, "h21", [B21]>, DwarfRegAlias; -def H22 : ARM64Reg<22, "h22", [B22]>, DwarfRegAlias; -def H23 : ARM64Reg<23, "h23", [B23]>, DwarfRegAlias; -def H24 : ARM64Reg<24, "h24", [B24]>, DwarfRegAlias; -def H25 : ARM64Reg<25, "h25", [B25]>, DwarfRegAlias; -def H26 : ARM64Reg<26, "h26", [B26]>, DwarfRegAlias; -def H27 : ARM64Reg<27, "h27", [B27]>, DwarfRegAlias; -def H28 : ARM64Reg<28, "h28", [B28]>, DwarfRegAlias; -def H29 : ARM64Reg<29, "h29", [B29]>, DwarfRegAlias; -def H30 : ARM64Reg<30, "h30", [B30]>, DwarfRegAlias; -def H31 : ARM64Reg<31, "h31", [B31]>, 
DwarfRegAlias; -} - -let SubRegIndices = [hsub] in { -def S0 : ARM64Reg<0, "s0", [H0]>, DwarfRegAlias; -def S1 : ARM64Reg<1, "s1", [H1]>, DwarfRegAlias; -def S2 : ARM64Reg<2, "s2", [H2]>, DwarfRegAlias; -def S3 : ARM64Reg<3, "s3", [H3]>, DwarfRegAlias; -def S4 : ARM64Reg<4, "s4", [H4]>, DwarfRegAlias; -def S5 : ARM64Reg<5, "s5", [H5]>, DwarfRegAlias; -def S6 : ARM64Reg<6, "s6", [H6]>, DwarfRegAlias; -def S7 : ARM64Reg<7, "s7", [H7]>, DwarfRegAlias; -def S8 : ARM64Reg<8, "s8", [H8]>, DwarfRegAlias; -def S9 : ARM64Reg<9, "s9", [H9]>, DwarfRegAlias; -def S10 : ARM64Reg<10, "s10", [H10]>, DwarfRegAlias; -def S11 : ARM64Reg<11, "s11", [H11]>, DwarfRegAlias; -def S12 : ARM64Reg<12, "s12", [H12]>, DwarfRegAlias; -def S13 : ARM64Reg<13, "s13", [H13]>, DwarfRegAlias; -def S14 : ARM64Reg<14, "s14", [H14]>, DwarfRegAlias; -def S15 : ARM64Reg<15, "s15", [H15]>, DwarfRegAlias; -def S16 : ARM64Reg<16, "s16", [H16]>, DwarfRegAlias; -def S17 : ARM64Reg<17, "s17", [H17]>, DwarfRegAlias; -def S18 : ARM64Reg<18, "s18", [H18]>, DwarfRegAlias; -def S19 : ARM64Reg<19, "s19", [H19]>, DwarfRegAlias; -def S20 : ARM64Reg<20, "s20", [H20]>, DwarfRegAlias; -def S21 : ARM64Reg<21, "s21", [H21]>, DwarfRegAlias; -def S22 : ARM64Reg<22, "s22", [H22]>, DwarfRegAlias; -def S23 : ARM64Reg<23, "s23", [H23]>, DwarfRegAlias; -def S24 : ARM64Reg<24, "s24", [H24]>, DwarfRegAlias; -def S25 : ARM64Reg<25, "s25", [H25]>, DwarfRegAlias; -def S26 : ARM64Reg<26, "s26", [H26]>, DwarfRegAlias; -def S27 : ARM64Reg<27, "s27", [H27]>, DwarfRegAlias; -def S28 : ARM64Reg<28, "s28", [H28]>, DwarfRegAlias; -def S29 : ARM64Reg<29, "s29", [H29]>, DwarfRegAlias; -def S30 : ARM64Reg<30, "s30", [H30]>, DwarfRegAlias; -def S31 : ARM64Reg<31, "s31", [H31]>, DwarfRegAlias; -} - -let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in { -def D0 : ARM64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias; -def D1 : ARM64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias; -def D2 : ARM64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias; -def D3 : ARM64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias; -def D4 : ARM64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias; -def D5 : ARM64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias; -def D6 : ARM64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias; -def D7 : ARM64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias; -def D8 : ARM64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias; -def D9 : ARM64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias; -def D10 : ARM64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias; -def D11 : ARM64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias; -def D12 : ARM64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias; -def D13 : ARM64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias; -def D14 : ARM64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias; -def D15 : ARM64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias; -def D16 : ARM64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias; -def D17 : ARM64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias; -def D18 : ARM64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias; -def D19 : ARM64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias; -def D20 : ARM64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias; -def D21 : ARM64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias; -def D22 : ARM64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias; -def D23 : ARM64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias; -def D24 : ARM64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias; -def D25 : ARM64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias; -def D26 : ARM64Reg<26, "d26", [S26], ["v26", 
""]>, DwarfRegAlias; -def D27 : ARM64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias; -def D28 : ARM64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias; -def D29 : ARM64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias; -def D30 : ARM64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias; -def D31 : ARM64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias; -} - -let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in { -def Q0 : ARM64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias; -def Q1 : ARM64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias; -def Q2 : ARM64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias; -def Q3 : ARM64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias; -def Q4 : ARM64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias; -def Q5 : ARM64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias; -def Q6 : ARM64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias; -def Q7 : ARM64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias; -def Q8 : ARM64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias; -def Q9 : ARM64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias; -def Q10 : ARM64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias; -def Q11 : ARM64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias; -def Q12 : ARM64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias; -def Q13 : ARM64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias; -def Q14 : ARM64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias; -def Q15 : ARM64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias; -def Q16 : ARM64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias; -def Q17 : ARM64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias; -def Q18 : ARM64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias; -def Q19 : ARM64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias; -def Q20 : ARM64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias; -def Q21 : ARM64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias; -def Q22 : ARM64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias; -def Q23 : ARM64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias; -def Q24 : ARM64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias; -def Q25 : ARM64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias; -def Q26 : ARM64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias; -def Q27 : ARM64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias; -def Q28 : ARM64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias; -def Q29 : ARM64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias; -def Q30 : ARM64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias; -def Q31 : ARM64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias; -} - -def FPR8 : RegisterClass<"ARM64", [untyped], 8, (sequence "B%u", 0, 31)> { - let Size = 8; -} -def FPR16 : RegisterClass<"ARM64", [f16], 16, (sequence "H%u", 0, 31)> { - let Size = 16; -} -def FPR32 : RegisterClass<"ARM64", [f32, i32], 32,(sequence "S%u", 0, 31)>; -def FPR64 : RegisterClass<"ARM64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32, - v1i64], - 64, (sequence "D%u", 0, 31)>; -// We don't (yet) have an f128 legal type, so don't use that here. We -// normalize 128-bit vectors to v2f64 for arg passing and such, so use -// that here. -def FPR128 : RegisterClass<"ARM64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128], - 128, (sequence "Q%u", 0, 31)>; - -// The lower 16 vector registers. Some instructions can only take registers -// in this range. -def FPR128_lo : RegisterClass<"ARM64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, (trunc FPR128, 16)>; - -// Pairs, triples, and quads of 64-bit vector registers. 
-def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>; -def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2)]>; -def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2), (rotl FPR64, 3)]>; -def DD : RegisterClass<"ARM64", [untyped], 64, (add DSeqPairs)> { - let Size = 128; -} -def DDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqTriples)> { - let Size = 196; -} -def DDDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqQuads)> { - let Size = 256; -} - -// Pairs, triples, and quads of 128-bit vector registers. -def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>; -def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2)]>; -def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2), (rotl FPR128, 3)]>; -def QQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqPairs)> { - let Size = 256; -} -def QQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqTriples)> { - let Size = 384; -} -def QQQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqQuads)> { - let Size = 512; -} - - -// Vector operand versions of the FP registers. Alternate name printing and -// assmebler matching. -def VectorReg64AsmOperand : AsmOperandClass { - let Name = "VectorReg64"; - let PredicateMethod = "isVectorReg"; -} -def VectorReg128AsmOperand : AsmOperandClass { - let Name = "VectorReg128"; - let PredicateMethod = "isVectorReg"; -} - -def V64 : RegisterOperand { - let ParserMatchClass = VectorReg64AsmOperand; -} - -def V128 : RegisterOperand { - let ParserMatchClass = VectorReg128AsmOperand; -} - -def VectorRegLoAsmOperand : AsmOperandClass { let Name = "VectorRegLo"; } -def V128_lo : RegisterOperand { - let ParserMatchClass = VectorRegLoAsmOperand; -} - -class TypedVecListAsmOperand - : AsmOperandClass { - let Name = "TypedVectorList" # count # "_" # lanes # kind; - - let PredicateMethod - = "isTypedVectorList<" # count # ", " # lanes # ", '" # kind # "'>"; - let RenderMethod = "addVectorList" # regsize # "Operands<" # count # ">"; -} - -class TypedVecListRegOperand - : RegisterOperand">; - -multiclass VectorList { - // With implicit types (probably on instruction instead). E.g. { v0, v1 } - def _64AsmOperand : AsmOperandClass { - let Name = NAME # "64"; - let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; - let RenderMethod = "addVectorList64Operands<" # count # ">"; - } - - def "64" : RegisterOperand { - let ParserMatchClass = !cast(NAME # "_64AsmOperand"); - } - - def _128AsmOperand : AsmOperandClass { - let Name = NAME # "128"; - let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; - let RenderMethod = "addVectorList128Operands<" # count # ">"; - } - - def "128" : RegisterOperand { - let ParserMatchClass = !cast(NAME # "_128AsmOperand"); - } - - // 64-bit register lists with explicit type. 
- - // { v0.8b, v1.8b } - def _8bAsmOperand : TypedVecListAsmOperand; - def "8b" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_8bAsmOperand"); - } - - // { v0.4h, v1.4h } - def _4hAsmOperand : TypedVecListAsmOperand; - def "4h" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_4hAsmOperand"); - } - - // { v0.2s, v1.2s } - def _2sAsmOperand : TypedVecListAsmOperand; - def "2s" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_2sAsmOperand"); - } - - // { v0.1d, v1.1d } - def _1dAsmOperand : TypedVecListAsmOperand; - def "1d" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_1dAsmOperand"); - } - - // 128-bit register lists with explicit type - - // { v0.16b, v1.16b } - def _16bAsmOperand : TypedVecListAsmOperand; - def "16b" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_16bAsmOperand"); - } - - // { v0.8h, v1.8h } - def _8hAsmOperand : TypedVecListAsmOperand; - def "8h" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_8hAsmOperand"); - } - - // { v0.4s, v1.4s } - def _4sAsmOperand : TypedVecListAsmOperand; - def "4s" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_4sAsmOperand"); - } - - // { v0.2d, v1.2d } - def _2dAsmOperand : TypedVecListAsmOperand; - def "2d" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_2dAsmOperand"); - } - - // { v0.b, v1.b } - def _bAsmOperand : TypedVecListAsmOperand; - def "b" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_bAsmOperand"); - } - - // { v0.h, v1.h } - def _hAsmOperand : TypedVecListAsmOperand; - def "h" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_hAsmOperand"); - } - - // { v0.s, v1.s } - def _sAsmOperand : TypedVecListAsmOperand; - def "s" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_sAsmOperand"); - } - - // { v0.d, v1.d } - def _dAsmOperand : TypedVecListAsmOperand; - def "d" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_dAsmOperand"); - } - - -} - -defm VecListOne : VectorList<1, FPR64, FPR128>; -defm VecListTwo : VectorList<2, DD, QQ>; -defm VecListThree : VectorList<3, DDD, QQQ>; -defm VecListFour : VectorList<4, DDDD, QQQQ>; - - -// Register operand versions of the scalar FP registers. -def FPR16Op : RegisterOperand; -def FPR32Op : RegisterOperand; -def FPR64Op : RegisterOperand; -def FPR128Op : RegisterOperand; diff --git a/lib/Target/ARM64/ARM64TargetMachine.h b/lib/Target/ARM64/ARM64TargetMachine.h deleted file mode 100644 index 730ffcaaf6d5..000000000000 --- a/lib/Target/ARM64/ARM64TargetMachine.h +++ /dev/null @@ -1,92 +0,0 @@ -//===-- ARM64TargetMachine.h - Define TargetMachine for ARM64 ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the ARM64 specific subclass of TargetMachine. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM64TARGETMACHINE_H -#define ARM64TARGETMACHINE_H - -#include "ARM64InstrInfo.h" -#include "ARM64ISelLowering.h" -#include "ARM64Subtarget.h" -#include "ARM64FrameLowering.h" -#include "ARM64SelectionDAGInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/MC/MCStreamer.h" - -namespace llvm { - -class ARM64TargetMachine : public LLVMTargetMachine { -protected: - ARM64Subtarget Subtarget; - -private: - const DataLayout DL; - ARM64InstrInfo InstrInfo; - ARM64TargetLowering TLInfo; - ARM64FrameLowering FrameLowering; - ARM64SelectionDAGInfo TSInfo; - -public: - ARM64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Reloc::Model RM, - CodeModel::Model CM, CodeGenOpt::Level OL, - bool IsLittleEndian); - - const ARM64Subtarget *getSubtargetImpl() const override { return &Subtarget; } - const ARM64TargetLowering *getTargetLowering() const override { - return &TLInfo; - } - const DataLayout *getDataLayout() const override { return &DL; } - const ARM64FrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - const ARM64InstrInfo *getInstrInfo() const override { return &InstrInfo; } - const ARM64RegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - const ARM64SelectionDAGInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } - - // Pass Pipeline Configuration - TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - - /// \brief Register ARM64 analysis passes with a pass manager. - void addAnalysisPasses(PassManagerBase &PM) override; -}; - -// ARM64leTargetMachine - ARM64 little endian target machine. -// -class ARM64leTargetMachine : public ARM64TargetMachine { - virtual void anchor(); -public: - ARM64leTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -// ARM64beTargetMachine - ARM64 big endian target machine. 
-// -class ARM64beTargetMachine : public ARM64TargetMachine { - virtual void anchor(); -public: - ARM64beTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/CMakeLists.txt b/lib/Target/ARM64/CMakeLists.txt deleted file mode 100644 index 56ba3b732946..000000000000 --- a/lib/Target/ARM64/CMakeLists.txt +++ /dev/null @@ -1,51 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS ARM64.td) - -tablegen(LLVM ARM64GenRegisterInfo.inc -gen-register-info) -tablegen(LLVM ARM64GenInstrInfo.inc -gen-instr-info) -tablegen(LLVM ARM64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) -tablegen(LLVM ARM64GenMCPseudoLowering.inc -gen-pseudo-lowering) -tablegen(LLVM ARM64GenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM ARM64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) -tablegen(LLVM ARM64GenAsmMatcher.inc -gen-asm-matcher) -tablegen(LLVM ARM64GenDAGISel.inc -gen-dag-isel) -tablegen(LLVM ARM64GenFastISel.inc -gen-fast-isel) -tablegen(LLVM ARM64GenCallingConv.inc -gen-callingconv) -tablegen(LLVM ARM64GenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM ARM64GenDisassemblerTables.inc -gen-disassembler) -add_public_tablegen_target(ARM64CommonTableGen) - -add_llvm_target(ARM64CodeGen - ARM64AddressTypePromotion.cpp - ARM64AdvSIMDScalarPass.cpp - ARM64AsmPrinter.cpp - ARM64BranchRelaxation.cpp - ARM64CleanupLocalDynamicTLSPass.cpp - ARM64CollectLOH.cpp - ARM64ConditionalCompares.cpp - ARM64DeadRegisterDefinitionsPass.cpp - ARM64ExpandPseudoInsts.cpp - ARM64FastISel.cpp - ARM64FrameLowering.cpp - ARM64ISelDAGToDAG.cpp - ARM64ISelLowering.cpp - ARM64InstrInfo.cpp - ARM64LoadStoreOptimizer.cpp - ARM64MCInstLower.cpp - ARM64PromoteConstant.cpp - ARM64RegisterInfo.cpp - ARM64SelectionDAGInfo.cpp - ARM64StorePairSuppress.cpp - ARM64Subtarget.cpp - ARM64TargetMachine.cpp - ARM64TargetObjectFile.cpp - ARM64TargetTransformInfo.cpp -) - -add_dependencies(LLVMARM64CodeGen intrinsics_gen) - -add_subdirectory(TargetInfo) -add_subdirectory(AsmParser) -add_subdirectory(Disassembler) -add_subdirectory(InstPrinter) -add_subdirectory(MCTargetDesc) -add_subdirectory(Utils) diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp deleted file mode 100644 index 529b450352eb..000000000000 --- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp +++ /dev/null @@ -1,1312 +0,0 @@ -//===-- ARM64InstPrinter.cpp - Convert ARM64 MCInst to assembly syntax ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an ARM64 MCInst to a .s file. 
-// -//===----------------------------------------------------------------------===// - -#include "ARM64InstPrinter.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "Utils/ARM64BaseInfo.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#define DEBUG_TYPE "asm-printer" - -#define GET_INSTRUCTION_NAME -#define PRINT_ALIAS_INSTR -#include "ARM64GenAsmWriter.inc" -#define GET_INSTRUCTION_NAME -#define PRINT_ALIAS_INSTR -#include "ARM64GenAsmWriter1.inc" - -ARM64InstPrinter::ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); -} - -ARM64AppleInstPrinter::ARM64AppleInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : ARM64InstPrinter(MAI, MII, MRI, STI) {} - -void ARM64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - // This is for .cfi directives. - OS << getRegisterName(RegNo); -} - -void ARM64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - // Check for special encodings and print the canonical alias instead. - - unsigned Opcode = MI->getOpcode(); - - if (Opcode == ARM64::SYSxt) - if (printSysAlias(MI, O)) { - printAnnotation(O, Annot); - return; - } - - // SBFM/UBFM should print to a nicer aliased form if possible. - if (Opcode == ARM64::SBFMXri || Opcode == ARM64::SBFMWri || - Opcode == ARM64::UBFMXri || Opcode == ARM64::UBFMWri) { - const MCOperand &Op0 = MI->getOperand(0); - const MCOperand &Op1 = MI->getOperand(1); - const MCOperand &Op2 = MI->getOperand(2); - const MCOperand &Op3 = MI->getOperand(3); - - bool IsSigned = (Opcode == ARM64::SBFMXri || Opcode == ARM64::SBFMWri); - bool Is64Bit = (Opcode == ARM64::SBFMXri || Opcode == ARM64::UBFMXri); - if (Op2.isImm() && Op2.getImm() == 0 && Op3.isImm()) { - const char *AsmMnemonic = nullptr; - - switch (Op3.getImm()) { - default: - break; - case 7: - if (IsSigned) - AsmMnemonic = "sxtb"; - else if (!Is64Bit) - AsmMnemonic = "uxtb"; - break; - case 15: - if (IsSigned) - AsmMnemonic = "sxth"; - else if (!Is64Bit) - AsmMnemonic = "uxth"; - break; - case 31: - // *xtw is only valid for signed 64-bit operations. - if (Is64Bit && IsSigned) - AsmMnemonic = "sxtw"; - break; - } - - if (AsmMnemonic) { - O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) - << ", " << getRegisterName(getWRegFromXReg(Op1.getReg())); - printAnnotation(O, Annot); - return; - } - } - - // All immediate shifts are aliases, implemented using the Bitfield - // instruction. In all cases the immediate shift amount shift must be in - // the range 0 to (reg.size -1). 
- if (Op2.isImm() && Op3.isImm()) { - const char *AsmMnemonic = nullptr; - int shift = 0; - int64_t immr = Op2.getImm(); - int64_t imms = Op3.getImm(); - if (Opcode == ARM64::UBFMWri && imms != 0x1F && ((imms + 1) == immr)) { - AsmMnemonic = "lsl"; - shift = 31 - imms; - } else if (Opcode == ARM64::UBFMXri && imms != 0x3f && - ((imms + 1 == immr))) { - AsmMnemonic = "lsl"; - shift = 63 - imms; - } else if (Opcode == ARM64::UBFMWri && imms == 0x1f) { - AsmMnemonic = "lsr"; - shift = immr; - } else if (Opcode == ARM64::UBFMXri && imms == 0x3f) { - AsmMnemonic = "lsr"; - shift = immr; - } else if (Opcode == ARM64::SBFMWri && imms == 0x1f) { - AsmMnemonic = "asr"; - shift = immr; - } else if (Opcode == ARM64::SBFMXri && imms == 0x3f) { - AsmMnemonic = "asr"; - shift = immr; - } - if (AsmMnemonic) { - O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) - << ", " << getRegisterName(Op1.getReg()) << ", #" << shift; - printAnnotation(O, Annot); - return; - } - } - - // SBFIZ/UBFIZ aliases - if (Op2.getImm() > Op3.getImm()) { - O << '\t' << (IsSigned ? "sbfiz" : "ubfiz") << '\t' - << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg()) - << ", #" << (Is64Bit ? 64 : 32) - Op2.getImm() << ", #" << Op3.getImm() + 1; - printAnnotation(O, Annot); - return; - } - - // Otherwise SBFX/UBFX is the preferred form - O << '\t' << (IsSigned ? "sbfx" : "ubfx") << '\t' - << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg()) - << ", #" << Op2.getImm() << ", #" << Op3.getImm() - Op2.getImm() + 1; - printAnnotation(O, Annot); - return; - } - - if (Opcode == ARM64::BFMXri || Opcode == ARM64::BFMWri) { - const MCOperand &Op0 = MI->getOperand(0); // Op1 == Op0 - const MCOperand &Op2 = MI->getOperand(2); - int ImmR = MI->getOperand(3).getImm(); - int ImmS = MI->getOperand(4).getImm(); - - // BFI alias - if (ImmS < ImmR) { - int BitWidth = Opcode == ARM64::BFMXri ? 64 : 32; - int LSB = (BitWidth - ImmR) % BitWidth; - int Width = ImmS + 1; - O << "\tbfi\t" << getRegisterName(Op0.getReg()) << ", " - << getRegisterName(Op2.getReg()) << ", #" << LSB << ", #" << Width; - printAnnotation(O, Annot); - return; - } - - int LSB = ImmR; - int Width = ImmS - ImmR + 1; - // Otherwise BFXIL the preferred form - O << "\tbfxil\t" - << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op2.getReg()) - << ", #" << LSB << ", #" << Width; - printAnnotation(O, Annot); - return; - } - - // Symbolic operands for MOVZ, MOVN and MOVK already imply a shift - // (e.g. :gottprel_g1: is always going to be "lsl #16") so it should not be - // printed. 
- if ((Opcode == ARM64::MOVZXi || Opcode == ARM64::MOVZWi || - Opcode == ARM64::MOVNXi || Opcode == ARM64::MOVNWi) && - MI->getOperand(1).isExpr()) { - if (Opcode == ARM64::MOVZXi || Opcode == ARM64::MOVZWi) - O << "\tmovz\t"; - else - O << "\tmovn\t"; - - O << getRegisterName(MI->getOperand(0).getReg()) << ", #" - << *MI->getOperand(1).getExpr(); - return; - } - - if ((Opcode == ARM64::MOVKXi || Opcode == ARM64::MOVKWi) && - MI->getOperand(2).isExpr()) { - O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #" - << *MI->getOperand(2).getExpr(); - return; - } - - if (!printAliasInstr(MI, O)) - printInstruction(MI, O); - - printAnnotation(O, Annot); -} - -static bool isTblTbxInstruction(unsigned Opcode, StringRef &Layout, - bool &IsTbx) { - switch (Opcode) { - case ARM64::TBXv8i8One: - case ARM64::TBXv8i8Two: - case ARM64::TBXv8i8Three: - case ARM64::TBXv8i8Four: - IsTbx = true; - Layout = ".8b"; - return true; - case ARM64::TBLv8i8One: - case ARM64::TBLv8i8Two: - case ARM64::TBLv8i8Three: - case ARM64::TBLv8i8Four: - IsTbx = false; - Layout = ".8b"; - return true; - case ARM64::TBXv16i8One: - case ARM64::TBXv16i8Two: - case ARM64::TBXv16i8Three: - case ARM64::TBXv16i8Four: - IsTbx = true; - Layout = ".16b"; - return true; - case ARM64::TBLv16i8One: - case ARM64::TBLv16i8Two: - case ARM64::TBLv16i8Three: - case ARM64::TBLv16i8Four: - IsTbx = false; - Layout = ".16b"; - return true; - default: - return false; - } -} - -struct LdStNInstrDesc { - unsigned Opcode; - const char *Mnemonic; - const char *Layout; - int ListOperand; - bool HasLane; - int NaturalOffset; -}; - -static LdStNInstrDesc LdStNInstInfo[] = { - { ARM64::LD1i8, "ld1", ".b", 1, true, 0 }, - { ARM64::LD1i16, "ld1", ".h", 1, true, 0 }, - { ARM64::LD1i32, "ld1", ".s", 1, true, 0 }, - { ARM64::LD1i64, "ld1", ".d", 1, true, 0 }, - { ARM64::LD1i8_POST, "ld1", ".b", 2, true, 1 }, - { ARM64::LD1i16_POST, "ld1", ".h", 2, true, 2 }, - { ARM64::LD1i32_POST, "ld1", ".s", 2, true, 4 }, - { ARM64::LD1i64_POST, "ld1", ".d", 2, true, 8 }, - { ARM64::LD1Rv16b, "ld1r", ".16b", 0, false, 0 }, - { ARM64::LD1Rv8h, "ld1r", ".8h", 0, false, 0 }, - { ARM64::LD1Rv4s, "ld1r", ".4s", 0, false, 0 }, - { ARM64::LD1Rv2d, "ld1r", ".2d", 0, false, 0 }, - { ARM64::LD1Rv8b, "ld1r", ".8b", 0, false, 0 }, - { ARM64::LD1Rv4h, "ld1r", ".4h", 0, false, 0 }, - { ARM64::LD1Rv2s, "ld1r", ".2s", 0, false, 0 }, - { ARM64::LD1Rv1d, "ld1r", ".1d", 0, false, 0 }, - { ARM64::LD1Rv16b_POST, "ld1r", ".16b", 1, false, 1 }, - { ARM64::LD1Rv8h_POST, "ld1r", ".8h", 1, false, 2 }, - { ARM64::LD1Rv4s_POST, "ld1r", ".4s", 1, false, 4 }, - { ARM64::LD1Rv2d_POST, "ld1r", ".2d", 1, false, 8 }, - { ARM64::LD1Rv8b_POST, "ld1r", ".8b", 1, false, 1 }, - { ARM64::LD1Rv4h_POST, "ld1r", ".4h", 1, false, 2 }, - { ARM64::LD1Rv2s_POST, "ld1r", ".2s", 1, false, 4 }, - { ARM64::LD1Rv1d_POST, "ld1r", ".1d", 1, false, 8 }, - { ARM64::LD1Onev16b, "ld1", ".16b", 0, false, 0 }, - { ARM64::LD1Onev8h, "ld1", ".8h", 0, false, 0 }, - { ARM64::LD1Onev4s, "ld1", ".4s", 0, false, 0 }, - { ARM64::LD1Onev2d, "ld1", ".2d", 0, false, 0 }, - { ARM64::LD1Onev8b, "ld1", ".8b", 0, false, 0 }, - { ARM64::LD1Onev4h, "ld1", ".4h", 0, false, 0 }, - { ARM64::LD1Onev2s, "ld1", ".2s", 0, false, 0 }, - { ARM64::LD1Onev1d, "ld1", ".1d", 0, false, 0 }, - { ARM64::LD1Onev16b_POST, "ld1", ".16b", 1, false, 16 }, - { ARM64::LD1Onev8h_POST, "ld1", ".8h", 1, false, 16 }, - { ARM64::LD1Onev4s_POST, "ld1", ".4s", 1, false, 16 }, - { ARM64::LD1Onev2d_POST, "ld1", ".2d", 1, false, 16 }, - { ARM64::LD1Onev8b_POST, 
"ld1", ".8b", 1, false, 8 }, - { ARM64::LD1Onev4h_POST, "ld1", ".4h", 1, false, 8 }, - { ARM64::LD1Onev2s_POST, "ld1", ".2s", 1, false, 8 }, - { ARM64::LD1Onev1d_POST, "ld1", ".1d", 1, false, 8 }, - { ARM64::LD1Twov16b, "ld1", ".16b", 0, false, 0 }, - { ARM64::LD1Twov8h, "ld1", ".8h", 0, false, 0 }, - { ARM64::LD1Twov4s, "ld1", ".4s", 0, false, 0 }, - { ARM64::LD1Twov2d, "ld1", ".2d", 0, false, 0 }, - { ARM64::LD1Twov8b, "ld1", ".8b", 0, false, 0 }, - { ARM64::LD1Twov4h, "ld1", ".4h", 0, false, 0 }, - { ARM64::LD1Twov2s, "ld1", ".2s", 0, false, 0 }, - { ARM64::LD1Twov1d, "ld1", ".1d", 0, false, 0 }, - { ARM64::LD1Twov16b_POST, "ld1", ".16b", 1, false, 32 }, - { ARM64::LD1Twov8h_POST, "ld1", ".8h", 1, false, 32 }, - { ARM64::LD1Twov4s_POST, "ld1", ".4s", 1, false, 32 }, - { ARM64::LD1Twov2d_POST, "ld1", ".2d", 1, false, 32 }, - { ARM64::LD1Twov8b_POST, "ld1", ".8b", 1, false, 16 }, - { ARM64::LD1Twov4h_POST, "ld1", ".4h", 1, false, 16 }, - { ARM64::LD1Twov2s_POST, "ld1", ".2s", 1, false, 16 }, - { ARM64::LD1Twov1d_POST, "ld1", ".1d", 1, false, 16 }, - { ARM64::LD1Threev16b, "ld1", ".16b", 0, false, 0 }, - { ARM64::LD1Threev8h, "ld1", ".8h", 0, false, 0 }, - { ARM64::LD1Threev4s, "ld1", ".4s", 0, false, 0 }, - { ARM64::LD1Threev2d, "ld1", ".2d", 0, false, 0 }, - { ARM64::LD1Threev8b, "ld1", ".8b", 0, false, 0 }, - { ARM64::LD1Threev4h, "ld1", ".4h", 0, false, 0 }, - { ARM64::LD1Threev2s, "ld1", ".2s", 0, false, 0 }, - { ARM64::LD1Threev1d, "ld1", ".1d", 0, false, 0 }, - { ARM64::LD1Threev16b_POST, "ld1", ".16b", 1, false, 48 }, - { ARM64::LD1Threev8h_POST, "ld1", ".8h", 1, false, 48 }, - { ARM64::LD1Threev4s_POST, "ld1", ".4s", 1, false, 48 }, - { ARM64::LD1Threev2d_POST, "ld1", ".2d", 1, false, 48 }, - { ARM64::LD1Threev8b_POST, "ld1", ".8b", 1, false, 24 }, - { ARM64::LD1Threev4h_POST, "ld1", ".4h", 1, false, 24 }, - { ARM64::LD1Threev2s_POST, "ld1", ".2s", 1, false, 24 }, - { ARM64::LD1Threev1d_POST, "ld1", ".1d", 1, false, 24 }, - { ARM64::LD1Fourv16b, "ld1", ".16b", 0, false, 0 }, - { ARM64::LD1Fourv8h, "ld1", ".8h", 0, false, 0 }, - { ARM64::LD1Fourv4s, "ld1", ".4s", 0, false, 0 }, - { ARM64::LD1Fourv2d, "ld1", ".2d", 0, false, 0 }, - { ARM64::LD1Fourv8b, "ld1", ".8b", 0, false, 0 }, - { ARM64::LD1Fourv4h, "ld1", ".4h", 0, false, 0 }, - { ARM64::LD1Fourv2s, "ld1", ".2s", 0, false, 0 }, - { ARM64::LD1Fourv1d, "ld1", ".1d", 0, false, 0 }, - { ARM64::LD1Fourv16b_POST, "ld1", ".16b", 1, false, 64 }, - { ARM64::LD1Fourv8h_POST, "ld1", ".8h", 1, false, 64 }, - { ARM64::LD1Fourv4s_POST, "ld1", ".4s", 1, false, 64 }, - { ARM64::LD1Fourv2d_POST, "ld1", ".2d", 1, false, 64 }, - { ARM64::LD1Fourv8b_POST, "ld1", ".8b", 1, false, 32 }, - { ARM64::LD1Fourv4h_POST, "ld1", ".4h", 1, false, 32 }, - { ARM64::LD1Fourv2s_POST, "ld1", ".2s", 1, false, 32 }, - { ARM64::LD1Fourv1d_POST, "ld1", ".1d", 1, false, 32 }, - { ARM64::LD2i8, "ld2", ".b", 1, true, 0 }, - { ARM64::LD2i16, "ld2", ".h", 1, true, 0 }, - { ARM64::LD2i32, "ld2", ".s", 1, true, 0 }, - { ARM64::LD2i64, "ld2", ".d", 1, true, 0 }, - { ARM64::LD2i8_POST, "ld2", ".b", 2, true, 2 }, - { ARM64::LD2i16_POST, "ld2", ".h", 2, true, 4 }, - { ARM64::LD2i32_POST, "ld2", ".s", 2, true, 8 }, - { ARM64::LD2i64_POST, "ld2", ".d", 2, true, 16 }, - { ARM64::LD2Rv16b, "ld2r", ".16b", 0, false, 0 }, - { ARM64::LD2Rv8h, "ld2r", ".8h", 0, false, 0 }, - { ARM64::LD2Rv4s, "ld2r", ".4s", 0, false, 0 }, - { ARM64::LD2Rv2d, "ld2r", ".2d", 0, false, 0 }, - { ARM64::LD2Rv8b, "ld2r", ".8b", 0, false, 0 }, - { ARM64::LD2Rv4h, "ld2r", ".4h", 0, false, 0 }, - { 
ARM64::LD2Rv2s, "ld2r", ".2s", 0, false, 0 }, - { ARM64::LD2Rv1d, "ld2r", ".1d", 0, false, 0 }, - { ARM64::LD2Rv16b_POST, "ld2r", ".16b", 1, false, 2 }, - { ARM64::LD2Rv8h_POST, "ld2r", ".8h", 1, false, 4 }, - { ARM64::LD2Rv4s_POST, "ld2r", ".4s", 1, false, 8 }, - { ARM64::LD2Rv2d_POST, "ld2r", ".2d", 1, false, 16 }, - { ARM64::LD2Rv8b_POST, "ld2r", ".8b", 1, false, 2 }, - { ARM64::LD2Rv4h_POST, "ld2r", ".4h", 1, false, 4 }, - { ARM64::LD2Rv2s_POST, "ld2r", ".2s", 1, false, 8 }, - { ARM64::LD2Rv1d_POST, "ld2r", ".1d", 1, false, 16 }, - { ARM64::LD2Twov16b, "ld2", ".16b", 0, false, 0 }, - { ARM64::LD2Twov8h, "ld2", ".8h", 0, false, 0 }, - { ARM64::LD2Twov4s, "ld2", ".4s", 0, false, 0 }, - { ARM64::LD2Twov2d, "ld2", ".2d", 0, false, 0 }, - { ARM64::LD2Twov8b, "ld2", ".8b", 0, false, 0 }, - { ARM64::LD2Twov4h, "ld2", ".4h", 0, false, 0 }, - { ARM64::LD2Twov2s, "ld2", ".2s", 0, false, 0 }, - { ARM64::LD2Twov16b_POST, "ld2", ".16b", 1, false, 32 }, - { ARM64::LD2Twov8h_POST, "ld2", ".8h", 1, false, 32 }, - { ARM64::LD2Twov4s_POST, "ld2", ".4s", 1, false, 32 }, - { ARM64::LD2Twov2d_POST, "ld2", ".2d", 1, false, 32 }, - { ARM64::LD2Twov8b_POST, "ld2", ".8b", 1, false, 16 }, - { ARM64::LD2Twov4h_POST, "ld2", ".4h", 1, false, 16 }, - { ARM64::LD2Twov2s_POST, "ld2", ".2s", 1, false, 16 }, - { ARM64::LD3i8, "ld3", ".b", 1, true, 0 }, - { ARM64::LD3i16, "ld3", ".h", 1, true, 0 }, - { ARM64::LD3i32, "ld3", ".s", 1, true, 0 }, - { ARM64::LD3i64, "ld3", ".d", 1, true, 0 }, - { ARM64::LD3i8_POST, "ld3", ".b", 2, true, 3 }, - { ARM64::LD3i16_POST, "ld3", ".h", 2, true, 6 }, - { ARM64::LD3i32_POST, "ld3", ".s", 2, true, 12 }, - { ARM64::LD3i64_POST, "ld3", ".d", 2, true, 24 }, - { ARM64::LD3Rv16b, "ld3r", ".16b", 0, false, 0 }, - { ARM64::LD3Rv8h, "ld3r", ".8h", 0, false, 0 }, - { ARM64::LD3Rv4s, "ld3r", ".4s", 0, false, 0 }, - { ARM64::LD3Rv2d, "ld3r", ".2d", 0, false, 0 }, - { ARM64::LD3Rv8b, "ld3r", ".8b", 0, false, 0 }, - { ARM64::LD3Rv4h, "ld3r", ".4h", 0, false, 0 }, - { ARM64::LD3Rv2s, "ld3r", ".2s", 0, false, 0 }, - { ARM64::LD3Rv1d, "ld3r", ".1d", 0, false, 0 }, - { ARM64::LD3Rv16b_POST, "ld3r", ".16b", 1, false, 3 }, - { ARM64::LD3Rv8h_POST, "ld3r", ".8h", 1, false, 6 }, - { ARM64::LD3Rv4s_POST, "ld3r", ".4s", 1, false, 12 }, - { ARM64::LD3Rv2d_POST, "ld3r", ".2d", 1, false, 24 }, - { ARM64::LD3Rv8b_POST, "ld3r", ".8b", 1, false, 3 }, - { ARM64::LD3Rv4h_POST, "ld3r", ".4h", 1, false, 6 }, - { ARM64::LD3Rv2s_POST, "ld3r", ".2s", 1, false, 12 }, - { ARM64::LD3Rv1d_POST, "ld3r", ".1d", 1, false, 24 }, - { ARM64::LD3Threev16b, "ld3", ".16b", 0, false, 0 }, - { ARM64::LD3Threev8h, "ld3", ".8h", 0, false, 0 }, - { ARM64::LD3Threev4s, "ld3", ".4s", 0, false, 0 }, - { ARM64::LD3Threev2d, "ld3", ".2d", 0, false, 0 }, - { ARM64::LD3Threev8b, "ld3", ".8b", 0, false, 0 }, - { ARM64::LD3Threev4h, "ld3", ".4h", 0, false, 0 }, - { ARM64::LD3Threev2s, "ld3", ".2s", 0, false, 0 }, - { ARM64::LD3Threev16b_POST, "ld3", ".16b", 1, false, 48 }, - { ARM64::LD3Threev8h_POST, "ld3", ".8h", 1, false, 48 }, - { ARM64::LD3Threev4s_POST, "ld3", ".4s", 1, false, 48 }, - { ARM64::LD3Threev2d_POST, "ld3", ".2d", 1, false, 48 }, - { ARM64::LD3Threev8b_POST, "ld3", ".8b", 1, false, 24 }, - { ARM64::LD3Threev4h_POST, "ld3", ".4h", 1, false, 24 }, - { ARM64::LD3Threev2s_POST, "ld3", ".2s", 1, false, 24 }, - { ARM64::LD4i8, "ld4", ".b", 1, true, 0 }, - { ARM64::LD4i16, "ld4", ".h", 1, true, 0 }, - { ARM64::LD4i32, "ld4", ".s", 1, true, 0 }, - { ARM64::LD4i64, "ld4", ".d", 1, true, 0 }, - { ARM64::LD4i8_POST, "ld4", ".b", 2, true, 4 
}, - { ARM64::LD4i16_POST, "ld4", ".h", 2, true, 8 }, - { ARM64::LD4i32_POST, "ld4", ".s", 2, true, 16 }, - { ARM64::LD4i64_POST, "ld4", ".d", 2, true, 32 }, - { ARM64::LD4Rv16b, "ld4r", ".16b", 0, false, 0 }, - { ARM64::LD4Rv8h, "ld4r", ".8h", 0, false, 0 }, - { ARM64::LD4Rv4s, "ld4r", ".4s", 0, false, 0 }, - { ARM64::LD4Rv2d, "ld4r", ".2d", 0, false, 0 }, - { ARM64::LD4Rv8b, "ld4r", ".8b", 0, false, 0 }, - { ARM64::LD4Rv4h, "ld4r", ".4h", 0, false, 0 }, - { ARM64::LD4Rv2s, "ld4r", ".2s", 0, false, 0 }, - { ARM64::LD4Rv1d, "ld4r", ".1d", 0, false, 0 }, - { ARM64::LD4Rv16b_POST, "ld4r", ".16b", 1, false, 4 }, - { ARM64::LD4Rv8h_POST, "ld4r", ".8h", 1, false, 8 }, - { ARM64::LD4Rv4s_POST, "ld4r", ".4s", 1, false, 16 }, - { ARM64::LD4Rv2d_POST, "ld4r", ".2d", 1, false, 32 }, - { ARM64::LD4Rv8b_POST, "ld4r", ".8b", 1, false, 4 }, - { ARM64::LD4Rv4h_POST, "ld4r", ".4h", 1, false, 8 }, - { ARM64::LD4Rv2s_POST, "ld4r", ".2s", 1, false, 16 }, - { ARM64::LD4Rv1d_POST, "ld4r", ".1d", 1, false, 32 }, - { ARM64::LD4Fourv16b, "ld4", ".16b", 0, false, 0 }, - { ARM64::LD4Fourv8h, "ld4", ".8h", 0, false, 0 }, - { ARM64::LD4Fourv4s, "ld4", ".4s", 0, false, 0 }, - { ARM64::LD4Fourv2d, "ld4", ".2d", 0, false, 0 }, - { ARM64::LD4Fourv8b, "ld4", ".8b", 0, false, 0 }, - { ARM64::LD4Fourv4h, "ld4", ".4h", 0, false, 0 }, - { ARM64::LD4Fourv2s, "ld4", ".2s", 0, false, 0 }, - { ARM64::LD4Fourv16b_POST, "ld4", ".16b", 1, false, 64 }, - { ARM64::LD4Fourv8h_POST, "ld4", ".8h", 1, false, 64 }, - { ARM64::LD4Fourv4s_POST, "ld4", ".4s", 1, false, 64 }, - { ARM64::LD4Fourv2d_POST, "ld4", ".2d", 1, false, 64 }, - { ARM64::LD4Fourv8b_POST, "ld4", ".8b", 1, false, 32 }, - { ARM64::LD4Fourv4h_POST, "ld4", ".4h", 1, false, 32 }, - { ARM64::LD4Fourv2s_POST, "ld4", ".2s", 1, false, 32 }, - { ARM64::ST1i8, "st1", ".b", 0, true, 0 }, - { ARM64::ST1i16, "st1", ".h", 0, true, 0 }, - { ARM64::ST1i32, "st1", ".s", 0, true, 0 }, - { ARM64::ST1i64, "st1", ".d", 0, true, 0 }, - { ARM64::ST1i8_POST, "st1", ".b", 1, true, 1 }, - { ARM64::ST1i16_POST, "st1", ".h", 1, true, 2 }, - { ARM64::ST1i32_POST, "st1", ".s", 1, true, 4 }, - { ARM64::ST1i64_POST, "st1", ".d", 1, true, 8 }, - { ARM64::ST1Onev16b, "st1", ".16b", 0, false, 0 }, - { ARM64::ST1Onev8h, "st1", ".8h", 0, false, 0 }, - { ARM64::ST1Onev4s, "st1", ".4s", 0, false, 0 }, - { ARM64::ST1Onev2d, "st1", ".2d", 0, false, 0 }, - { ARM64::ST1Onev8b, "st1", ".8b", 0, false, 0 }, - { ARM64::ST1Onev4h, "st1", ".4h", 0, false, 0 }, - { ARM64::ST1Onev2s, "st1", ".2s", 0, false, 0 }, - { ARM64::ST1Onev1d, "st1", ".1d", 0, false, 0 }, - { ARM64::ST1Onev16b_POST, "st1", ".16b", 1, false, 16 }, - { ARM64::ST1Onev8h_POST, "st1", ".8h", 1, false, 16 }, - { ARM64::ST1Onev4s_POST, "st1", ".4s", 1, false, 16 }, - { ARM64::ST1Onev2d_POST, "st1", ".2d", 1, false, 16 }, - { ARM64::ST1Onev8b_POST, "st1", ".8b", 1, false, 8 }, - { ARM64::ST1Onev4h_POST, "st1", ".4h", 1, false, 8 }, - { ARM64::ST1Onev2s_POST, "st1", ".2s", 1, false, 8 }, - { ARM64::ST1Onev1d_POST, "st1", ".1d", 1, false, 8 }, - { ARM64::ST1Twov16b, "st1", ".16b", 0, false, 0 }, - { ARM64::ST1Twov8h, "st1", ".8h", 0, false, 0 }, - { ARM64::ST1Twov4s, "st1", ".4s", 0, false, 0 }, - { ARM64::ST1Twov2d, "st1", ".2d", 0, false, 0 }, - { ARM64::ST1Twov8b, "st1", ".8b", 0, false, 0 }, - { ARM64::ST1Twov4h, "st1", ".4h", 0, false, 0 }, - { ARM64::ST1Twov2s, "st1", ".2s", 0, false, 0 }, - { ARM64::ST1Twov1d, "st1", ".1d", 0, false, 0 }, - { ARM64::ST1Twov16b_POST, "st1", ".16b", 1, false, 32 }, - { ARM64::ST1Twov8h_POST, "st1", ".8h", 1, false, 32 
}, - { ARM64::ST1Twov4s_POST, "st1", ".4s", 1, false, 32 }, - { ARM64::ST1Twov2d_POST, "st1", ".2d", 1, false, 32 }, - { ARM64::ST1Twov8b_POST, "st1", ".8b", 1, false, 16 }, - { ARM64::ST1Twov4h_POST, "st1", ".4h", 1, false, 16 }, - { ARM64::ST1Twov2s_POST, "st1", ".2s", 1, false, 16 }, - { ARM64::ST1Twov1d_POST, "st1", ".1d", 1, false, 16 }, - { ARM64::ST1Threev16b, "st1", ".16b", 0, false, 0 }, - { ARM64::ST1Threev8h, "st1", ".8h", 0, false, 0 }, - { ARM64::ST1Threev4s, "st1", ".4s", 0, false, 0 }, - { ARM64::ST1Threev2d, "st1", ".2d", 0, false, 0 }, - { ARM64::ST1Threev8b, "st1", ".8b", 0, false, 0 }, - { ARM64::ST1Threev4h, "st1", ".4h", 0, false, 0 }, - { ARM64::ST1Threev2s, "st1", ".2s", 0, false, 0 }, - { ARM64::ST1Threev1d, "st1", ".1d", 0, false, 0 }, - { ARM64::ST1Threev16b_POST, "st1", ".16b", 1, false, 48 }, - { ARM64::ST1Threev8h_POST, "st1", ".8h", 1, false, 48 }, - { ARM64::ST1Threev4s_POST, "st1", ".4s", 1, false, 48 }, - { ARM64::ST1Threev2d_POST, "st1", ".2d", 1, false, 48 }, - { ARM64::ST1Threev8b_POST, "st1", ".8b", 1, false, 24 }, - { ARM64::ST1Threev4h_POST, "st1", ".4h", 1, false, 24 }, - { ARM64::ST1Threev2s_POST, "st1", ".2s", 1, false, 24 }, - { ARM64::ST1Threev1d_POST, "st1", ".1d", 1, false, 24 }, - { ARM64::ST1Fourv16b, "st1", ".16b", 0, false, 0 }, - { ARM64::ST1Fourv8h, "st1", ".8h", 0, false, 0 }, - { ARM64::ST1Fourv4s, "st1", ".4s", 0, false, 0 }, - { ARM64::ST1Fourv2d, "st1", ".2d", 0, false, 0 }, - { ARM64::ST1Fourv8b, "st1", ".8b", 0, false, 0 }, - { ARM64::ST1Fourv4h, "st1", ".4h", 0, false, 0 }, - { ARM64::ST1Fourv2s, "st1", ".2s", 0, false, 0 }, - { ARM64::ST1Fourv1d, "st1", ".1d", 0, false, 0 }, - { ARM64::ST1Fourv16b_POST, "st1", ".16b", 1, false, 64 }, - { ARM64::ST1Fourv8h_POST, "st1", ".8h", 1, false, 64 }, - { ARM64::ST1Fourv4s_POST, "st1", ".4s", 1, false, 64 }, - { ARM64::ST1Fourv2d_POST, "st1", ".2d", 1, false, 64 }, - { ARM64::ST1Fourv8b_POST, "st1", ".8b", 1, false, 32 }, - { ARM64::ST1Fourv4h_POST, "st1", ".4h", 1, false, 32 }, - { ARM64::ST1Fourv2s_POST, "st1", ".2s", 1, false, 32 }, - { ARM64::ST1Fourv1d_POST, "st1", ".1d", 1, false, 32 }, - { ARM64::ST2i8, "st2", ".b", 0, true, 0 }, - { ARM64::ST2i16, "st2", ".h", 0, true, 0 }, - { ARM64::ST2i32, "st2", ".s", 0, true, 0 }, - { ARM64::ST2i64, "st2", ".d", 0, true, 0 }, - { ARM64::ST2i8_POST, "st2", ".b", 1, true, 2 }, - { ARM64::ST2i16_POST, "st2", ".h", 1, true, 4 }, - { ARM64::ST2i32_POST, "st2", ".s", 1, true, 8 }, - { ARM64::ST2i64_POST, "st2", ".d", 1, true, 16 }, - { ARM64::ST2Twov16b, "st2", ".16b", 0, false, 0 }, - { ARM64::ST2Twov8h, "st2", ".8h", 0, false, 0 }, - { ARM64::ST2Twov4s, "st2", ".4s", 0, false, 0 }, - { ARM64::ST2Twov2d, "st2", ".2d", 0, false, 0 }, - { ARM64::ST2Twov8b, "st2", ".8b", 0, false, 0 }, - { ARM64::ST2Twov4h, "st2", ".4h", 0, false, 0 }, - { ARM64::ST2Twov2s, "st2", ".2s", 0, false, 0 }, - { ARM64::ST2Twov16b_POST, "st2", ".16b", 1, false, 32 }, - { ARM64::ST2Twov8h_POST, "st2", ".8h", 1, false, 32 }, - { ARM64::ST2Twov4s_POST, "st2", ".4s", 1, false, 32 }, - { ARM64::ST2Twov2d_POST, "st2", ".2d", 1, false, 32 }, - { ARM64::ST2Twov8b_POST, "st2", ".8b", 1, false, 16 }, - { ARM64::ST2Twov4h_POST, "st2", ".4h", 1, false, 16 }, - { ARM64::ST2Twov2s_POST, "st2", ".2s", 1, false, 16 }, - { ARM64::ST3i8, "st3", ".b", 0, true, 0 }, - { ARM64::ST3i16, "st3", ".h", 0, true, 0 }, - { ARM64::ST3i32, "st3", ".s", 0, true, 0 }, - { ARM64::ST3i64, "st3", ".d", 0, true, 0 }, - { ARM64::ST3i8_POST, "st3", ".b", 1, true, 3 }, - { ARM64::ST3i16_POST, "st3", ".h", 1, true, 
6 }, - { ARM64::ST3i32_POST, "st3", ".s", 1, true, 12 }, - { ARM64::ST3i64_POST, "st3", ".d", 1, true, 24 }, - { ARM64::ST3Threev16b, "st3", ".16b", 0, false, 0 }, - { ARM64::ST3Threev8h, "st3", ".8h", 0, false, 0 }, - { ARM64::ST3Threev4s, "st3", ".4s", 0, false, 0 }, - { ARM64::ST3Threev2d, "st3", ".2d", 0, false, 0 }, - { ARM64::ST3Threev8b, "st3", ".8b", 0, false, 0 }, - { ARM64::ST3Threev4h, "st3", ".4h", 0, false, 0 }, - { ARM64::ST3Threev2s, "st3", ".2s", 0, false, 0 }, - { ARM64::ST3Threev16b_POST, "st3", ".16b", 1, false, 48 }, - { ARM64::ST3Threev8h_POST, "st3", ".8h", 1, false, 48 }, - { ARM64::ST3Threev4s_POST, "st3", ".4s", 1, false, 48 }, - { ARM64::ST3Threev2d_POST, "st3", ".2d", 1, false, 48 }, - { ARM64::ST3Threev8b_POST, "st3", ".8b", 1, false, 24 }, - { ARM64::ST3Threev4h_POST, "st3", ".4h", 1, false, 24 }, - { ARM64::ST3Threev2s_POST, "st3", ".2s", 1, false, 24 }, - { ARM64::ST4i8, "st4", ".b", 0, true, 0 }, - { ARM64::ST4i16, "st4", ".h", 0, true, 0 }, - { ARM64::ST4i32, "st4", ".s", 0, true, 0 }, - { ARM64::ST4i64, "st4", ".d", 0, true, 0 }, - { ARM64::ST4i8_POST, "st4", ".b", 1, true, 4 }, - { ARM64::ST4i16_POST, "st4", ".h", 1, true, 8 }, - { ARM64::ST4i32_POST, "st4", ".s", 1, true, 16 }, - { ARM64::ST4i64_POST, "st4", ".d", 1, true, 32 }, - { ARM64::ST4Fourv16b, "st4", ".16b", 0, false, 0 }, - { ARM64::ST4Fourv8h, "st4", ".8h", 0, false, 0 }, - { ARM64::ST4Fourv4s, "st4", ".4s", 0, false, 0 }, - { ARM64::ST4Fourv2d, "st4", ".2d", 0, false, 0 }, - { ARM64::ST4Fourv8b, "st4", ".8b", 0, false, 0 }, - { ARM64::ST4Fourv4h, "st4", ".4h", 0, false, 0 }, - { ARM64::ST4Fourv2s, "st4", ".2s", 0, false, 0 }, - { ARM64::ST4Fourv16b_POST, "st4", ".16b", 1, false, 64 }, - { ARM64::ST4Fourv8h_POST, "st4", ".8h", 1, false, 64 }, - { ARM64::ST4Fourv4s_POST, "st4", ".4s", 1, false, 64 }, - { ARM64::ST4Fourv2d_POST, "st4", ".2d", 1, false, 64 }, - { ARM64::ST4Fourv8b_POST, "st4", ".8b", 1, false, 32 }, - { ARM64::ST4Fourv4h_POST, "st4", ".4h", 1, false, 32 }, - { ARM64::ST4Fourv2s_POST, "st4", ".2s", 1, false, 32 }, -}; - -static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { - unsigned Idx; - for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx) - if (LdStNInstInfo[Idx].Opcode == Opcode) - return &LdStNInstInfo[Idx]; - - return nullptr; -} - -void ARM64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - unsigned Opcode = MI->getOpcode(); - StringRef Layout, Mnemonic; - - bool IsTbx; - if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) { - O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t' - << getRegisterName(MI->getOperand(0).getReg(), ARM64::vreg) << ", "; - - unsigned ListOpNum = IsTbx ? 2 : 1; - printVectorList(MI, ListOpNum, O, ""); - - O << ", " - << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), ARM64::vreg); - printAnnotation(O, Annot); - return; - } - - if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) { - O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t'; - - // Now onto the operands: first a vector list with possible lane - // specifier. E.g. { v0 }[2] - int OpNum = LdStDesc->ListOperand; - printVectorList(MI, OpNum++, O, ""); - - if (LdStDesc->HasLane) - O << '[' << MI->getOperand(OpNum++).getImm() << ']'; - - // Next the address: [xN] - unsigned AddrReg = MI->getOperand(OpNum++).getReg(); - O << ", [" << getRegisterName(AddrReg) << ']'; - - // Finally, there might be a post-indexed offset. 
- if (LdStDesc->NaturalOffset != 0) { - unsigned Reg = MI->getOperand(OpNum++).getReg(); - if (Reg != ARM64::XZR) - O << ", " << getRegisterName(Reg); - else { - assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?"); - O << ", #" << LdStDesc->NaturalOffset; - } - } - - printAnnotation(O, Annot); - return; - } - - ARM64InstPrinter::printInst(MI, O, Annot); -} - -bool ARM64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { -#ifndef NDEBUG - unsigned Opcode = MI->getOpcode(); - assert(Opcode == ARM64::SYSxt && "Invalid opcode for SYS alias!"); -#endif - - const char *Asm = nullptr; - const MCOperand &Op1 = MI->getOperand(0); - const MCOperand &Cn = MI->getOperand(1); - const MCOperand &Cm = MI->getOperand(2); - const MCOperand &Op2 = MI->getOperand(3); - - unsigned Op1Val = Op1.getImm(); - unsigned CnVal = Cn.getImm(); - unsigned CmVal = Cm.getImm(); - unsigned Op2Val = Op2.getImm(); - - if (CnVal == 7) { - switch (CmVal) { - default: - break; - - // IC aliases - case 1: - if (Op1Val == 0 && Op2Val == 0) - Asm = "ic\tialluis"; - break; - case 5: - if (Op1Val == 0 && Op2Val == 0) - Asm = "ic\tiallu"; - else if (Op1Val == 3 && Op2Val == 1) - Asm = "ic\tivau"; - break; - - // DC aliases - case 4: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tzva"; - break; - case 6: - if (Op1Val == 0 && Op2Val == 1) - Asm = "dc\tivac"; - if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tisw"; - break; - case 10: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcvac"; - else if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tcsw"; - break; - case 11: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcvau"; - break; - case 14: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcivac"; - else if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tcisw"; - break; - - // AT aliases - case 8: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e1r"; break; - case 1: Asm = "at\ts1e1w"; break; - case 2: Asm = "at\ts1e0r"; break; - case 3: Asm = "at\ts1e0w"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e2r"; break; - case 1: Asm = "at\ts1e2w"; break; - case 4: Asm = "at\ts12e1r"; break; - case 5: Asm = "at\ts12e1w"; break; - case 6: Asm = "at\ts12e0r"; break; - case 7: Asm = "at\ts12e0w"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e3r"; break; - case 1: Asm = "at\ts1e3w"; break; - } - break; - } - break; - } - } else if (CnVal == 8) { - // TLBI aliases - switch (CmVal) { - default: - break; - case 3: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\tvmalle1is"; break; - case 1: Asm = "tlbi\tvae1is"; break; - case 2: Asm = "tlbi\taside1is"; break; - case 3: Asm = "tlbi\tvaae1is"; break; - case 5: Asm = "tlbi\tvale1is"; break; - case 7: Asm = "tlbi\tvaale1is"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle2is"; break; - case 1: Asm = "tlbi\tvae2is"; break; - case 4: Asm = "tlbi\talle1is"; break; - case 5: Asm = "tlbi\tvale2is"; break; - case 6: Asm = "tlbi\tvmalls12e1is"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle3is"; break; - case 1: Asm = "tlbi\tvae3is"; break; - case 5: Asm = "tlbi\tvale3is"; break; - } - break; - } - break; - case 0: - switch (Op1Val) { - default: - break; - case 4: - switch (Op2Val) { - default: - break; - case 1: Asm = "tlbi\tipas2e1is"; break; - case 5: Asm = 
"tlbi\tipas2le1is"; break; - } - break; - } - break; - case 4: - switch (Op1Val) { - default: - break; - case 4: - switch (Op2Val) { - default: - break; - case 1: Asm = "tlbi\tipas2e1"; break; - case 5: Asm = "tlbi\tipas2le1"; break; - } - break; - } - break; - case 7: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\tvmalle1"; break; - case 1: Asm = "tlbi\tvae1"; break; - case 2: Asm = "tlbi\taside1"; break; - case 3: Asm = "tlbi\tvaae1"; break; - case 5: Asm = "tlbi\tvale1"; break; - case 7: Asm = "tlbi\tvaale1"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle2"; break; - case 1: Asm = "tlbi\tvae2"; break; - case 4: Asm = "tlbi\talle1"; break; - case 5: Asm = "tlbi\tvale2"; break; - case 6: Asm = "tlbi\tvmalls12e1"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle3"; break; - case 1: Asm = "tlbi\tvae3"; break; - case 5: Asm = "tlbi\tvale3"; break; - } - break; - } - break; - } - } - - if (Asm) { - unsigned Reg = MI->getOperand(4).getReg(); - - O << '\t' << Asm; - if (StringRef(Asm).lower().find("all") == StringRef::npos) - O << ", " << getRegisterName(Reg); - } - - return Asm != nullptr; -} - -void ARM64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - O << getRegisterName(Reg); - } else if (Op.isImm()) { - O << '#' << Op.getImm(); - } else { - assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); - } -} - -void ARM64InstPrinter::printHexImm(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - O << format("#%#llx", Op.getImm()); -} - -void ARM64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, - unsigned Imm, raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - if (Reg == ARM64::XZR) - O << "#" << Imm; - else - O << getRegisterName(Reg); - } else - assert(0 && "unknown operand kind in printPostIncOperand64"); -} - -void ARM64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - assert(Op.isReg() && "Non-register vreg operand!"); - unsigned Reg = Op.getReg(); - O << getRegisterName(Reg, ARM64::vreg); -} - -void ARM64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - assert(Op.isImm() && "System instruction C[nm] operands must be immediates!"); - O << "c" << Op.getImm(); -} - -void ARM64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - if (MO.isImm()) { - unsigned Val = (MO.getImm() & 0xfff); - assert(Val == MO.getImm() && "Add/sub immediate out of range!"); - unsigned Shift = - ARM64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm()); - O << '#' << Val; - if (Shift != 0) - printShifter(MI, OpNum + 1, O); - - if (CommentStream) - *CommentStream << '=' << (Val << Shift) << '\n'; - } else { - assert(MO.isExpr() && "Unexpected operand type!"); - O << *MO.getExpr(); - printShifter(MI, OpNum + 1, O); - } -} - -void ARM64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - uint64_t Val = MI->getOperand(OpNum).getImm(); - O << "#0x"; - O.write_hex(ARM64_AM::decodeLogicalImmediate(Val, 
32)); -} - -void ARM64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - uint64_t Val = MI->getOperand(OpNum).getImm(); - O << "#0x"; - O.write_hex(ARM64_AM::decodeLogicalImmediate(Val, 64)); -} - -void ARM64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNum).getImm(); - // LSL #0 should not be printed. - if (ARM64_AM::getShiftType(Val) == ARM64_AM::LSL && - ARM64_AM::getShiftValue(Val) == 0) - return; - O << ", " << ARM64_AM::getShiftExtendName(ARM64_AM::getShiftType(Val)) << " #" - << ARM64_AM::getShiftValue(Val); -} - -void ARM64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << getRegisterName(MI->getOperand(OpNum).getReg()); - printShifter(MI, OpNum + 1, O); -} - -void ARM64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << getRegisterName(MI->getOperand(OpNum).getReg()); - printArithExtend(MI, OpNum + 1, O); -} - -void ARM64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNum).getImm(); - ARM64_AM::ShiftExtendType ExtType = ARM64_AM::getArithExtendType(Val); - unsigned ShiftVal = ARM64_AM::getArithShiftValue(Val); - - // If the destination or first source register operand is [W]SP, print - // UXTW/UXTX as LSL, and if the shift amount is also zero, print nothing at - // all. - if (ExtType == ARM64_AM::UXTW || ExtType == ARM64_AM::UXTX) { - unsigned Dest = MI->getOperand(0).getReg(); - unsigned Src1 = MI->getOperand(1).getReg(); - if ( ((Dest == ARM64::SP || Src1 == ARM64::SP) && - ExtType == ARM64_AM::UXTX) || - ((Dest == ARM64::WSP || Src1 == ARM64::WSP) && - ExtType == ARM64_AM::UXTW) ) { - if (ShiftVal != 0) - O << ", lsl #" << ShiftVal; - return; - } - } - O << ", " << ARM64_AM::getShiftExtendName(ExtType); - if (ShiftVal != 0) - O << " #" << ShiftVal; -} - -void ARM64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum, - raw_ostream &O, char SrcRegKind, - unsigned Width) { - unsigned SignExtend = MI->getOperand(OpNum).getImm(); - unsigned DoShift = MI->getOperand(OpNum + 1).getImm(); - - // sxtw, sxtx, uxtw or lsl (== uxtx) - bool IsLSL = !SignExtend && SrcRegKind == 'x'; - if (IsLSL) - O << "lsl"; - else - O << (SignExtend ? 
's' : 'u') << "xt" << SrcRegKind; - - if (DoShift || IsLSL) - O << " #" << Log2_32(Width / 8); -} - -void ARM64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm(); - O << ARM64CC::getCondCodeName(CC); -} - -void ARM64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm(); - O << ARM64CC::getCondCodeName(ARM64CC::getInvertedCondCode(CC)); -} - -void ARM64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']'; -} - -template -void ARM64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '#' << Scale * MI->getOperand(OpNum).getImm(); -} - -void ARM64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum, - unsigned Scale, raw_ostream &O) { - const MCOperand MO = MI->getOperand(OpNum); - if (MO.isImm()) { - O << "#" << (MO.getImm() * Scale); - } else { - assert(MO.isExpr() && "Unexpected operand type!"); - O << *MO.getExpr(); - } -} - -void ARM64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum, - unsigned Scale, raw_ostream &O) { - const MCOperand MO1 = MI->getOperand(OpNum + 1); - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()); - if (MO1.isImm()) { - O << ", #" << (MO1.getImm() * Scale); - } else { - assert(MO1.isExpr() && "Unexpected operand type!"); - O << ", " << *MO1.getExpr(); - } - O << ']'; -} - -void ARM64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned prfop = MI->getOperand(OpNum).getImm(); - bool Valid; - StringRef Name = ARM64PRFM::PRFMMapper().toString(prfop, Valid); - if (Valid) - O << Name; - else - O << '#' << prfop; -} - -void ARM64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - float FPImm = MO.isFPImm() ? MO.getFPImm() : ARM64_AM::getFPImmFloat(MO.getImm()); - - // 8 decimal places are enough to perfectly represent permitted floats. 
- O << format("#%.8f", FPImm); -} - -static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { - while (Stride--) { - switch (Reg) { - default: - assert(0 && "Vector register expected!"); - case ARM64::Q0: Reg = ARM64::Q1; break; - case ARM64::Q1: Reg = ARM64::Q2; break; - case ARM64::Q2: Reg = ARM64::Q3; break; - case ARM64::Q3: Reg = ARM64::Q4; break; - case ARM64::Q4: Reg = ARM64::Q5; break; - case ARM64::Q5: Reg = ARM64::Q6; break; - case ARM64::Q6: Reg = ARM64::Q7; break; - case ARM64::Q7: Reg = ARM64::Q8; break; - case ARM64::Q8: Reg = ARM64::Q9; break; - case ARM64::Q9: Reg = ARM64::Q10; break; - case ARM64::Q10: Reg = ARM64::Q11; break; - case ARM64::Q11: Reg = ARM64::Q12; break; - case ARM64::Q12: Reg = ARM64::Q13; break; - case ARM64::Q13: Reg = ARM64::Q14; break; - case ARM64::Q14: Reg = ARM64::Q15; break; - case ARM64::Q15: Reg = ARM64::Q16; break; - case ARM64::Q16: Reg = ARM64::Q17; break; - case ARM64::Q17: Reg = ARM64::Q18; break; - case ARM64::Q18: Reg = ARM64::Q19; break; - case ARM64::Q19: Reg = ARM64::Q20; break; - case ARM64::Q20: Reg = ARM64::Q21; break; - case ARM64::Q21: Reg = ARM64::Q22; break; - case ARM64::Q22: Reg = ARM64::Q23; break; - case ARM64::Q23: Reg = ARM64::Q24; break; - case ARM64::Q24: Reg = ARM64::Q25; break; - case ARM64::Q25: Reg = ARM64::Q26; break; - case ARM64::Q26: Reg = ARM64::Q27; break; - case ARM64::Q27: Reg = ARM64::Q28; break; - case ARM64::Q28: Reg = ARM64::Q29; break; - case ARM64::Q29: Reg = ARM64::Q30; break; - case ARM64::Q30: Reg = ARM64::Q31; break; - // Vector lists can wrap around. - case ARM64::Q31: - Reg = ARM64::Q0; - break; - } - } - return Reg; -} - -void ARM64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O, StringRef LayoutSuffix) { - unsigned Reg = MI->getOperand(OpNum).getReg(); - - O << "{ "; - - // Work out how many registers there are in the list (if there is an actual - // list). - unsigned NumRegs = 1; - if (MRI.getRegClass(ARM64::DDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQRegClassID).contains(Reg)) - NumRegs = 2; - else if (MRI.getRegClass(ARM64::DDDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQQRegClassID).contains(Reg)) - NumRegs = 3; - else if (MRI.getRegClass(ARM64::DDDDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQQQRegClassID).contains(Reg)) - NumRegs = 4; - - // Now forget about the list and find out what the first register is. - if (unsigned FirstReg = MRI.getSubReg(Reg, ARM64::dsub0)) - Reg = FirstReg; - else if (unsigned FirstReg = MRI.getSubReg(Reg, ARM64::qsub0)) - Reg = FirstReg; - - // If it's a D-reg, we need to promote it to the equivalent Q-reg before - // printing (otherwise getRegisterName fails). 
-  if (MRI.getRegClass(ARM64::FPR64RegClassID).contains(Reg)) {
-    const MCRegisterClass &FPR128RC = MRI.getRegClass(ARM64::FPR128RegClassID);
-    Reg = MRI.getMatchingSuperReg(Reg, ARM64::dsub, &FPR128RC);
-  }
-
-  for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) {
-    O << getRegisterName(Reg, ARM64::vreg) << LayoutSuffix;
-    if (i + 1 != NumRegs)
-      O << ", ";
-  }
-
-  O << " }";
-}
-
-void ARM64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI,
-                                                      unsigned OpNum,
-                                                      raw_ostream &O) {
-  printVectorList(MI, OpNum, O, "");
-}
-
-template <unsigned NumLanes, char LaneKind>
-void ARM64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
-                                            raw_ostream &O) {
-  std::string Suffix(".");
-  if (NumLanes)
-    Suffix += itostr(NumLanes) + LaneKind;
-  else
-    Suffix += LaneKind;
-
-  printVectorList(MI, OpNum, O, Suffix);
-}
-
-void ARM64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
-                                        raw_ostream &O) {
-  O << "[" << MI->getOperand(OpNum).getImm() << "]";
-}
-
-void ARM64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
-                                         raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-
-  // If the label has already been resolved to an immediate offset (say, when
-  // we're running the disassembler), just print the immediate.
-  if (Op.isImm()) {
-    O << "#" << (Op.getImm() << 2);
-    return;
-  }
-
-  // If the branch target is simply an address then print it in hex.
-  const MCConstantExpr *BranchTarget =
-      dyn_cast<MCConstantExpr>(MI->getOperand(OpNum).getExpr());
-  int64_t Address;
-  if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
-    O << "0x";
-    O.write_hex(Address);
-  } else {
-    // Otherwise, just print the expression.
-    O << *MI->getOperand(OpNum).getExpr();
-  }
-}
-
-void ARM64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
-                                      raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-
-  // If the label has already been resolved to an immediate offset (say, when
-  // we're running the disassembler), just print the immediate.
-  if (Op.isImm()) {
-    O << "#" << (Op.getImm() << 12);
-    return;
-  }
-
-  // Otherwise, just print the expression.
- O << *MI->getOperand(OpNum).getExpr(); -} - -void ARM64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - unsigned Opcode = MI->getOpcode(); - - bool Valid; - StringRef Name; - if (Opcode == ARM64::ISB) - Name = ARM64ISB::ISBMapper().toString(Val, Valid); - else - Name = ARM64DB::DBarrierMapper().toString(Val, Valid); - if (Valid) - O << Name; - else - O << "#" << Val; -} - -void ARM64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - - bool Valid; - auto Mapper = ARM64SysReg::MRSMapper(getAvailableFeatures()); - std::string Name = Mapper.toString(Val, Valid); - - if (Valid) - O << StringRef(Name).upper(); -} - -void ARM64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - - bool Valid; - auto Mapper = ARM64SysReg::MSRMapper(getAvailableFeatures()); - std::string Name = Mapper.toString(Val, Valid); - - if (Valid) - O << StringRef(Name).upper(); -} - -void ARM64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - - bool Valid; - StringRef Name = ARM64PState::PStateMapper().toString(Val, Valid); - if (Valid) - O << StringRef(Name.str()).upper(); - else - O << "#" << Val; -} - -void ARM64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned RawVal = MI->getOperand(OpNo).getImm(); - uint64_t Val = ARM64_AM::decodeAdvSIMDModImmType10(RawVal); - O << format("#%#016llx", Val); -} diff --git a/lib/Target/ARM64/InstPrinter/CMakeLists.txt b/lib/Target/ARM64/InstPrinter/CMakeLists.txt deleted file mode 100644 index b8ee12c55412..000000000000 --- a/lib/Target/ARM64/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARM64AsmPrinter - ARM64InstPrinter.cpp - ) - -add_dependencies(LLVMARM64AsmPrinter ARM64CommonTableGen) diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h b/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h deleted file mode 100644 index 7106b314ea20..000000000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h +++ /dev/null @@ -1,76 +0,0 @@ -//===-- ARM64FixupKinds.h - ARM64 Specific Fixup Entries --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ARM64FIXUPKINDS_H -#define LLVM_ARM64FIXUPKINDS_H - -#include "llvm/MC/MCFixup.h" - -namespace llvm { -namespace ARM64 { - -enum Fixups { - // fixup_arm64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into - // an ADR instruction. - fixup_arm64_pcrel_adr_imm21 = FirstTargetFixupKind, - - // fixup_arm64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into - // an ADRP instruction. - fixup_arm64_pcrel_adrp_imm21, - - // fixup_arm64_imm12 - 12-bit fixup for add/sub instructions. - // No alignment adjustment. All value bits are encoded. - fixup_arm64_add_imm12, - - // fixup_arm64_ldst_imm12_* - unsigned 12-bit fixups for load and - // store instructions. 
-  fixup_arm64_ldst_imm12_scale1,
-  fixup_arm64_ldst_imm12_scale2,
-  fixup_arm64_ldst_imm12_scale4,
-  fixup_arm64_ldst_imm12_scale8,
-  fixup_arm64_ldst_imm12_scale16,
-
-  // fixup_arm64_ldr_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative
-  // immediate. Same encoding as fixup_arm64_pcrel_adrhi, except this is used by
-  // pc-relative loads and generates relocations directly when necessary.
-  fixup_arm64_ldr_pcrel_imm19,
-
-  // FIXME: comment
-  fixup_arm64_movw,
-
-  // fixup_arm64_pcrel_branch14 - The high 14 bits of a 16-bit pc-relative
-  // immediate.
-  fixup_arm64_pcrel_branch14,
-
-  // fixup_arm64_pcrel_branch19 - The high 19 bits of a 21-bit pc-relative
-  // immediate. Same encoding as fixup_arm64_pcrel_adrhi, except this is used by
-  // b.cc and generates relocations directly when necessary.
-  fixup_arm64_pcrel_branch19,
-
-  // fixup_arm64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative
-  // immediate.
-  fixup_arm64_pcrel_branch26,
-
-  // fixup_arm64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative
-  // immediate. Distinguished from branch26 only on ELF.
-  fixup_arm64_pcrel_call26,
-
-  // fixup_arm64_tlsdesc_call - zero-space placeholder for the ELF
-  // R_AARCH64_TLSDESC_CALL relocation.
-  fixup_arm64_tlsdesc_call,
-
-  // Marker
-  LastTargetFixupKind,
-  NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
-};
-
-} // end namespace ARM64
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp
deleted file mode 100644
index 079d3588f6ea..000000000000
--- a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp
+++ /dev/null
@@ -1,210 +0,0 @@
-//===-- ARM64MCTargetDesc.cpp - ARM64 Target Descriptions -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides ARM64 specific target descriptions.
-// -//===----------------------------------------------------------------------===// - -#include "ARM64MCTargetDesc.h" -#include "ARM64ELFStreamer.h" -#include "ARM64MCAsmInfo.h" -#include "InstPrinter/ARM64InstPrinter.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - -#define GET_INSTRINFO_MC_DESC -#include "ARM64GenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "ARM64GenSubtargetInfo.inc" - -#define GET_REGINFO_MC_DESC -#include "ARM64GenRegisterInfo.inc" - -static MCInstrInfo *createARM64MCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitARM64MCInstrInfo(X); - return X; -} - -static MCSubtargetInfo *createARM64MCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - - if (CPU.empty()) - CPU = "generic"; - - InitARM64MCSubtargetInfo(X, TT, CPU, FS); - return X; -} - -static MCRegisterInfo *createARM64MCRegisterInfo(StringRef Triple) { - MCRegisterInfo *X = new MCRegisterInfo(); - InitARM64MCRegisterInfo(X, ARM64::LR); - return X; -} - -static MCAsmInfo *createARM64MCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { - Triple TheTriple(TT); - - MCAsmInfo *MAI; - if (TheTriple.isOSDarwin()) - MAI = new ARM64MCAsmInfoDarwin(); - else { - assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF"); - MAI = new ARM64MCAsmInfoELF(TT); - } - - // Initial state of the frame pointer is SP. - unsigned Reg = MRI.getDwarfRegNum(ARM64::SP, true); - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0); - MAI->addInitialFrameState(Inst); - - return MAI; -} - -static MCCodeGenInfo *createARM64MCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - Triple TheTriple(TT); - assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) && - "Only expect Darwin and ELF targets"); - - if (CM == CodeModel::Default) - CM = CodeModel::Small; - // The default MCJIT memory managers make no guarantees about where they can - // find an executable page; JITed code needs to be able to refer to globals - // no matter how far away they are. - else if (CM == CodeModel::JITDefault) - CM = CodeModel::Large; - else if (CM != CodeModel::Small && CM != CodeModel::Large) - report_fatal_error("Only small and large code models are allowed on ARM64"); - - // ARM64 Darwin is always PIC. - if (TheTriple.isOSDarwin()) - RM = Reloc::PIC_; - // On ELF platforms the default static relocation model has a smart enough - // linker to cope with referencing external symbols defined in a shared - // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. 
- else if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) - RM = Reloc::Static; - - MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM, OL); - return X; -} - -static MCInstPrinter *createARM64MCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - if (SyntaxVariant == 0) - return new ARM64InstPrinter(MAI, MII, MRI, STI); - if (SyntaxVariant == 1) - return new ARM64AppleInstPrinter(MAI, MII, MRI, STI); - - return nullptr; -} - -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll, - bool NoExecStack) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) - return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, - /*LabelSections*/ true); - - return createARM64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); -} - -// Force static initialization. -extern "C" void LLVMInitializeARM64TargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn X(TheARM64leTarget, createARM64MCAsmInfo); - RegisterMCAsmInfoFn Y(TheARM64beTarget, createARM64MCAsmInfo); - RegisterMCAsmInfoFn Z(TheAArch64leTarget, createARM64MCAsmInfo); - RegisterMCAsmInfoFn W(TheAArch64beTarget, createARM64MCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheARM64leTarget, - createARM64MCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheARM64beTarget, - createARM64MCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget, - createARM64MCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget, - createARM64MCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheARM64leTarget, createARM64MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheARM64beTarget, createARM64MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget, createARM64MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget, createARM64MCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheARM64leTarget, createARM64MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheARM64beTarget, createARM64MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget, createARM64MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget, createARM64MCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheARM64leTarget, - createARM64MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheARM64beTarget, - createARM64MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget, - createARM64MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget, - createARM64MCSubtargetInfo); - - // Register the asm backend. 
- TargetRegistry::RegisterMCAsmBackend(TheARM64leTarget, createARM64leAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheARM64beTarget, createARM64beAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget, createARM64leAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget, createARM64beAsmBackend); - - // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheARM64leTarget, - createARM64MCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheARM64beTarget, - createARM64MCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget, - createARM64MCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget, - createARM64MCCodeEmitter); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(TheARM64leTarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheARM64beTarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget, createMCStreamer); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(TheARM64leTarget, - createARM64MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheARM64beTarget, - createARM64MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget, - createARM64MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget, - createARM64MCInstPrinter); -} diff --git a/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index f8665bcfe949..000000000000 --- a/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -add_llvm_library(LLVMARM64Desc - ARM64AsmBackend.cpp - ARM64ELFObjectWriter.cpp - ARM64ELFStreamer.cpp - ARM64MCAsmInfo.cpp - ARM64MCCodeEmitter.cpp - ARM64MCExpr.cpp - ARM64MCTargetDesc.cpp - ARM64MachObjectWriter.cpp -) -add_dependencies(LLVMARM64Desc ARM64CommonTableGen) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/ARM64/Makefile b/lib/Target/ARM64/Makefile deleted file mode 100644 index cfb05d2a87ba..000000000000 --- a/lib/Target/ARM64/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -##===- lib/Target/ARM64/Makefile ---------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMARM64CodeGen -TARGET = ARM64 - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = ARM64GenRegisterInfo.inc ARM64GenInstrInfo.inc \ - ARM64GenAsmWriter.inc ARM64GenAsmWriter1.inc \ - ARM64GenDAGISel.inc \ - ARM64GenCallingConv.inc ARM64GenAsmMatcher.inc \ - ARM64GenSubtargetInfo.inc ARM64GenMCCodeEmitter.inc \ - ARM64GenFastISel.inc ARM64GenDisassemblerTables.inc \ - ARM64GenMCPseudoLowering.inc - -DIRS = TargetInfo InstPrinter AsmParser Disassembler MCTargetDesc Utils - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/TargetInfo/CMakeLists.txt b/lib/Target/ARM64/TargetInfo/CMakeLists.txt deleted file mode 100644 index a0142c40713e..000000000000 --- a/lib/Target/ARM64/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMARM64Info - ARM64TargetInfo.cpp - ) - -add_dependencies(LLVMARM64Info ARM64CommonTableGen) diff --git a/lib/Target/ARM64/Utils/CMakeLists.txt b/lib/Target/ARM64/Utils/CMakeLists.txt deleted file mode 100644 index f69076f4ef64..000000000000 --- a/lib/Target/ARM64/Utils/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMARM64Utils - ARM64BaseInfo.cpp - ) diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index da2309ba0cbe..1b0837cb3b54 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = ARM ARM64 CppBackend Hexagon MSP430 NVPTX Mips PowerPC R600 Sparc SystemZ X86 XCore +subdirectories = ARM AArch64 CppBackend Hexagon MSP430 NVPTX Mips PowerPC R600 Sparc SystemZ X86 XCore ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 18a0f9c167a9..be1b5aa50b18 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -836,8 +836,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::arm_neon_vmulls: case Intrinsic::arm_neon_vmullu: - case Intrinsic::arm64_neon_smull: - case Intrinsic::arm64_neon_umull: { + case Intrinsic::aarch64_neon_smull: + case Intrinsic::aarch64_neon_umull: { Value *Arg0 = II->getArgOperand(0); Value *Arg1 = II->getArgOperand(1); @@ -848,7 +848,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Check for constant LHS & RHS - in this case we just simplify. 
     bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
-                 II->getIntrinsicID() == Intrinsic::arm64_neon_umull);
+                 II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
     VectorType *NewVT = cast<VectorType>(II->getType());
     if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
       if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
diff --git a/test/Analysis/CostModel/ARM64/lit.local.cfg b/test/Analysis/CostModel/AArch64/lit.local.cfg
similarity index 73%
rename from test/Analysis/CostModel/ARM64/lit.local.cfg
rename to test/Analysis/CostModel/AArch64/lit.local.cfg
index 84ac9811f012..c42034979fcf 100644
--- a/test/Analysis/CostModel/ARM64/lit.local.cfg
+++ b/test/Analysis/CostModel/AArch64/lit.local.cfg
@@ -1,3 +1,3 @@
 targets = set(config.root.targets_to_build.split())
-if not 'ARM64' in targets:
+if not 'AArch64' in targets:
     config.unsupported = True
diff --git a/test/Analysis/CostModel/ARM64/select.ll b/test/Analysis/CostModel/AArch64/select.ll
similarity index 100%
rename from test/Analysis/CostModel/ARM64/select.ll
rename to test/Analysis/CostModel/AArch64/select.ll
diff --git a/test/Analysis/CostModel/ARM64/store.ll b/test/Analysis/CostModel/AArch64/store.ll
similarity index 100%
rename from test/Analysis/CostModel/ARM64/store.ll
rename to test/Analysis/CostModel/AArch64/store.ll
diff --git a/test/CodeGen/AArch64/128bit_load_store.ll b/test/CodeGen/AArch64/128bit_load_store.ll
index 56f67873f848..a6f077698e40 100644
--- a/test/CodeGen/AArch64/128bit_load_store.ll
+++ b/test/CodeGen/AArch64/128bit_load_store.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s --check-prefix=CHECK

 define void @test_store_f128(fp128* %ptr, fp128 %val) #0 {
 ; CHECK-LABEL: test_store_f128
@@ -17,8 +17,8 @@ entry:
 }

 define void @test_vstrq_p128(i128* %ptr, i128 %val) #0 {
-; CHECK-ARM64-LABEL: test_vstrq_p128
-; CHECK-ARM64: stp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}]
+; CHECK-LABEL: test_vstrq_p128
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}]
 entry:
   %0 = bitcast i128* %ptr to fp128*
@@ -28,8 +28,8 @@ entry:
 }

 define i128 @test_vldrq_p128(i128* readonly %ptr) #2 {
-; CHECK-ARM64-LABEL: test_vldrq_p128
-; CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}]
+; CHECK-LABEL: test_vldrq_p128
+; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}]
 entry:
   %0 = bitcast i128* %ptr to fp128*
diff --git a/test/CodeGen/ARM64/aarch64-neon-v1i1-setcc.ll b/test/CodeGen/AArch64/aarch64-neon-v1i1-setcc.ll
similarity index 100%
rename from test/CodeGen/ARM64/aarch64-neon-v1i1-setcc.ll
rename to test/CodeGen/AArch64/aarch64-neon-v1i1-setcc.ll
diff --git a/test/CodeGen/AArch64/addsub.ll b/test/CodeGen/AArch64/addsub.ll
index 3aa427c352c6..b85fdbb14ce2 100644
--- a/test/CodeGen/AArch64/addsub.ll
+++ b/test/CodeGen/AArch64/addsub.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-linux-gnu | FileCheck %s

 ; Note that this should be refactored (for efficiency if nothing else)
 ; when the PCS is implemented so we don't have to worry about the
diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll
index cd01f594dcde..a2266b1d36de 100644
--- a/test/CodeGen/AArch64/addsub_ext.ll
+++ b/test/CodeGen/AArch64/addsub_ext.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs %s -o - -mtriple=arm64-linux-gnu | FileCheck %s
+; 
RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu | FileCheck %s @var8 = global i8 0 @var16 = global i16 0 diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll index 7cab200b1ea7..f93efbc42e65 100644 --- a/test/CodeGen/AArch64/alloca.ll +++ b/test/CodeGen/AArch64/alloca.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s declare void @use_addr(i8*) @@ -53,7 +53,7 @@ define i64 @test_alloca_with_local(i64 %n) { %val = load i64* %loc -; CHECK-ARM64: ldur x0, [x29, #-[[LOC_FROM_FP]]] +; CHECK: ldur x0, [x29, #-[[LOC_FROM_FP]]] ret i64 %val ; Make sure epilogue restores sp from fp @@ -74,16 +74,16 @@ define void @test_variadic_alloca(i64 %n, ...) { ; CHECK-NOFP-AARCH64: add x8, [[TMP]], #0 -; CHECK-ARM64: stp x29, x30, [sp, #-16]! -; CHECK-ARM64: mov x29, sp -; CHECK-ARM64: sub sp, sp, #192 -; CHECK-ARM64: stp q6, q7, [x29, #-96] +; CHECK: stp x29, x30, [sp, #-16]! +; CHECK: mov x29, sp +; CHECK: sub sp, sp, #192 +; CHECK: stp q6, q7, [x29, #-96] ; [...] -; CHECK-ARM64: stp q0, q1, [x29, #-192] +; CHECK: stp q0, q1, [x29, #-192] -; CHECK-ARM64: stp x6, x7, [x29, #-16] +; CHECK: stp x6, x7, [x29, #-16] ; [...] -; CHECK-ARM64: stp x2, x3, [x29, #-48] +; CHECK: stp x2, x3, [x29, #-48] ; CHECK-NOFP-ARM64: stp x29, x30, [sp, #-16]! ; CHECK-NOFP-ARM64: mov x29, sp @@ -115,11 +115,11 @@ define void @test_alloca_large_frame(i64 %n) { ; CHECK-LABEL: test_alloca_large_frame: -; CHECK-ARM64: stp x20, x19, [sp, #-32]! -; CHECK-ARM64: stp x29, x30, [sp, #16] -; CHECK-ARM64: add x29, sp, #16 -; CHECK-ARM64: sub sp, sp, #1953, lsl #12 -; CHECK-ARM64: sub sp, sp, #512 +; CHECK: stp x20, x19, [sp, #-32]! +; CHECK: stp x29, x30, [sp, #16] +; CHECK: add x29, sp, #16 +; CHECK: sub sp, sp, #1953, lsl #12 +; CHECK: sub sp, sp, #512 %addr1 = alloca i8, i64 %n %addr2 = alloca i64, i64 1000000 @@ -128,9 +128,9 @@ define void @test_alloca_large_frame(i64 %n) { ret void -; CHECK-ARM64: sub sp, x29, #16 -; CHECK-ARM64: ldp x29, x30, [sp, #16] -; CHECK-ARM64: ldp x20, x19, [sp], #32 +; CHECK: sub sp, x29, #16 +; CHECK: ldp x29, x30, [sp, #16] +; CHECK: ldp x20, x19, [sp], #32 } declare i8* @llvm.stacksave() diff --git a/test/CodeGen/AArch64/analyze-branch.ll b/test/CodeGen/AArch64/analyze-branch.ll index 1d4daec5f43d..6616b27c45b7 100644 --- a/test/CodeGen/AArch64/analyze-branch.ll +++ b/test/CodeGen/AArch64/analyze-branch.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s ; This test checks that LLVM can do basic stripping and reapplying of branches ; to basic blocks. 
diff --git a/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll b/test/CodeGen/AArch64/arm64-2011-03-09-CPSRSpill.ll similarity index 100% rename from test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll rename to test/CodeGen/AArch64/arm64-2011-03-09-CPSRSpill.ll diff --git a/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll similarity index 100% rename from test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll rename to test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll diff --git a/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll b/test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll similarity index 100% rename from test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll rename to test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll diff --git a/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll b/test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll similarity index 100% rename from test/CodeGen/ARM64/2011-04-21-CPSRBug.ll rename to test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll diff --git a/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll b/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll similarity index 100% rename from test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll rename to test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll diff --git a/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll b/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll similarity index 100% rename from test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll rename to test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll diff --git a/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll b/test/CodeGen/AArch64/arm64-2012-05-07-DAGCombineVectorExtract.ll similarity index 100% rename from test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll rename to test/CodeGen/AArch64/arm64-2012-05-07-DAGCombineVectorExtract.ll diff --git a/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll b/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll similarity index 100% rename from test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll rename to test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll diff --git a/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll b/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll similarity index 100% rename from test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll rename to test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll diff --git a/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll b/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll similarity index 100% rename from test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll rename to test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll diff --git a/test/CodeGen/ARM64/2012-06-06-FPToUI.ll b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll similarity index 100% rename from test/CodeGen/ARM64/2012-06-06-FPToUI.ll rename to test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll diff --git a/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll b/test/CodeGen/AArch64/arm64-2012-07-11-InstrEmitterBug.ll similarity index 100% rename from test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll rename to test/CodeGen/AArch64/arm64-2012-07-11-InstrEmitterBug.ll diff --git a/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll b/test/CodeGen/AArch64/arm64-2013-01-13-ffast-fcmp.ll similarity index 72% rename from test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll rename to test/CodeGen/AArch64/arm64-2013-01-13-ffast-fcmp.ll index b40a581d6118..e2c43d953bb9 100644 --- a/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll +++ 
b/test/CodeGen/AArch64/arm64-2013-01-13-ffast-fcmp.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -fp-contract=fast | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -fp-contract=fast | FileCheck %s --check-prefix=FAST target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" target triple = "arm64-apple-ios7.0.0" diff --git a/test/CodeGen/ARM64/2013-01-23-frem-crash.ll b/test/CodeGen/AArch64/arm64-2013-01-23-frem-crash.ll similarity index 100% rename from test/CodeGen/ARM64/2013-01-23-frem-crash.ll rename to test/CodeGen/AArch64/arm64-2013-01-23-frem-crash.ll diff --git a/test/CodeGen/ARM64/2013-01-23-sext-crash.ll b/test/CodeGen/AArch64/arm64-2013-01-23-sext-crash.ll similarity index 100% rename from test/CodeGen/ARM64/2013-01-23-sext-crash.ll rename to test/CodeGen/AArch64/arm64-2013-01-23-sext-crash.ll diff --git a/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll b/test/CodeGen/AArch64/arm64-2013-02-12-shufv8i8.ll similarity index 83% rename from test/CodeGen/ARM64/2013-02-12-shufv8i8.ll rename to test/CodeGen/AArch64/arm64-2013-02-12-shufv8i8.ll index 70e745fc5775..a350ba1472c9 100644 --- a/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll +++ b/test/CodeGen/AArch64/arm64-2013-02-12-shufv8i8.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple ;CHECK-LABEL: Shuff: ;CHECK: tbl.8b diff --git a/test/CodeGen/ARM64/2014-04-16-AnInfiniteLoopInDAGCombine.ll b/test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll similarity index 100% rename from test/CodeGen/ARM64/2014-04-16-AnInfiniteLoopInDAGCombine.ll rename to test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll diff --git a/test/CodeGen/ARM64/2014-04-28-sqshl-uqshl-i64Contant.ll b/test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll similarity index 63% rename from test/CodeGen/ARM64/2014-04-28-sqshl-uqshl-i64Contant.ll rename to test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll index a4c9cd8f1062..3949b85fbd32 100644 --- a/test/CodeGen/ARM64/2014-04-28-sqshl-uqshl-i64Contant.ll +++ b/test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll @@ -4,16 +4,16 @@ define i64 @test_vqshld_s64_i(i64 %a) { ; CHECK-LABEL: test_vqshld_s64_i: ; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #36 - %1 = tail call i64 @llvm.arm64.neon.sqshl.i64(i64 %a, i64 36) + %1 = tail call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 36) ret i64 %1 } define i64 @test_vqshld_u64_i(i64 %a) { ; CHECK-LABEL: test_vqshld_u64_i: ; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #36 - %1 = tail call i64 @llvm.arm64.neon.uqshl.i64(i64 %a, i64 36) + %1 = tail call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 36) ret i64 %1 } -declare i64 @llvm.arm64.neon.uqshl.i64(i64, i64) -declare i64 @llvm.arm64.neon.sqshl.i64(i64, i64) +declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64) +declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64) diff --git a/test/CodeGen/ARM64/2014-04-29-EXT-undef-mask.ll b/test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll similarity index 94% rename from test/CodeGen/ARM64/2014-04-29-EXT-undef-mask.ll rename to test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll index b0ab9fda22d5..1b2d54317c23 100644 --- a/test/CodeGen/ARM64/2014-04-29-EXT-undef-mask.ll 
+++ b/test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -O0 -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ; The following 2 test cases test shufflevector with beginning UNDEF mask. define <8 x i16> @test_vext_undef_traverse(<8 x i16> %in) { diff --git a/test/CodeGen/ARM64/AdvSIMD-Scalar.ll b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll similarity index 89% rename from test/CodeGen/ARM64/AdvSIMD-Scalar.ll rename to test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll index 3e75eed4cd5d..c4597d5a4815 100644 --- a/test/CodeGen/ARM64/AdvSIMD-Scalar.ll +++ b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -arm64-simd-scalar=true -asm-verbose=false | FileCheck %s -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=generic -arm64-simd-scalar=true -asm-verbose=false | FileCheck %s -check-prefix=GENERIC +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false | FileCheck %s -check-prefix=GENERIC define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; CHECK-LABEL: bar: diff --git a/test/CodeGen/ARM64/aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll similarity index 100% rename from test/CodeGen/ARM64/aapcs.ll rename to test/CodeGen/AArch64/arm64-aapcs.ll diff --git a/test/CodeGen/ARM64/abi-varargs.ll b/test/CodeGen/AArch64/arm64-abi-varargs.ll similarity index 100% rename from test/CodeGen/ARM64/abi-varargs.ll rename to test/CodeGen/AArch64/arm64-abi-varargs.ll diff --git a/test/CodeGen/ARM64/abi.ll b/test/CodeGen/AArch64/arm64-abi.ll similarity index 100% rename from test/CodeGen/ARM64/abi.ll rename to test/CodeGen/AArch64/arm64-abi.ll diff --git a/test/CodeGen/ARM64/abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll similarity index 100% rename from test/CodeGen/ARM64/abi_align.ll rename to test/CodeGen/AArch64/arm64-abi_align.ll diff --git a/test/CodeGen/ARM64/addp.ll b/test/CodeGen/AArch64/arm64-addp.ll similarity index 90% rename from test/CodeGen/ARM64/addp.ll rename to test/CodeGen/AArch64/arm64-addp.ll index 428f6d4f28a5..3f1e5c5d44e3 100644 --- a/test/CodeGen/ARM64/addp.ll +++ b/test/CodeGen/AArch64/arm64-addp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -mcpu=cyclone | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s define double @foo(<2 x double> %a) nounwind { ; CHECK-LABEL: foo: diff --git a/test/CodeGen/ARM64/addr-mode-folding.ll b/test/CodeGen/AArch64/arm64-addr-mode-folding.ll similarity index 100% rename from test/CodeGen/ARM64/addr-mode-folding.ll rename to test/CodeGen/AArch64/arm64-addr-mode-folding.ll diff --git a/test/CodeGen/ARM64/addr-type-promotion.ll b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll similarity index 100% rename from test/CodeGen/ARM64/addr-type-promotion.ll rename to test/CodeGen/AArch64/arm64-addr-type-promotion.ll diff --git a/test/CodeGen/ARM64/addrmode.ll b/test/CodeGen/AArch64/arm64-addrmode.ll similarity index 100% rename from test/CodeGen/ARM64/addrmode.ll rename to test/CodeGen/AArch64/arm64-addrmode.ll diff --git a/test/CodeGen/ARM64/alloc-no-stack-realign.ll b/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll similarity index 100% rename from test/CodeGen/ARM64/alloc-no-stack-realign.ll rename to 
test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll diff --git a/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll similarity index 100% rename from test/CodeGen/ARM64/alloca-frame-pointer-offset.ll rename to test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll diff --git a/test/CodeGen/ARM64/andCmpBrToTBZ.ll b/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll similarity index 100% rename from test/CodeGen/ARM64/andCmpBrToTBZ.ll rename to test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll diff --git a/test/CodeGen/ARM64/ands-bad-peephole.ll b/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll similarity index 100% rename from test/CodeGen/ARM64/ands-bad-peephole.ll rename to test/CodeGen/AArch64/arm64-ands-bad-peephole.ll diff --git a/test/CodeGen/ARM64/anyregcc-crash.ll b/test/CodeGen/AArch64/arm64-anyregcc-crash.ll similarity index 100% rename from test/CodeGen/ARM64/anyregcc-crash.ll rename to test/CodeGen/AArch64/arm64-anyregcc-crash.ll diff --git a/test/CodeGen/ARM64/anyregcc.ll b/test/CodeGen/AArch64/arm64-anyregcc.ll similarity index 100% rename from test/CodeGen/ARM64/anyregcc.ll rename to test/CodeGen/AArch64/arm64-anyregcc.ll diff --git a/test/CodeGen/ARM64/arith-saturating.ll b/test/CodeGen/AArch64/arm64-arith-saturating.ll similarity index 58% rename from test/CodeGen/ARM64/arith-saturating.ll rename to test/CodeGen/AArch64/arm64-arith-saturating.ll index 2d188ff9cc7e..78cd1fcb1a21 100644 --- a/test/CodeGen/ARM64/arith-saturating.ll +++ b/test/CodeGen/AArch64/arm64-arith-saturating.ll @@ -5,7 +5,7 @@ define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { ; CHECK: sqadd s0, s0, s1 %vecext = extractelement <4 x i32> %b, i32 0 %vecext1 = extractelement <4 x i32> %c, i32 0 - %vqadd.i = tail call i32 @llvm.arm64.neon.sqadd.i32(i32 %vecext, i32 %vecext1) nounwind + %vqadd.i = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %vecext, i32 %vecext1) nounwind ret i32 %vqadd.i } @@ -14,7 +14,7 @@ define i64 @qaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { ; CHECK: sqadd d0, d0, d1 %vecext = extractelement <2 x i64> %b, i32 0 %vecext1 = extractelement <2 x i64> %c, i32 0 - %vqadd.i = tail call i64 @llvm.arm64.neon.sqadd.i64(i64 %vecext, i64 %vecext1) nounwind + %vqadd.i = tail call i64 @llvm.aarch64.neon.sqadd.i64(i64 %vecext, i64 %vecext1) nounwind ret i64 %vqadd.i } @@ -23,7 +23,7 @@ define i32 @uqadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { ; CHECK: uqadd s0, s0, s1 %vecext = extractelement <4 x i32> %b, i32 0 %vecext1 = extractelement <4 x i32> %c, i32 0 - %vqadd.i = tail call i32 @llvm.arm64.neon.uqadd.i32(i32 %vecext, i32 %vecext1) nounwind + %vqadd.i = tail call i32 @llvm.aarch64.neon.uqadd.i32(i32 %vecext, i32 %vecext1) nounwind ret i32 %vqadd.i } @@ -32,21 +32,21 @@ define i64 @uqaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { ; CHECK: uqadd d0, d0, d1 %vecext = extractelement <2 x i64> %b, i32 0 %vecext1 = extractelement <2 x i64> %c, i32 0 - %vqadd.i = tail call i64 @llvm.arm64.neon.uqadd.i64(i64 %vecext, i64 %vecext1) nounwind + %vqadd.i = tail call i64 @llvm.aarch64.neon.uqadd.i64(i64 %vecext, i64 %vecext1) nounwind ret i64 %vqadd.i } -declare i64 @llvm.arm64.neon.uqadd.i64(i64, i64) nounwind readnone -declare i32 @llvm.arm64.neon.uqadd.i32(i32, i32) nounwind readnone -declare i64 @llvm.arm64.neon.sqadd.i64(i64, i64) nounwind readnone -declare i32 @llvm.arm64.neon.sqadd.i32(i32, i32) nounwind readnone +declare i64 @llvm.aarch64.neon.uqadd.i64(i64, i64) 
nounwind readnone +declare i32 @llvm.aarch64.neon.uqadd.i32(i32, i32) nounwind readnone +declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) nounwind readnone define i32 @qsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { ; CHECK-LABEL: qsubs: ; CHECK: sqsub s0, s0, s1 %vecext = extractelement <4 x i32> %b, i32 0 %vecext1 = extractelement <4 x i32> %c, i32 0 - %vqsub.i = tail call i32 @llvm.arm64.neon.sqsub.i32(i32 %vecext, i32 %vecext1) nounwind + %vqsub.i = tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %vecext, i32 %vecext1) nounwind ret i32 %vqsub.i } @@ -55,7 +55,7 @@ define i64 @qsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { ; CHECK: sqsub d0, d0, d1 %vecext = extractelement <2 x i64> %b, i32 0 %vecext1 = extractelement <2 x i64> %c, i32 0 - %vqsub.i = tail call i64 @llvm.arm64.neon.sqsub.i64(i64 %vecext, i64 %vecext1) nounwind + %vqsub.i = tail call i64 @llvm.aarch64.neon.sqsub.i64(i64 %vecext, i64 %vecext1) nounwind ret i64 %vqsub.i } @@ -64,7 +64,7 @@ define i32 @uqsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { ; CHECK: uqsub s0, s0, s1 %vecext = extractelement <4 x i32> %b, i32 0 %vecext1 = extractelement <4 x i32> %c, i32 0 - %vqsub.i = tail call i32 @llvm.arm64.neon.uqsub.i32(i32 %vecext, i32 %vecext1) nounwind + %vqsub.i = tail call i32 @llvm.aarch64.neon.uqsub.i32(i32 %vecext, i32 %vecext1) nounwind ret i32 %vqsub.i } @@ -73,21 +73,21 @@ define i64 @uqsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { ; CHECK: uqsub d0, d0, d1 %vecext = extractelement <2 x i64> %b, i32 0 %vecext1 = extractelement <2 x i64> %c, i32 0 - %vqsub.i = tail call i64 @llvm.arm64.neon.uqsub.i64(i64 %vecext, i64 %vecext1) nounwind + %vqsub.i = tail call i64 @llvm.aarch64.neon.uqsub.i64(i64 %vecext, i64 %vecext1) nounwind ret i64 %vqsub.i } -declare i64 @llvm.arm64.neon.uqsub.i64(i64, i64) nounwind readnone -declare i32 @llvm.arm64.neon.uqsub.i32(i32, i32) nounwind readnone -declare i64 @llvm.arm64.neon.sqsub.i64(i64, i64) nounwind readnone -declare i32 @llvm.arm64.neon.sqsub.i32(i32, i32) nounwind readnone +declare i64 @llvm.aarch64.neon.uqsub.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.neon.uqsub.i32(i32, i32) nounwind readnone +declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) nounwind readnone define i32 @qabss(<4 x i32> %b, <4 x i32> %c) nounwind readnone { ; CHECK-LABEL: qabss: ; CHECK: sqabs s0, s0 ; CHECK: ret %vecext = extractelement <4 x i32> %b, i32 0 - %vqabs.i = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %vecext) nounwind + %vqabs.i = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %vecext) nounwind ret i32 %vqabs.i } @@ -96,7 +96,7 @@ define i64 @qabsd(<2 x i64> %b, <2 x i64> %c) nounwind readnone { ; CHECK: sqabs d0, d0 ; CHECK: ret %vecext = extractelement <2 x i64> %b, i32 0 - %vqabs.i = tail call i64 @llvm.arm64.neon.sqabs.i64(i64 %vecext) nounwind + %vqabs.i = tail call i64 @llvm.aarch64.neon.sqabs.i64(i64 %vecext) nounwind ret i64 %vqabs.i } @@ -105,7 +105,7 @@ define i32 @qnegs(<4 x i32> %b, <4 x i32> %c) nounwind readnone { ; CHECK: sqneg s0, s0 ; CHECK: ret %vecext = extractelement <4 x i32> %b, i32 0 - %vqneg.i = tail call i32 @llvm.arm64.neon.sqneg.i32(i32 %vecext) nounwind + %vqneg.i = tail call i32 @llvm.aarch64.neon.sqneg.i32(i32 %vecext) nounwind ret i32 %vqneg.i } @@ -114,21 +114,21 @@ define i64 @qnegd(<2 x i64> %b, <2 x i64> %c) nounwind readnone { ; 
CHECK: sqneg d0, d0 ; CHECK: ret %vecext = extractelement <2 x i64> %b, i32 0 - %vqneg.i = tail call i64 @llvm.arm64.neon.sqneg.i64(i64 %vecext) nounwind + %vqneg.i = tail call i64 @llvm.aarch64.neon.sqneg.i64(i64 %vecext) nounwind ret i64 %vqneg.i } -declare i64 @llvm.arm64.neon.sqneg.i64(i64) nounwind readnone -declare i32 @llvm.arm64.neon.sqneg.i32(i32) nounwind readnone -declare i64 @llvm.arm64.neon.sqabs.i64(i64) nounwind readnone -declare i32 @llvm.arm64.neon.sqabs.i32(i32) nounwind readnone +declare i64 @llvm.aarch64.neon.sqneg.i64(i64) nounwind readnone +declare i32 @llvm.aarch64.neon.sqneg.i32(i32) nounwind readnone +declare i64 @llvm.aarch64.neon.sqabs.i64(i64) nounwind readnone +declare i32 @llvm.aarch64.neon.sqabs.i32(i32) nounwind readnone define i32 @vqmovund(<2 x i64> %b) nounwind readnone { ; CHECK-LABEL: vqmovund: ; CHECK: sqxtun s0, d0 %vecext = extractelement <2 x i64> %b, i32 0 - %vqmovun.i = tail call i32 @llvm.arm64.neon.scalar.sqxtun.i32.i64(i64 %vecext) nounwind + %vqmovun.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %vecext) nounwind ret i32 %vqmovun.i } @@ -136,7 +136,7 @@ define i32 @vqmovnd_s(<2 x i64> %b) nounwind readnone { ; CHECK-LABEL: vqmovnd_s: ; CHECK: sqxtn s0, d0 %vecext = extractelement <2 x i64> %b, i32 0 - %vqmovn.i = tail call i32 @llvm.arm64.neon.scalar.sqxtn.i32.i64(i64 %vecext) nounwind + %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %vecext) nounwind ret i32 %vqmovn.i } @@ -144,10 +144,10 @@ define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone { ; CHECK-LABEL: vqmovnd_u: ; CHECK: uqxtn s0, d0 %vecext = extractelement <2 x i64> %b, i32 0 - %vqmovn.i = tail call i32 @llvm.arm64.neon.scalar.uqxtn.i32.i64(i64 %vecext) nounwind + %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %vecext) nounwind ret i32 %vqmovn.i } -declare i32 @llvm.arm64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone -declare i32 @llvm.arm64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone -declare i32 @llvm.arm64.neon.scalar.sqxtun.i32.i64(i64) nounwind readnone +declare i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone +declare i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone +declare i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64) nounwind readnone diff --git a/test/CodeGen/ARM64/arith.ll b/test/CodeGen/AArch64/arm64-arith.ll similarity index 90% rename from test/CodeGen/ARM64/arith.ll rename to test/CodeGen/AArch64/arm64-arith.ll index e4be8e98a780..ed9b569e2182 100644 --- a/test/CodeGen/ARM64/arith.ll +++ b/test/CodeGen/AArch64/arm64-arith.ll @@ -168,7 +168,7 @@ entry: ; CHECK-LABEL: t18: ; CHECK: sdiv w0, w0, w1 ; CHECK: ret - %sdiv = call i32 @llvm.arm64.sdiv.i32(i32 %a, i32 %b) + %sdiv = call i32 @llvm.aarch64.sdiv.i32(i32 %a, i32 %b) ret i32 %sdiv } @@ -177,7 +177,7 @@ entry: ; CHECK-LABEL: t19: ; CHECK: sdiv x0, x0, x1 ; CHECK: ret - %sdiv = call i64 @llvm.arm64.sdiv.i64(i64 %a, i64 %b) + %sdiv = call i64 @llvm.aarch64.sdiv.i64(i64 %a, i64 %b) ret i64 %sdiv } @@ -186,7 +186,7 @@ entry: ; CHECK-LABEL: t20: ; CHECK: udiv w0, w0, w1 ; CHECK: ret - %udiv = call i32 @llvm.arm64.udiv.i32(i32 %a, i32 %b) + %udiv = call i32 @llvm.aarch64.udiv.i32(i32 %a, i32 %b) ret i32 %udiv } @@ -195,14 +195,14 @@ entry: ; CHECK-LABEL: t21: ; CHECK: udiv x0, x0, x1 ; CHECK: ret - %udiv = call i64 @llvm.arm64.udiv.i64(i64 %a, i64 %b) + %udiv = call i64 @llvm.aarch64.udiv.i64(i64 %a, i64 %b) ret i64 %udiv } -declare i32 @llvm.arm64.sdiv.i32(i32, i32) nounwind readnone -declare i64 
@llvm.arm64.sdiv.i64(i64, i64) nounwind readnone -declare i32 @llvm.arm64.udiv.i32(i32, i32) nounwind readnone -declare i64 @llvm.arm64.udiv.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.sdiv.i32(i32, i32) nounwind readnone +declare i64 @llvm.aarch64.sdiv.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.udiv.i32(i32, i32) nounwind readnone +declare i64 @llvm.aarch64.udiv.i64(i64, i64) nounwind readnone ; 32-bit not. define i32 @inv_32(i32 %x) nounwind ssp { diff --git a/test/CodeGen/ARM64/arm64-dead-def-elimination-flag.ll b/test/CodeGen/AArch64/arm64-arm64-dead-def-elimination-flag.ll similarity index 80% rename from test/CodeGen/ARM64/arm64-dead-def-elimination-flag.ll rename to test/CodeGen/AArch64/arm64-arm64-dead-def-elimination-flag.ll index babf4827693b..0904b62c4032 100644 --- a/test/CodeGen/ARM64/arm64-dead-def-elimination-flag.ll +++ b/test/CodeGen/AArch64/arm64-arm64-dead-def-elimination-flag.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-dead-def-elimination=false < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-dead-def-elimination=false < %s | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios7.0.0" diff --git a/test/CodeGen/ARM64/atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll similarity index 100% rename from test/CodeGen/ARM64/atomic-128.ll rename to test/CodeGen/AArch64/arm64-atomic-128.ll diff --git a/test/CodeGen/ARM64/atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll similarity index 100% rename from test/CodeGen/ARM64/atomic.ll rename to test/CodeGen/AArch64/arm64-atomic.ll diff --git a/test/CodeGen/ARM64/basic-pic.ll b/test/CodeGen/AArch64/arm64-basic-pic.ll similarity index 100% rename from test/CodeGen/ARM64/basic-pic.ll rename to test/CodeGen/AArch64/arm64-basic-pic.ll diff --git a/test/CodeGen/ARM64/big-endian-bitconverts.ll b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll similarity index 99% rename from test/CodeGen/ARM64/big-endian-bitconverts.ll rename to test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll index cb8708b9267c..f0e968b2c177 100644 --- a/test/CodeGen/ARM64/big-endian-bitconverts.ll +++ b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -O1 -o - | FileCheck %s -; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -O0 -fast-isel=true -o - | FileCheck %s +; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -O1 -o - | FileCheck %s +; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -O0 -fast-isel=true -o - | FileCheck %s ; CHECK-LABEL: test_i64_f64: define void @test_i64_f64(double* %p, i64* %q) { diff --git a/test/CodeGen/ARM64/big-endian-eh.ll b/test/CodeGen/AArch64/arm64-big-endian-eh.ll similarity index 100% rename from test/CodeGen/ARM64/big-endian-eh.ll rename to test/CodeGen/AArch64/arm64-big-endian-eh.ll diff --git a/test/CodeGen/ARM64/big-endian-varargs.ll b/test/CodeGen/AArch64/arm64-big-endian-varargs.ll similarity index 100% rename from test/CodeGen/ARM64/big-endian-varargs.ll rename to test/CodeGen/AArch64/arm64-big-endian-varargs.ll diff --git a/test/CodeGen/ARM64/big-endian-vector-callee.ll b/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll similarity index 99% rename from test/CodeGen/ARM64/big-endian-vector-callee.ll rename to test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll index 5b9ccace8821..1dcccf106a29 100644 --- a/test/CodeGen/ARM64/big-endian-vector-callee.ll +++ 
b/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -o - | FileCheck %s -; RUN: llc -mtriple arm64_be < %s -fast-isel=true -arm64-load-store-opt=false -o - | FileCheck %s +; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -o - | FileCheck %s +; RUN: llc -mtriple arm64_be < %s -fast-isel=true -aarch64-load-store-opt=false -o - | FileCheck %s ; CHECK-LABEL: test_i64_f64: define i64 @test_i64_f64(double %p) { diff --git a/test/CodeGen/ARM64/big-endian-vector-caller.ll b/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll similarity index 99% rename from test/CodeGen/ARM64/big-endian-vector-caller.ll rename to test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll index 194a32139253..9a12b7a01153 100644 --- a/test/CodeGen/ARM64/big-endian-vector-caller.ll +++ b/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -o - | FileCheck %s -; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -fast-isel=true -O0 -o - | FileCheck %s +; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -o - | FileCheck %s +; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -fast-isel=true -O0 -o - | FileCheck %s ; CHECK-LABEL: test_i64_f64: declare i64 @test_i64_f64_helper(double %p) diff --git a/test/CodeGen/ARM64/big-imm-offsets.ll b/test/CodeGen/AArch64/arm64-big-imm-offsets.ll similarity index 100% rename from test/CodeGen/ARM64/big-imm-offsets.ll rename to test/CodeGen/AArch64/arm64-big-imm-offsets.ll diff --git a/test/CodeGen/ARM64/big-stack.ll b/test/CodeGen/AArch64/arm64-big-stack.ll similarity index 100% rename from test/CodeGen/ARM64/big-stack.ll rename to test/CodeGen/AArch64/arm64-big-stack.ll diff --git a/test/CodeGen/ARM64/bitfield-extract.ll b/test/CodeGen/AArch64/arm64-bitfield-extract.ll similarity index 100% rename from test/CodeGen/ARM64/bitfield-extract.ll rename to test/CodeGen/AArch64/arm64-bitfield-extract.ll diff --git a/test/CodeGen/ARM64/blockaddress.ll b/test/CodeGen/AArch64/arm64-blockaddress.ll similarity index 100% rename from test/CodeGen/ARM64/blockaddress.ll rename to test/CodeGen/AArch64/arm64-blockaddress.ll diff --git a/test/CodeGen/ARM64/build-vector.ll b/test/CodeGen/AArch64/arm64-build-vector.ll similarity index 94% rename from test/CodeGen/ARM64/build-vector.ll rename to test/CodeGen/AArch64/arm64-build-vector.ll index 143d6894383f..c109263cedb4 100644 --- a/test/CodeGen/ARM64/build-vector.ll +++ b/test/CodeGen/AArch64/arm64-build-vector.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ; Check that building up a vector w/ only one non-zero lane initializes ; intelligently. 
diff --git a/test/CodeGen/ARM64/call-tailcalls.ll b/test/CodeGen/AArch64/arm64-call-tailcalls.ll similarity index 100% rename from test/CodeGen/ARM64/call-tailcalls.ll rename to test/CodeGen/AArch64/arm64-call-tailcalls.ll diff --git a/test/CodeGen/ARM64/cast-opt.ll b/test/CodeGen/AArch64/arm64-cast-opt.ll similarity index 100% rename from test/CodeGen/ARM64/cast-opt.ll rename to test/CodeGen/AArch64/arm64-cast-opt.ll diff --git a/test/CodeGen/ARM64/ccmp-heuristics.ll b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll similarity index 99% rename from test/CodeGen/ARM64/ccmp-heuristics.ll rename to test/CodeGen/AArch64/arm64-ccmp-heuristics.ll index 5575997e53ed..664a26cafe4d 100644 --- a/test/CodeGen/ARM64/ccmp-heuristics.ll +++ b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -arm64-ccmp | FileCheck %s +; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -aarch64-ccmp | FileCheck %s target triple = "arm64-apple-ios7.0.0" @channelColumns = external global i64 diff --git a/test/CodeGen/ARM64/ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll similarity index 98% rename from test/CodeGen/ARM64/ccmp.ll rename to test/CodeGen/AArch64/arm64-ccmp.ll index 79e6f94e3f6d..63965f9538b5 100644 --- a/test/CodeGen/ARM64/ccmp.ll +++ b/test/CodeGen/AArch64/arm64-ccmp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -arm64-ccmp -arm64-stress-ccmp | FileCheck %s +; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -aarch64-ccmp -aarch64-stress-ccmp | FileCheck %s target triple = "arm64-apple-ios" ; CHECK: single_same diff --git a/test/CodeGen/ARM64/clrsb.ll b/test/CodeGen/AArch64/arm64-clrsb.ll similarity index 100% rename from test/CodeGen/ARM64/clrsb.ll rename to test/CodeGen/AArch64/arm64-clrsb.ll diff --git a/test/CodeGen/ARM64/coalesce-ext.ll b/test/CodeGen/AArch64/arm64-coalesce-ext.ll similarity index 100% rename from test/CodeGen/ARM64/coalesce-ext.ll rename to test/CodeGen/AArch64/arm64-coalesce-ext.ll diff --git a/test/CodeGen/ARM64/code-model-large-abs.ll b/test/CodeGen/AArch64/arm64-code-model-large-abs.ll similarity index 100% rename from test/CodeGen/ARM64/code-model-large-abs.ll rename to test/CodeGen/AArch64/arm64-code-model-large-abs.ll diff --git a/test/CodeGen/ARM64/collect-loh-garbage-crash.ll b/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll similarity index 91% rename from test/CodeGen/ARM64/collect-loh-garbage-crash.ll rename to test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll index 98cb625d2d51..81cee38420aa 100644 --- a/test/CodeGen/ARM64/collect-loh-garbage-crash.ll +++ b/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-apple-ios -O3 -arm64-collect-loh -arm64-collect-loh-bb-only=true -arm64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s +; RUN: llc -mtriple=arm64-apple-ios -O3 -aarch64-collect-loh -aarch64-collect-loh-bb-only=true -aarch64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s ; Check that the LOH analysis does not crash when the analysed chained ; contains instructions that are filtered out. 
; diff --git a/test/CodeGen/ARM64/collect-loh-str.ll b/test/CodeGen/AArch64/arm64-collect-loh-str.ll similarity index 86% rename from test/CodeGen/ARM64/collect-loh-str.ll rename to test/CodeGen/AArch64/arm64-collect-loh-str.ll index fc63f8bcc2e6..d7bc00e318f7 100644 --- a/test/CodeGen/ARM64/collect-loh-str.ll +++ b/test/CodeGen/AArch64/arm64-collect-loh-str.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-apple-ios -O2 -arm64-collect-loh -arm64-collect-loh-bb-only=false < %s -o - | FileCheck %s +; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s ; Test case for . ; AdrpAddStr cannot be used when the store uses same ; register as address and value. Indeed, the related diff --git a/test/CodeGen/ARM64/collect-loh.ll b/test/CodeGen/AArch64/arm64-collect-loh.ll similarity index 85% rename from test/CodeGen/ARM64/collect-loh.ll rename to test/CodeGen/AArch64/arm64-collect-loh.ll index e92690b04f8c..6d73daac6209 100644 --- a/test/CodeGen/ARM64/collect-loh.ll +++ b/test/CodeGen/AArch64/arm64-collect-loh.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-apple-ios -O2 -arm64-collect-loh -arm64-collect-loh-bb-only=false < %s -o - | FileCheck %s -; RUN: llc -mtriple=arm64-linux-gnu -O2 -arm64-collect-loh -arm64-collect-loh-bb-only=false < %s -o - | FileCheck %s --check-prefix=CHECK-ELF +; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -O2 -aarch64-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s --check-prefix=CHECK-ELF ; CHECK-ELF-NOT: .loh ; CHECK-ELF-NOT: AdrpAdrp diff --git a/test/CodeGen/ARM64/complex-copy-noneon.ll b/test/CodeGen/AArch64/arm64-complex-copy-noneon.ll similarity index 100% rename from test/CodeGen/ARM64/complex-copy-noneon.ll rename to test/CodeGen/AArch64/arm64-complex-copy-noneon.ll diff --git a/test/CodeGen/ARM64/complex-ret.ll b/test/CodeGen/AArch64/arm64-complex-ret.ll similarity index 100% rename from test/CodeGen/ARM64/complex-ret.ll rename to test/CodeGen/AArch64/arm64-complex-ret.ll diff --git a/test/CodeGen/ARM64/const-addr.ll b/test/CodeGen/AArch64/arm64-const-addr.ll similarity index 100% rename from test/CodeGen/ARM64/const-addr.ll rename to test/CodeGen/AArch64/arm64-const-addr.ll diff --git a/test/CodeGen/ARM64/convert-v2f64-v2i32.ll b/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll similarity index 86% rename from test/CodeGen/ARM64/convert-v2f64-v2i32.ll rename to test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll index 1a07c986557d..d862b1e19431 100644 --- a/test/CodeGen/ARM64/convert-v2f64-v2i32.ll +++ b/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ; CHECK: fptosi_1 ; CHECK: fcvtzs.2d diff --git a/test/CodeGen/ARM64/convert-v2i32-v2f64.ll b/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll similarity index 90% rename from test/CodeGen/ARM64/convert-v2i32-v2f64.ll rename to test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll index 63129a4b830f..daaf1e0f87df 100644 --- a/test/CodeGen/ARM64/convert-v2i32-v2f64.ll +++ b/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <2 x double> @f1(<2 x i32> %v) nounwind readnone { ; CHECK-LABEL: f1: diff --git 
a/test/CodeGen/ARM64/copy-tuple.ll b/test/CodeGen/AArch64/arm64-copy-tuple.ll similarity index 69% rename from test/CodeGen/ARM64/copy-tuple.ll rename to test/CodeGen/AArch64/arm64-copy-tuple.ll index f99819312467..1803787d729f 100644 --- a/test/CodeGen/ARM64/copy-tuple.ll +++ b/test/CodeGen/AArch64/arm64-copy-tuple.ll @@ -13,14 +13,14 @@ define void @test_D1D2_from_D0D1(i8* %addr) #0 { ; CHECK: mov.8b v1, v0 entry: %addr_v8i8 = bitcast i8* %addr to <8 x i8>* - %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) ret void } @@ -30,14 +30,14 @@ define void @test_D0D1_from_D1D2(i8* %addr) #0 { ; CHECK: mov.8b v1, v2 entry: %addr_v8i8 = bitcast i8* %addr to <8 x i8>* - %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) ret void } @@ -47,14 +47,14 @@ define void @test_D0D1_from_D31D0(i8* %addr) #0 { ; CHECK: mov.8b v0, v31 entry: %addr_v8i8 = bitcast i8* %addr to <8 x i8>* - %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 tail call void asm sideeffect "", 
"~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) ret void } @@ -64,14 +64,14 @@ define void @test_D31D0_from_D0D1(i8* %addr) #0 { ; CHECK: mov.8b v0, v1 entry: %addr_v8i8 = bitcast i8* %addr to <8 x i8>* - %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) ret void } @@ -82,16 +82,16 @@ define void @test_D2D3D4_from_D0D1D2(i8* %addr) #0 { ; CHECK: mov.8b v2, v0 entry: %addr_v8i8 = bitcast i8* %addr to <8 x i8>* - %vec = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) %vec0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 0 %vec1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 1 %vec2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 2 tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr) + tail call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr) tail call void asm sideeffect "", "~{v0},~{v1},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr) + tail call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x 
i8> %vec2, i8* %addr) ret void } @@ -102,15 +102,15 @@ define void @test_Q0Q1Q2_from_Q1Q2Q3(i8* %addr) #0 { ; CHECK: mov.16b v2, v3 entry: %addr_v16i8 = bitcast i8* %addr to <16 x i8>* - %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) + %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0 %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1 %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2 tail call void asm sideeffect "", "~{v0},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) + tail call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) + tail call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) ret void } @@ -121,26 +121,26 @@ define void @test_Q1Q2Q3Q4_from_Q30Q31Q0Q1(i8* %addr) #0 { ; CHECK: mov.16b v2, v31 ; CHECK: mov.16b v1, v30 %addr_v16i8 = bitcast i8* %addr to <16 x i8>* - %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) + %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0 %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1 %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2 %vec3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 3 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}"() - tail call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) + tail call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) tail call void asm sideeffect "", "~{v0},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) + tail call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) ret void } -declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>*) -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0v8i8(<8 x i8>*) -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0v16i8(<16 x i8>*) -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x 
i8> } @llvm.arm64.neon.ld4.v16i8.p0v16i8(<16 x i8>*) +declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>*) +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>*) +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>*) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>*) -declare void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) -declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) -declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) -declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) diff --git a/test/CodeGen/ARM64/crc32.ll b/test/CodeGen/AArch64/arm64-crc32.ll similarity index 58% rename from test/CodeGen/ARM64/crc32.ll rename to test/CodeGen/AArch64/arm64-crc32.ll index 5d759dcce71c..d3099e6bb132 100644 --- a/test/CodeGen/ARM64/crc32.ll +++ b/test/CodeGen/AArch64/arm64-crc32.ll @@ -4,7 +4,7 @@ define i32 @test_crc32b(i32 %cur, i8 %next) { ; CHECK-LABEL: test_crc32b: ; CHECK: crc32b w0, w0, w1 %bits = zext i8 %next to i32 - %val = call i32 @llvm.arm64.crc32b(i32 %cur, i32 %bits) + %val = call i32 @llvm.aarch64.crc32b(i32 %cur, i32 %bits) ret i32 %val } @@ -12,21 +12,21 @@ define i32 @test_crc32h(i32 %cur, i16 %next) { ; CHECK-LABEL: test_crc32h: ; CHECK: crc32h w0, w0, w1 %bits = zext i16 %next to i32 - %val = call i32 @llvm.arm64.crc32h(i32 %cur, i32 %bits) + %val = call i32 @llvm.aarch64.crc32h(i32 %cur, i32 %bits) ret i32 %val } define i32 @test_crc32w(i32 %cur, i32 %next) { ; CHECK-LABEL: test_crc32w: ; CHECK: crc32w w0, w0, w1 - %val = call i32 @llvm.arm64.crc32w(i32 %cur, i32 %next) + %val = call i32 @llvm.aarch64.crc32w(i32 %cur, i32 %next) ret i32 %val } define i32 @test_crc32x(i32 %cur, i64 %next) { ; CHECK-LABEL: test_crc32x: ; CHECK: crc32x w0, w0, x1 - %val = call i32 @llvm.arm64.crc32x(i32 %cur, i64 %next) + %val = call i32 @llvm.aarch64.crc32x(i32 %cur, i64 %next) ret i32 %val } @@ -34,7 +34,7 @@ define i32 @test_crc32cb(i32 %cur, i8 %next) { ; CHECK-LABEL: test_crc32cb: ; CHECK: crc32cb w0, w0, w1 %bits = zext i8 %next to i32 - %val = call i32 @llvm.arm64.crc32cb(i32 %cur, i32 %bits) + %val = call i32 @llvm.aarch64.crc32cb(i32 %cur, i32 %bits) ret i32 %val } @@ -42,30 +42,30 @@ define i32 @test_crc32ch(i32 %cur, i16 %next) { ; CHECK-LABEL: test_crc32ch: ; CHECK: crc32ch w0, w0, w1 %bits = zext i16 %next to i32 - %val = call i32 @llvm.arm64.crc32ch(i32 %cur, i32 %bits) + %val = call i32 @llvm.aarch64.crc32ch(i32 %cur, i32 %bits) ret i32 %val } define i32 @test_crc32cw(i32 %cur, i32 %next) { ; CHECK-LABEL: test_crc32cw: ; CHECK: crc32cw w0, w0, w1 - %val = call i32 @llvm.arm64.crc32cw(i32 %cur, i32 %next) + %val = call i32 @llvm.aarch64.crc32cw(i32 %cur, i32 %next) ret i32 %val } define i32 @test_crc32cx(i32 %cur, i64 %next) { ; CHECK-LABEL: test_crc32cx: ; CHECK: crc32cx w0, w0, x1 - %val = call i32 @llvm.arm64.crc32cx(i32 %cur, i64 %next) + %val = call i32 @llvm.aarch64.crc32cx(i32 %cur, i64 %next) ret i32 %val } -declare i32 @llvm.arm64.crc32b(i32, i32) -declare i32 @llvm.arm64.crc32h(i32, 
i32) -declare i32 @llvm.arm64.crc32w(i32, i32) -declare i32 @llvm.arm64.crc32x(i32, i64) +declare i32 @llvm.aarch64.crc32b(i32, i32) +declare i32 @llvm.aarch64.crc32h(i32, i32) +declare i32 @llvm.aarch64.crc32w(i32, i32) +declare i32 @llvm.aarch64.crc32x(i32, i64) -declare i32 @llvm.arm64.crc32cb(i32, i32) -declare i32 @llvm.arm64.crc32ch(i32, i32) -declare i32 @llvm.arm64.crc32cw(i32, i32) -declare i32 @llvm.arm64.crc32cx(i32, i64) +declare i32 @llvm.aarch64.crc32cb(i32, i32) +declare i32 @llvm.aarch64.crc32ch(i32, i32) +declare i32 @llvm.aarch64.crc32cw(i32, i32) +declare i32 @llvm.aarch64.crc32cx(i32, i64) diff --git a/test/CodeGen/ARM64/crypto.ll b/test/CodeGen/AArch64/arm64-crypto.ll similarity index 50% rename from test/CodeGen/ARM64/crypto.ll rename to test/CodeGen/AArch64/arm64-crypto.ll index 0020865bcd5f..2908b336b1bd 100644 --- a/test/CodeGen/ARM64/crypto.ll +++ b/test/CodeGen/AArch64/arm64-crypto.ll @@ -1,50 +1,50 @@ -; RUN: llc -march=arm64 -mattr=crypto -arm64-neon-syntax=apple -o - %s | FileCheck %s +; RUN: llc -march=arm64 -mattr=crypto -aarch64-neon-syntax=apple -o - %s | FileCheck %s -declare <16 x i8> @llvm.arm64.crypto.aese(<16 x i8> %data, <16 x i8> %key) -declare <16 x i8> @llvm.arm64.crypto.aesd(<16 x i8> %data, <16 x i8> %key) -declare <16 x i8> @llvm.arm64.crypto.aesmc(<16 x i8> %data) -declare <16 x i8> @llvm.arm64.crypto.aesimc(<16 x i8> %data) +declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key) +declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data, <16 x i8> %key) +declare <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %data) +declare <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %data) define <16 x i8> @test_aese(<16 x i8> %data, <16 x i8> %key) { ; CHECK-LABEL: test_aese: ; CHECK: aese.16b v0, v1 - %res = call <16 x i8> @llvm.arm64.crypto.aese(<16 x i8> %data, <16 x i8> %key) + %res = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key) ret <16 x i8> %res } define <16 x i8> @test_aesd(<16 x i8> %data, <16 x i8> %key) { ; CHECK-LABEL: test_aesd: ; CHECK: aesd.16b v0, v1 - %res = call <16 x i8> @llvm.arm64.crypto.aesd(<16 x i8> %data, <16 x i8> %key) + %res = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data, <16 x i8> %key) ret <16 x i8> %res } define <16 x i8> @test_aesmc(<16 x i8> %data) { ; CHECK-LABEL: test_aesmc: ; CHECK: aesmc.16b v0, v0 - %res = call <16 x i8> @llvm.arm64.crypto.aesmc(<16 x i8> %data) + %res = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %data) ret <16 x i8> %res } define <16 x i8> @test_aesimc(<16 x i8> %data) { ; CHECK-LABEL: test_aesimc: ; CHECK: aesimc.16b v0, v0 - %res = call <16 x i8> @llvm.arm64.crypto.aesimc(<16 x i8> %data) + %res = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %data) ret <16 x i8> %res } -declare <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) -declare <4 x i32> @llvm.arm64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) -declare <4 x i32> @llvm.arm64.crypto.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) -declare i32 @llvm.arm64.crypto.sha1h(i32 %hash_e) -declare <4 x i32> @llvm.arm64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) -declare <4 x i32> @llvm.arm64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) +declare <4 x i32> @llvm.aarch64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) +declare <4 x i32> @llvm.aarch64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) +declare <4 x i32> @llvm.aarch64.crypto.sha1m(<4 x 
i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) +declare i32 @llvm.aarch64.crypto.sha1h(i32 %hash_e) +declare <4 x i32> @llvm.aarch64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) +declare <4 x i32> @llvm.aarch64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) define <4 x i32> @test_sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { ; CHECK-LABEL: test_sha1c: ; CHECK: fmov [[HASH_E:s[0-9]+]], w0 ; CHECK: sha1c.4s q0, [[HASH_E]], v1 - %res = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + %res = call <4 x i32> @llvm.aarch64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) ret <4 x i32> %res } @@ -55,9 +55,9 @@ define <4 x i32> @test_sha1c_in_a_row(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i3 ; CHECK: sha1c.4s q[[SHA1RES:[0-9]+]], [[HASH_E]], v1 ; CHECK-NOT: fmov ; CHECK: sha1c.4s q0, s[[SHA1RES]], v1 - %res = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + %res = call <4 x i32> @llvm.aarch64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) %extract = extractelement <4 x i32> %res, i32 0 - %res2 = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %extract, <4 x i32> %wk) + %res2 = call <4 x i32> @llvm.aarch64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %extract, <4 x i32> %wk) ret <4 x i32> %res2 } @@ -65,7 +65,7 @@ define <4 x i32> @test_sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { ; CHECK-LABEL: test_sha1p: ; CHECK: fmov [[HASH_E:s[0-9]+]], w0 ; CHECK: sha1p.4s q0, [[HASH_E]], v1 - %res = call <4 x i32> @llvm.arm64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + %res = call <4 x i32> @llvm.aarch64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) ret <4 x i32> %res } @@ -73,7 +73,7 @@ define <4 x i32> @test_sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { ; CHECK-LABEL: test_sha1m: ; CHECK: fmov [[HASH_E:s[0-9]+]], w0 ; CHECK: sha1m.4s q0, [[HASH_E]], v1 - %res = call <4 x i32> @llvm.arm64.crypto.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + %res = call <4 x i32> @llvm.aarch64.crypto.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) ret <4 x i32> %res } @@ -82,33 +82,33 @@ define i32 @test_sha1h(i32 %hash_e) { ; CHECK: fmov [[HASH_E:s[0-9]+]], w0 ; CHECK: sha1h [[RES:s[0-9]+]], [[HASH_E]] ; CHECK: fmov w0, [[RES]] - %res = call i32 @llvm.arm64.crypto.sha1h(i32 %hash_e) + %res = call i32 @llvm.aarch64.crypto.sha1h(i32 %hash_e) ret i32 %res } define <4 x i32> @test_sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) { ; CHECK-LABEL: test_sha1su0: ; CHECK: sha1su0.4s v0, v1, v2 - %res = call <4 x i32> @llvm.arm64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) + %res = call <4 x i32> @llvm.aarch64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) ret <4 x i32> %res } define <4 x i32> @test_sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) { ; CHECK-LABEL: test_sha1su1: ; CHECK: sha1su1.4s v0, v1 - %res = call <4 x i32> @llvm.arm64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) + %res = call <4 x i32> @llvm.aarch64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) ret <4 x i32> %res } -declare <4 x i32> @llvm.arm64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) -declare <4 x i32> @llvm.arm64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) -declare <4 x i32> @llvm.arm64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) -declare <4 x i32> 
@llvm.arm64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) +declare <4 x i32> @llvm.aarch64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) +declare <4 x i32> @llvm.aarch64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) +declare <4 x i32> @llvm.aarch64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) +declare <4 x i32> @llvm.aarch64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) define <4 x i32> @test_sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) { ; CHECK-LABEL: test_sha256h: ; CHECK: sha256h.4s q0, q1, v2 - %res = call <4 x i32> @llvm.arm64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) + %res = call <4 x i32> @llvm.aarch64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) ret <4 x i32> %res } @@ -116,20 +116,20 @@ define <4 x i32> @test_sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x ; CHECK-LABEL: test_sha256h2: ; CHECK: sha256h2.4s q0, q1, v2 - %res = call <4 x i32> @llvm.arm64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) + %res = call <4 x i32> @llvm.aarch64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) ret <4 x i32> %res } define <4 x i32> @test_sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) { ; CHECK-LABEL: test_sha256su0: ; CHECK: sha256su0.4s v0, v1 - %res = call <4 x i32> @llvm.arm64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) + %res = call <4 x i32> @llvm.aarch64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) ret <4 x i32> %res } define <4 x i32> @test_sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) { ; CHECK-LABEL: test_sha256su1: ; CHECK: sha256su1.4s v0, v1, v2 - %res = call <4 x i32> @llvm.arm64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) + %res = call <4 x i32> @llvm.aarch64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) ret <4 x i32> %res } diff --git a/test/CodeGen/ARM64/cse.ll b/test/CodeGen/AArch64/arm64-cse.ll similarity index 100% rename from test/CodeGen/ARM64/cse.ll rename to test/CodeGen/AArch64/arm64-cse.ll diff --git a/test/CodeGen/ARM64/csel.ll b/test/CodeGen/AArch64/arm64-csel.ll similarity index 100% rename from test/CodeGen/ARM64/csel.ll rename to test/CodeGen/AArch64/arm64-csel.ll diff --git a/test/CodeGen/ARM64/cvt.ll b/test/CodeGen/AArch64/arm64-cvt.ll similarity index 52% rename from test/CodeGen/ARM64/cvt.ll rename to test/CodeGen/AArch64/arm64-cvt.ll index b55a42fdf853..420a8bc04833 100644 --- a/test/CodeGen/ARM64/cvt.ll +++ b/test/CodeGen/AArch64/arm64-cvt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ; ; Floating-point scalar convert to signed integer (to nearest with ties to away) @@ -7,7 +7,7 @@ define i32 @fcvtas_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtas_1w1s: ;CHECK: fcvtas w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtas.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float %A) ret i32 %tmp3 } @@ -15,7 +15,7 @@ define i64 @fcvtas_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtas_1x1s: ;CHECK: fcvtas x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtas.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A) ret i64 %tmp3 } @@ -23,7 +23,7 @@ define i32 @fcvtas_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtas_1w1d: ;CHECK: fcvtas w0, d0 
;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtas.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtas.i32.f64(double %A) ret i32 %tmp3 } @@ -31,14 +31,14 @@ define i64 @fcvtas_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtas_1x1d: ;CHECK: fcvtas x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtas.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtas.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtas.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtas.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtas.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtas.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtas.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtas.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtas.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to unsigned integer @@ -47,7 +47,7 @@ define i32 @fcvtau_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtau_1w1s: ;CHECK: fcvtau w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtau.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float %A) ret i32 %tmp3 } @@ -55,7 +55,7 @@ define i64 @fcvtau_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtau_1x1s: ;CHECK: fcvtau x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtau.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float %A) ret i64 %tmp3 } @@ -63,7 +63,7 @@ define i32 @fcvtau_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtau_1w1d: ;CHECK: fcvtau w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtau.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtau.i32.f64(double %A) ret i32 %tmp3 } @@ -71,14 +71,14 @@ define i64 @fcvtau_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtau_1x1d: ;CHECK: fcvtau x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtau.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtau.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtau.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtau.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtau.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtau.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtau.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtau.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtau.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to signed integer (toward -Inf) @@ -87,7 +87,7 @@ define i32 @fcvtms_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtms_1w1s: ;CHECK: fcvtms w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtms.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float %A) ret i32 %tmp3 } @@ -95,7 +95,7 @@ define i64 @fcvtms_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtms_1x1s: ;CHECK: fcvtms x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtms.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float %A) ret i64 %tmp3 } @@ -103,7 +103,7 @@ define i32 @fcvtms_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtms_1w1d: ;CHECK: fcvtms w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtms.i32.f64(double %A) + %tmp3 = call i32 
@llvm.aarch64.neon.fcvtms.i32.f64(double %A) ret i32 %tmp3 } @@ -111,14 +111,14 @@ define i64 @fcvtms_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtms_1x1d: ;CHECK: fcvtms x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtms.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtms.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtms.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtms.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtms.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtms.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtms.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtms.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtms.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to unsigned integer (toward -Inf) @@ -127,7 +127,7 @@ define i32 @fcvtmu_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtmu_1w1s: ;CHECK: fcvtmu w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtmu.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float %A) ret i32 %tmp3 } @@ -135,7 +135,7 @@ define i64 @fcvtmu_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtmu_1x1s: ;CHECK: fcvtmu x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtmu.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float %A) ret i64 %tmp3 } @@ -143,7 +143,7 @@ define i32 @fcvtmu_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtmu_1w1d: ;CHECK: fcvtmu w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtmu.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double %A) ret i32 %tmp3 } @@ -151,14 +151,14 @@ define i64 @fcvtmu_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtmu_1x1d: ;CHECK: fcvtmu x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtmu.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtmu.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtmu.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtmu.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtmu.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to signed integer (to nearest with ties to even) @@ -167,7 +167,7 @@ define i32 @fcvtns_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtns_1w1s: ;CHECK: fcvtns w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtns.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float %A) ret i32 %tmp3 } @@ -175,7 +175,7 @@ define i64 @fcvtns_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtns_1x1s: ;CHECK: fcvtns x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtns.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float %A) ret i64 %tmp3 } @@ -183,7 +183,7 @@ define i32 @fcvtns_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtns_1w1d: ;CHECK: fcvtns w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtns.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtns.i32.f64(double %A) ret i32 %tmp3 } @@ -191,14 
+191,14 @@ define i64 @fcvtns_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtns_1x1d: ;CHECK: fcvtns x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtns.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtns.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtns.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtns.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtns.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtns.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtns.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtns.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtns.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to unsigned integer (to nearest with ties to even) @@ -207,7 +207,7 @@ define i32 @fcvtnu_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtnu_1w1s: ;CHECK: fcvtnu w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtnu.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float %A) ret i32 %tmp3 } @@ -215,7 +215,7 @@ define i64 @fcvtnu_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtnu_1x1s: ;CHECK: fcvtnu x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtnu.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float %A) ret i64 %tmp3 } @@ -223,7 +223,7 @@ define i32 @fcvtnu_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtnu_1w1d: ;CHECK: fcvtnu w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtnu.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double %A) ret i32 %tmp3 } @@ -231,14 +231,14 @@ define i64 @fcvtnu_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtnu_1x1d: ;CHECK: fcvtnu x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtnu.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtnu.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtnu.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtnu.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtnu.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to signed integer (toward +Inf) @@ -247,7 +247,7 @@ define i32 @fcvtps_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtps_1w1s: ;CHECK: fcvtps w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtps.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float %A) ret i32 %tmp3 } @@ -255,7 +255,7 @@ define i64 @fcvtps_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtps_1x1s: ;CHECK: fcvtps x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtps.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float %A) ret i64 %tmp3 } @@ -263,7 +263,7 @@ define i32 @fcvtps_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtps_1w1d: ;CHECK: fcvtps w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtps.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtps.i32.f64(double %A) ret i32 %tmp3 } @@ -271,14 +271,14 @@ define i64 @fcvtps_1x1d(double %A) nounwind { ;CHECK-LABEL: 
fcvtps_1x1d: ;CHECK: fcvtps x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtps.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtps.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtps.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtps.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtps.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtps.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtps.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtps.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtps.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to unsigned integer (toward +Inf) @@ -287,7 +287,7 @@ define i32 @fcvtpu_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtpu_1w1s: ;CHECK: fcvtpu w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtpu.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float %A) ret i32 %tmp3 } @@ -295,7 +295,7 @@ define i64 @fcvtpu_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtpu_1x1s: ;CHECK: fcvtpu x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtpu.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float %A) ret i64 %tmp3 } @@ -303,7 +303,7 @@ define i32 @fcvtpu_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtpu_1w1d: ;CHECK: fcvtpu w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtpu.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double %A) ret i32 %tmp3 } @@ -311,14 +311,14 @@ define i64 @fcvtpu_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtpu_1x1d: ;CHECK: fcvtpu x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtpu.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtpu.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtpu.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtpu.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtpu.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to signed integer (toward zero) @@ -327,7 +327,7 @@ define i32 @fcvtzs_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtzs_1w1s: ;CHECK: fcvtzs w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtzs.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %A) ret i32 %tmp3 } @@ -335,7 +335,7 @@ define i64 @fcvtzs_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtzs_1x1s: ;CHECK: fcvtzs x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtzs.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %A) ret i64 %tmp3 } @@ -343,7 +343,7 @@ define i32 @fcvtzs_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtzs_1w1d: ;CHECK: fcvtzs w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtzs.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double %A) ret i32 %tmp3 } @@ -351,14 +351,14 @@ define i64 @fcvtzs_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtzs_1x1d: ;CHECK: fcvtzs x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 
@llvm.arm64.neon.fcvtzs.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtzs.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtzs.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtzs.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtzs.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to unsigned integer (toward zero) @@ -367,7 +367,7 @@ define i32 @fcvtzu_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtzu_1w1s: ;CHECK: fcvtzu w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtzu.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float %A) ret i32 %tmp3 } @@ -375,7 +375,7 @@ define i64 @fcvtzu_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtzu_1x1s: ;CHECK: fcvtzu x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtzu.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float %A) ret i64 %tmp3 } @@ -383,7 +383,7 @@ define i32 @fcvtzu_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtzu_1w1d: ;CHECK: fcvtzu w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtzu.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double %A) ret i32 %tmp3 } @@ -391,11 +391,11 @@ define i64 @fcvtzu_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtzu_1x1d: ;CHECK: fcvtzu x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtzu.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtzu.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtzu.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtzu.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtzu.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double) nounwind readnone diff --git a/test/CodeGen/ARM64/dagcombiner-convergence.ll b/test/CodeGen/AArch64/arm64-dagcombiner-convergence.ll similarity index 100% rename from test/CodeGen/ARM64/dagcombiner-convergence.ll rename to test/CodeGen/AArch64/arm64-dagcombiner-convergence.ll diff --git a/test/CodeGen/ARM64/dagcombiner-dead-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll similarity index 100% rename from test/CodeGen/ARM64/dagcombiner-dead-indexed-load.ll rename to test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll diff --git a/test/CodeGen/ARM64/dagcombiner-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll similarity index 100% rename from test/CodeGen/ARM64/dagcombiner-indexed-load.ll rename to test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll diff --git a/test/CodeGen/ARM64/dagcombiner-load-slicing.ll b/test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll similarity index 100% rename from test/CodeGen/ARM64/dagcombiner-load-slicing.ll rename to test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll diff --git a/test/CodeGen/ARM64/dead-def-frame-index.ll 
b/test/CodeGen/AArch64/arm64-dead-def-frame-index.ll
similarity index 100%
rename from test/CodeGen/ARM64/dead-def-frame-index.ll
rename to test/CodeGen/AArch64/arm64-dead-def-frame-index.ll
diff --git a/test/CodeGen/ARM64/dead-register-def-bug.ll b/test/CodeGen/AArch64/arm64-dead-register-def-bug.ll
similarity index 100%
rename from test/CodeGen/ARM64/dead-register-def-bug.ll
rename to test/CodeGen/AArch64/arm64-dead-register-def-bug.ll
diff --git a/test/CodeGen/ARM64/dup.ll b/test/CodeGen/AArch64/arm64-dup.ll
similarity index 99%
rename from test/CodeGen/ARM64/dup.ll
rename to test/CodeGen/AArch64/arm64-dup.ll
index 97774a7d18f4..0c56b46c4176 100644
--- a/test/CodeGen/ARM64/dup.ll
+++ b/test/CodeGen/AArch64/arm64-dup.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
 
 define <8 x i8> @v_dup8(i8 %A) nounwind {
 ;CHECK-LABEL: v_dup8:
diff --git a/test/CodeGen/ARM64/early-ifcvt.ll b/test/CodeGen/AArch64/arm64-early-ifcvt.ll
similarity index 100%
rename from test/CodeGen/ARM64/early-ifcvt.ll
rename to test/CodeGen/AArch64/arm64-early-ifcvt.ll
diff --git a/test/CodeGen/ARM64/elf-calls.ll b/test/CodeGen/AArch64/arm64-elf-calls.ll
similarity index 100%
rename from test/CodeGen/ARM64/elf-calls.ll
rename to test/CodeGen/AArch64/arm64-elf-calls.ll
diff --git a/test/CodeGen/ARM64/elf-constpool.ll b/test/CodeGen/AArch64/arm64-elf-constpool.ll
similarity index 100%
rename from test/CodeGen/ARM64/elf-constpool.ll
rename to test/CodeGen/AArch64/arm64-elf-constpool.ll
diff --git a/test/CodeGen/ARM64/elf-globals.ll b/test/CodeGen/AArch64/arm64-elf-globals.ll
similarity index 100%
rename from test/CodeGen/ARM64/elf-globals.ll
rename to test/CodeGen/AArch64/arm64-elf-globals.ll
diff --git a/test/CodeGen/ARM64/ext.ll b/test/CodeGen/AArch64/arm64-ext.ll
similarity index 98%
rename from test/CodeGen/ARM64/ext.ll
rename to test/CodeGen/AArch64/arm64-ext.ll
index d368eef172e8..67860de51b0f 100644
--- a/test/CodeGen/ARM64/ext.ll
+++ b/test/CodeGen/AArch64/arm64-ext.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextd:
diff --git a/test/CodeGen/ARM64/extend-int-to-fp.ll b/test/CodeGen/AArch64/arm64-extend-int-to-fp.ll
similarity index 86%
rename from test/CodeGen/ARM64/extend-int-to-fp.ll
rename to test/CodeGen/AArch64/arm64-extend-int-to-fp.ll
index 599a697a3103..048fdb083a41 100644
--- a/test/CodeGen/ARM64/extend-int-to-fp.ll
+++ b/test/CodeGen/AArch64/arm64-extend-int-to-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
 
 define <4 x float> @foo(<4 x i16> %a) nounwind {
 ; CHECK-LABEL: foo:
diff --git a/test/CodeGen/ARM64/extend.ll b/test/CodeGen/AArch64/arm64-extend.ll
similarity index 100%
rename from test/CodeGen/ARM64/extend.ll
rename to test/CodeGen/AArch64/arm64-extend.ll
diff --git a/test/CodeGen/ARM64/extern-weak.ll b/test/CodeGen/AArch64/arm64-extern-weak.ll
similarity index 100%
rename from test/CodeGen/ARM64/extern-weak.ll
rename to test/CodeGen/AArch64/arm64-extern-weak.ll
diff --git a/test/CodeGen/ARM64/extload-knownzero.ll b/test/CodeGen/AArch64/arm64-extload-knownzero.ll
similarity index 100%
rename from test/CodeGen/ARM64/extload-knownzero.ll
rename to test/CodeGen/AArch64/arm64-extload-knownzero.ll
diff --git a/test/CodeGen/ARM64/extract.ll b/test/CodeGen/AArch64/arm64-extract.ll
similarity index 95%
rename from test/CodeGen/ARM64/extract.ll
rename to test/CodeGen/AArch64/arm64-extract.ll
index 02e42189bb4a..01984662d23a 100644
--- a/test/CodeGen/ARM64/extract.ll
+++ b/test/CodeGen/AArch64/arm64-extract.ll
@@ -1,4 +1,4 @@
-; RUN: llc -arm64-extr-generation=true -verify-machineinstrs < %s \
+; RUN: llc -aarch64-extr-generation=true -verify-machineinstrs < %s \
 ; RUN: -march=arm64 | FileCheck %s
 
 define i64 @ror_i64(i64 %in) {
diff --git a/test/CodeGen/ARM64/extract_subvector.ll b/test/CodeGen/AArch64/arm64-extract_subvector.ll
similarity index 95%
rename from test/CodeGen/ARM64/extract_subvector.ll
rename to test/CodeGen/AArch64/arm64-extract_subvector.ll
index 20c05fb23265..8b15a6453b2b 100644
--- a/test/CodeGen/ARM64/extract_subvector.ll
+++ b/test/CodeGen/AArch64/arm64-extract_subvector.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
 
 ; Extract of an upper half of a vector is an "ext.16b v0, v0, v0, #8" insn.
 
diff --git a/test/CodeGen/ARM64/fast-isel-addr-offset.ll b/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-addr-offset.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
diff --git a/test/CodeGen/ARM64/fast-isel-alloca.ll b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-alloca.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
diff --git a/test/CodeGen/ARM64/fast-isel-br.ll b/test/CodeGen/AArch64/arm64-fast-isel-br.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-br.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-br.ll
diff --git a/test/CodeGen/ARM64/fast-isel-call.ll b/test/CodeGen/AArch64/arm64-fast-isel-call.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-call.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-call.ll
diff --git a/test/CodeGen/ARM64/fast-isel-conversion.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-conversion.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
diff --git a/test/CodeGen/ARM64/fast-isel-fcmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-fcmp.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll
diff --git a/test/CodeGen/ARM64/fast-isel-gv.ll b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-gv.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-gv.ll
diff --git a/test/CodeGen/ARM64/fast-isel-icmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-icmp.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-icmp.ll
diff --git a/test/CodeGen/ARM64/fast-isel-indirectbr.ll b/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-indirectbr.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
diff --git a/test/CodeGen/ARM64/fast-isel-intrinsic.ll b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-intrinsic.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
diff --git a/test/CodeGen/ARM64/fast-isel-materialize.ll b/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-materialize.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-materialize.ll
diff --git a/test/CodeGen/ARM64/fast-isel-noconvert.ll b/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-noconvert.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll
diff --git a/test/CodeGen/ARM64/fast-isel-rem.ll b/test/CodeGen/AArch64/arm64-fast-isel-rem.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-rem.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-rem.ll
diff --git a/test/CodeGen/ARM64/fast-isel-ret.ll b/test/CodeGen/AArch64/arm64-fast-isel-ret.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-ret.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-ret.ll
diff --git a/test/CodeGen/ARM64/fast-isel-select.ll b/test/CodeGen/AArch64/arm64-fast-isel-select.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel-select.ll
rename to test/CodeGen/AArch64/arm64-fast-isel-select.ll
diff --git a/test/CodeGen/ARM64/fast-isel.ll b/test/CodeGen/AArch64/arm64-fast-isel.ll
similarity index 100%
rename from test/CodeGen/ARM64/fast-isel.ll
rename to test/CodeGen/AArch64/arm64-fast-isel.ll
diff --git a/test/CodeGen/ARM64/fastcc-tailcall.ll b/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll
similarity index 100%
rename from test/CodeGen/ARM64/fastcc-tailcall.ll
rename to test/CodeGen/AArch64/arm64-fastcc-tailcall.ll
diff --git a/test/CodeGen/ARM64/fastisel-gep-promote-before-add.ll b/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll
similarity index 100%
rename from test/CodeGen/ARM64/fastisel-gep-promote-before-add.ll
rename to test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll
diff --git a/test/CodeGen/ARM64/fcmp-opt.ll b/test/CodeGen/AArch64/arm64-fcmp-opt.ll
similarity index 98%
rename from test/CodeGen/ARM64/fcmp-opt.ll
rename to test/CodeGen/AArch64/arm64-fcmp-opt.ll
index e79eb9c6fb1f..41027d4b5c74 100644
--- a/test/CodeGen/ARM64/fcmp-opt.ll
+++ b/test/CodeGen/AArch64/arm64-fcmp-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -mcpu=cyclone -aarch64-neon-syntax=apple | FileCheck %s
 ; rdar://10263824
 
 define i1 @fcmp_float1(float %a) nounwind ssp {
diff --git a/test/CodeGen/ARM64/fcopysign.ll b/test/CodeGen/AArch64/arm64-fcopysign.ll
similarity index 100%
rename from test/CodeGen/ARM64/fcopysign.ll
rename to test/CodeGen/AArch64/arm64-fcopysign.ll
diff --git a/test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
similarity index 62%
rename from test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll
rename to test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
index 77981f292bd8..e51c38b2b95e 100644
--- a/test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll
+++ b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
 
 ; DAGCombine to transform a conversion of an extract_vector_elt to an
 ; extract_vector_elt of a conversion, which saves a round trip of copies
@@ -8,8 +8,8 @@ define double @foo0(<2 x i64> %a) nounwind {
 ; CHECK: scvtf.2d [[REG:v[0-9]+]], v0, #9
 ; CHECK-NEXT: ins.d v0[0], [[REG]][1]
 %vecext = extractelement <2 x i64> %a, i32 1
- %fcvt_n = tail call double @llvm.arm64.neon.vcvtfxs2fp.f64.i64(i64 %vecext, i32 9)
+ %fcvt_n = tail call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %vecext, i32 9)
 ret double %fcvt_n
 }
 
-declare double @llvm.arm64.neon.vcvtfxs2fp.f64.i64(i64, i32) nounwind readnone
+declare double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64, i32) nounwind readnone
diff --git a/test/CodeGen/ARM64/fmadd.ll b/test/CodeGen/AArch64/arm64-fmadd.ll
similarity index 100%
rename from test/CodeGen/ARM64/fmadd.ll
rename to test/CodeGen/AArch64/arm64-fmadd.ll
diff --git a/test/CodeGen/ARM64/fmax.ll b/test/CodeGen/AArch64/arm64-fmax.ll
similarity index 100%
rename from test/CodeGen/ARM64/fmax.ll
rename to test/CodeGen/AArch64/arm64-fmax.ll
diff --git a/test/CodeGen/AArch64/arm64-fminv.ll b/test/CodeGen/AArch64/arm64-fminv.ll
new file mode 100644
index 000000000000..f4c97355dd19
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fminv.ll
@@ -0,0 +1,101 @@
+; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
+
+define float @test_fminv_v2f32(<2 x float> %in) {
+; CHECK: test_fminv_v2f32:
+; CHECK: fminp s0, v0.2s
+ %min = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %in)
+ ret float %min
+}
+
+define float @test_fminv_v4f32(<4 x float> %in) {
+; CHECK: test_fminv_v4f32:
+; CHECK: fminv s0, v0.4s
+ %min = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %in)
+ ret float %min
+}
+
+define double @test_fminv_v2f64(<2 x double> %in) {
+; CHECK: test_fminv_v2f64:
+; CHECK: fminp d0, v0.2d
+ %min = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %in)
+ ret double %min
+}
+
+declare float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float>)
+declare float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float>)
+declare double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double>)
+
+define float @test_fmaxv_v2f32(<2 x float> %in) {
+; CHECK: test_fmaxv_v2f32:
+; CHECK: fmaxp s0, v0.2s
+ %max = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %in)
+ ret float %max
+}
+
+define float @test_fmaxv_v4f32(<4 x float> %in) {
+; CHECK: test_fmaxv_v4f32:
+; CHECK: fmaxv s0, v0.4s
+ %max = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %in)
+ ret float %max
+}
+
+define double @test_fmaxv_v2f64(<2 x double> %in) {
+; CHECK: test_fmaxv_v2f64:
+; CHECK: fmaxp d0, v0.2d
+ %max = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %in)
+ ret double %max
+}
+
+declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
+declare float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float>)
+declare double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double>)
+
+define float @test_fminnmv_v2f32(<2 x float> %in) {
+; CHECK: test_fminnmv_v2f32:
+; CHECK: fminnmp s0, v0.2s
+ %minnm = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %in)
+ ret float %minnm
+}
+
+define float @test_fminnmv_v4f32(<4 x float> %in) {
+; CHECK: test_fminnmv_v4f32:
+; CHECK: fminnmv s0, v0.4s
+ %minnm = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %in)
+ ret float %minnm
+}
+
+define double @test_fminnmv_v2f64(<2 x double> %in) {
+; CHECK: test_fminnmv_v2f64:
+; CHECK: fminnmp d0, v0.2d
+ %minnm = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %in)
+ ret double %minnm
+}
+
+declare float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float>)
+declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>)
+declare double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double>)
+
+define float @test_fmaxnmv_v2f32(<2 x float> %in) {
+; CHECK: test_fmaxnmv_v2f32:
+; CHECK: fmaxnmp s0, v0.2s
+ %maxnm = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %in)
+ ret float %maxnm
+}
+
+define float @test_fmaxnmv_v4f32(<4 x float> %in) {
+; CHECK: test_fmaxnmv_v4f32:
+; CHECK: fmaxnmv s0, v0.4s
+ %maxnm = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %in)
+ ret float %maxnm
+}
+
+define double @test_fmaxnmv_v2f64(<2 x double> %in) {
+; CHECK: test_fmaxnmv_v2f64:
+; CHECK: fmaxnmp d0, v0.2d
+ %maxnm = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %in)
+ ret double %maxnm
+}
+
+declare float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float>)
+declare float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float>)
+declare double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double>)
diff --git a/test/CodeGen/ARM64/fmuladd.ll b/test/CodeGen/AArch64/arm64-fmuladd.ll
similarity index 97%
rename from test/CodeGen/ARM64/fmuladd.ll
rename to test/CodeGen/AArch64/arm64-fmuladd.ll
index 174d83076718..6c5eecabd755 100644
--- a/test/CodeGen/ARM64/fmuladd.ll
+++ b/test/CodeGen/AArch64/arm64-fmuladd.ll
@@ -1,4 +1,4 @@
-; RUN: llc -asm-verbose=false < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc -asm-verbose=false < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
 
 define float @test_f32(float* %A, float* %B, float* %C) nounwind {
 ;CHECK-LABEL: test_f32:
diff --git a/test/CodeGen/ARM64/fold-address.ll b/test/CodeGen/AArch64/arm64-fold-address.ll
similarity index 100%
rename from test/CodeGen/ARM64/fold-address.ll
rename to test/CodeGen/AArch64/arm64-fold-address.ll
diff --git a/test/CodeGen/ARM64/fold-lsl.ll b/test/CodeGen/AArch64/arm64-fold-lsl.ll
similarity index 97%
rename from test/CodeGen/ARM64/fold-lsl.ll
rename to test/CodeGen/AArch64/arm64-fold-lsl.ll
index 2e5762dd2628..ec65e467e37d 100644
--- a/test/CodeGen/ARM64/fold-lsl.ll
+++ b/test/CodeGen/AArch64/arm64-fold-lsl.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
 ;
 ;
 
diff --git a/test/CodeGen/ARM64/fp-contract-zero.ll b/test/CodeGen/AArch64/arm64-fp-contract-zero.ll
similarity index 100%
rename from test/CodeGen/ARM64/fp-contract-zero.ll
rename to test/CodeGen/AArch64/arm64-fp-contract-zero.ll
diff --git a/test/CodeGen/ARM64/fp-imm.ll b/test/CodeGen/AArch64/arm64-fp-imm.ll
similarity index 100%
rename from test/CodeGen/ARM64/fp-imm.ll
rename to test/CodeGen/AArch64/arm64-fp-imm.ll
diff --git a/test/CodeGen/ARM64/fp.ll b/test/CodeGen/AArch64/arm64-fp.ll
similarity index 100%
rename from test/CodeGen/ARM64/fp.ll
rename to test/CodeGen/AArch64/arm64-fp.ll
diff --git a/test/CodeGen/ARM64/fp128-folding.ll b/test/CodeGen/AArch64/arm64-fp128-folding.ll
similarity index 100%
rename from test/CodeGen/ARM64/fp128-folding.ll
rename to test/CodeGen/AArch64/arm64-fp128-folding.ll
diff --git a/test/CodeGen/ARM64/fp128.ll b/test/CodeGen/AArch64/arm64-fp128.ll
similarity index 100%
rename from test/CodeGen/ARM64/fp128.ll
rename to test/CodeGen/AArch64/arm64-fp128.ll
diff --git a/test/CodeGen/ARM64/frame-index.ll b/test/CodeGen/AArch64/arm64-frame-index.ll
similarity index 100%
rename from test/CodeGen/ARM64/frame-index.ll
rename to test/CodeGen/AArch64/arm64-frame-index.ll
diff --git a/test/CodeGen/ARM64/frameaddr.ll b/test/CodeGen/AArch64/arm64-frameaddr.ll
similarity index 100%
rename from test/CodeGen/ARM64/frameaddr.ll
rename to test/CodeGen/AArch64/arm64-frameaddr.ll
diff --git a/test/CodeGen/ARM64/global-address.ll
b/test/CodeGen/AArch64/arm64-global-address.ll similarity index 100% rename from test/CodeGen/ARM64/global-address.ll rename to test/CodeGen/AArch64/arm64-global-address.ll diff --git a/test/CodeGen/ARM64/hello.ll b/test/CodeGen/AArch64/arm64-hello.ll similarity index 100% rename from test/CodeGen/ARM64/hello.ll rename to test/CodeGen/AArch64/arm64-hello.ll diff --git a/test/CodeGen/ARM64/i16-subreg-extract.ll b/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll similarity index 80% rename from test/CodeGen/ARM64/i16-subreg-extract.ll rename to test/CodeGen/AArch64/arm64-i16-subreg-extract.ll index fc2e8b58ac89..ba759e32aae5 100644 --- a/test/CodeGen/ARM64/i16-subreg-extract.ll +++ b/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define i32 @foo(<4 x i16>* %__a) nounwind { ; CHECK-LABEL: foo: diff --git a/test/CodeGen/ARM64/icmp-opt.ll b/test/CodeGen/AArch64/arm64-icmp-opt.ll similarity index 100% rename from test/CodeGen/ARM64/icmp-opt.ll rename to test/CodeGen/AArch64/arm64-icmp-opt.ll diff --git a/test/CodeGen/ARM64/illegal-float-ops.ll b/test/CodeGen/AArch64/arm64-illegal-float-ops.ll similarity index 100% rename from test/CodeGen/ARM64/illegal-float-ops.ll rename to test/CodeGen/AArch64/arm64-illegal-float-ops.ll diff --git a/test/CodeGen/ARM64/indexed-memory.ll b/test/CodeGen/AArch64/arm64-indexed-memory.ll similarity index 99% rename from test/CodeGen/ARM64/indexed-memory.ll rename to test/CodeGen/AArch64/arm64-indexed-memory.ll index e390ed7ece51..e501c6e403bd 100644 --- a/test/CodeGen/ARM64/indexed-memory.ll +++ b/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-redzone | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-redzone | FileCheck %s define void @store64(i64** nocapture %out, i64 %index, i64 %spacing) nounwind noinline ssp { ; CHECK-LABEL: store64: diff --git a/test/CodeGen/ARM64/indexed-vector-ldst-2.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll similarity index 100% rename from test/CodeGen/ARM64/indexed-vector-ldst-2.ll rename to test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll diff --git a/test/CodeGen/ARM64/indexed-vector-ldst.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll similarity index 75% rename from test/CodeGen/ARM64/indexed-vector-ldst.ll rename to test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 3d8ff53137b6..9ee4063658b2 100644 --- a/test/CodeGen/ARM64/indexed-vector-ldst.ll +++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -615,7 +615,7 @@ define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) { define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v16i8_post_imm_ld2: ;CHECK: ld2.16b { v0, v1 }, [x0], #32 - %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %A) + %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 32 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld2 @@ -624,19 +624,19 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) { define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v16i8_post_reg_ld2: ;CHECK: ld2.16b { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %A) + %ld2 = tail call { <16 x i8>, <16 
x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld2 } -declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8*) +declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v8i8_post_imm_ld2: ;CHECK: ld2.8b { v0, v1 }, [x0], #16 - %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0i8(i8* %A) + %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 16 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld2 @@ -645,19 +645,19 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) { define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i8_post_reg_ld2: ;CHECK: ld2.8b { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0i8(i8* %A) + %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld2 } -declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0i8(i8*) +declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v8i16_post_imm_ld2: ;CHECK: ld2.8h { v0, v1 }, [x0], #32 - %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2.v8i16.p0i16(i16* %A) + %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 16 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld2 @@ -666,19 +666,19 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) { define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i16_post_reg_ld2: ;CHECK: ld2.8h { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2.v8i16.p0i16(i16* %A) + %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld2 } -declare { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2.v8i16.p0i16(i16*) +declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*) define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v4i16_post_imm_ld2: ;CHECK: ld2.4h { v0, v1 }, [x0], #16 - %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2.v4i16.p0i16(i16* %A) + %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 8 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16> } %ld2 @@ -687,19 +687,19 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) { define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i16_post_reg_ld2: ;CHECK: ld2.4h { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2.v4i16.p0i16(i16* %A) + %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16> } %ld2 } -declare { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2.v4i16.p0i16(i16*) 
+declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*) define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v4i32_post_imm_ld2: ;CHECK: ld2.4s { v0, v1 }, [x0], #32 - %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32* %A) + %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 8 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld2 @@ -708,19 +708,19 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) { define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i32_post_reg_ld2: ;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32* %A) + %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld2 } -declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32*) +declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*) define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v2i32_post_imm_ld2: ;CHECK: ld2.2s { v0, v1 }, [x0], #16 - %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32* %A) + %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 4 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld2 @@ -729,19 +729,19 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) { define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i32_post_reg_ld2: ;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32* %A) + %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld2 } -declare { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32*) +declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*) define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v2i64_post_imm_ld2: ;CHECK: ld2.2d { v0, v1 }, [x0], #32 - %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2.v2i64.p0i64(i64* %A) + %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld2 @@ -750,19 +750,19 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) { define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i64_post_reg_ld2: ;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2.v2i64.p0i64(i64* %A) + %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld2 } -declare { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2.v2i64.p0i64(i64*) +declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*) define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) { 
;CHECK-LABEL: test_v1i64_post_imm_ld2: ;CHECK: ld1.1d { v0, v1 }, [x0], #16 - %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2.v1i64.p0i64(i64* %A) + %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 2 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld2 @@ -771,19 +771,19 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) { define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1i64_post_reg_ld2: ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2.v1i64.p0i64(i64* %A) + %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld2 } -declare { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2.v1i64.p0i64(i64*) +declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*) define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** %ptr) { ;CHECK-LABEL: test_v4f32_post_imm_ld2: ;CHECK: ld2.4s { v0, v1 }, [x0], #32 - %ld2 = tail call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2.v4f32.p0f32(float* %A) + %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 8 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } %ld2 @@ -792,19 +792,19 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4f32_post_reg_ld2: ;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2.v4f32.p0f32(float* %A) + %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } %ld2 } -declare { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2.v4f32.p0f32(float*) +declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float*) define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** %ptr) { ;CHECK-LABEL: test_v2f32_post_imm_ld2: ;CHECK: ld2.2s { v0, v1 }, [x0], #16 - %ld2 = tail call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2.v2f32.p0f32(float* %A) + %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 4 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float> } %ld2 @@ -813,19 +813,19 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f32_post_reg_ld2: ;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2.v2f32.p0f32(float* %A) + %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float> } %ld2 } -declare { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2.v2f32.p0f32(float*) +declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float*) define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, double** %ptr) { 
;CHECK-LABEL: test_v2f64_post_imm_ld2: ;CHECK: ld2.2d { v0, v1 }, [x0], #32 - %ld2 = tail call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2.v2f64.p0f64(double* %A) + %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld2 @@ -834,19 +834,19 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, doubl define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f64_post_reg_ld2: ;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2.v2f64.p0f64(double* %A) + %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld2 } -declare { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2.v2f64.p0f64(double*) +declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double*) define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, double** %ptr) { ;CHECK-LABEL: test_v1f64_post_imm_ld2: ;CHECK: ld1.1d { v0, v1 }, [x0], #16 - %ld2 = tail call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2.v1f64.p0f64(double* %A) + %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 2 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld2 @@ -855,19 +855,19 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, doubl define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1f64_post_reg_ld2: ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2.v1f64.p0f64(double* %A) + %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld2 } -declare { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2.v1f64.p0f64(double*) +declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double*) define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v16i8_post_imm_ld3: ;CHECK: ld3.16b { v0, v1, v2 }, [x0], #48 - %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0i8(i8* %A) + %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 48 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3 @@ -876,19 +876,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v16i8_post_reg_ld3: ;CHECK: ld3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0i8(i8* %A) + %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3 } -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0i8(i8*) +declare { <16 x i8>, <16 
x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*) define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v8i8_post_imm_ld3: ;CHECK: ld3.8b { v0, v1, v2 }, [x0], #24 - %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0i8(i8* %A) + %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 24 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3 @@ -897,19 +897,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %pt define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i8_post_reg_ld3: ;CHECK: ld3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0i8(i8* %A) + %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3 } -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0i8(i8*) +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v8i16_post_imm_ld3: ;CHECK: ld3.8h { v0, v1, v2 }, [x0], #48 - %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3.v8i16.p0i16(i16* %A) + %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 24 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3 @@ -918,19 +918,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i16_post_reg_ld3: ;CHECK: ld3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3.v8i16.p0i16(i16* %A) + %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3 } -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3.v8i16.p0i16(i16*) +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*) define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v4i16_post_imm_ld3: ;CHECK: ld3.4h { v0, v1, v2 }, [x0], #24 - %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16* %A) + %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 12 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3 @@ -939,19 +939,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i16_post_reg_ld3: ;CHECK: ld3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16* %A) + %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, 
<4 x i16> } %ld3 } -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16*) +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*) define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v4i32_post_imm_ld3: ;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48 - %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3.v4i32.p0i32(i32* %A) + %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 12 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3 @@ -960,19 +960,19 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i32_post_reg_ld3: ;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3.v4i32.p0i32(i32* %A) + %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3 } -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3.v4i32.p0i32(i32*) +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*) define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v2i32_post_imm_ld3: ;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24 - %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3.v2i32.p0i32(i32* %A) + %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 6 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3 @@ -981,19 +981,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i32_post_reg_ld3: ;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3.v2i32.p0i32(i32* %A) + %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3 } -declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3.v2i32.p0i32(i32*) +declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*) define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v2i64_post_imm_ld3: ;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48 - %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3.v2i64.p0i64(i64* %A) + %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 6 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3 @@ -1002,19 +1002,19 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i64_post_reg_ld3: ;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3.v2i64.p0i64(i64* %A) + 
%ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3 } -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3.v2i64.p0i64(i64*) +declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*) define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v1i64_post_imm_ld3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 - %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3.v1i64.p0i64(i64* %A) + %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 3 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3 @@ -1023,19 +1023,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1i64_post_reg_ld3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3.v1i64.p0i64(i64* %A) + %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3 } -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3.v1i64.p0i64(i64*) +declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*) define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* %A, float** %ptr) { ;CHECK-LABEL: test_v4f32_post_imm_ld3: ;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48 - %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float* %A) + %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 12 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld3 @@ -1044,19 +1044,19 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4f32_post_reg_ld3: ;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float* %A) + %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld3 } -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float*) +declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float*) define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float* %A, float** %ptr) { ;CHECK-LABEL: test_v2f32_post_imm_ld3: ;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24 - %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3.v2f32.p0f32(float* %A) + %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 6 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld3 @@ -1065,19 +1065,19 @@ define { <2 x float>, <2 x float>, <2 x 
float> } @test_v2f32_post_imm_ld3(float* define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f32_post_reg_ld3: ;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3.v2f32.p0f32(float* %A) + %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld3 } -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3.v2f32.p0f32(float*) +declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float*) define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(double* %A, double** %ptr) { ;CHECK-LABEL: test_v2f64_post_imm_ld3: ;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48 - %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3.v2f64.p0f64(double* %A) + %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 6 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld3 @@ -1086,19 +1086,19 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(dou define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f64_post_reg_ld3: ;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3.v2f64.p0f64(double* %A) + %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld3 } -declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3.v2f64.p0f64(double*) +declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double*) define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(double* %A, double** %ptr) { ;CHECK-LABEL: test_v1f64_post_imm_ld3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 - %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3.v1f64.p0f64(double* %A) + %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 3 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld3 @@ -1107,19 +1107,19 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(dou define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1f64_post_reg_ld3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3.v1f64.p0f64(double* %A) + %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld3 } -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3.v1f64.p0f64(double*) +declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double*) define { <16 x 
i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v16i8_post_imm_ld4: ;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64 - %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8* %A) + %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 64 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4 @@ -1128,19 +1128,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v16i8_post_reg_ld4: ;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8* %A) + %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4 } -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8*) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*) define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v8i8_post_imm_ld4: ;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32 - %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4.v8i8.p0i8(i8* %A) + %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 32 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4 @@ -1149,19 +1149,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i8_post_reg_ld4: ;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4.v8i8.p0i8(i8* %A) + %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4 } -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4.v8i8.p0i8(i8*) +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v8i16_post_imm_ld4: ;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64 - %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4.v8i16.p0i16(i16* %A) + %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 32 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4 @@ -1170,19 +1170,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i16_post_reg_ld4: ;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } 
@llvm.arm64.neon.ld4.v8i16.p0i16(i16* %A) + %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4 } -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4.v8i16.p0i16(i16*) +declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*) define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v4i16_post_imm_ld4: ;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32 - %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16* %A) + %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 16 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4 @@ -1191,19 +1191,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i16_post_reg_ld4: ;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16* %A) + %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4 } -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16*) +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*) define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v4i32_post_imm_ld4: ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64 - %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4.v4i32.p0i32(i32* %A) + %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 16 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4 @@ -1212,19 +1212,19 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i32_post_reg_ld4: ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4.v4i32.p0i32(i32* %A) + %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4 } -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4.v4i32.p0i32(i32*) +declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*) define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v2i32_post_imm_ld4: ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32 - %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4.v2i32.p0i32(i32* %A) + %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, 
<2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 8 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4 @@ -1233,19 +1233,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i32_post_reg_ld4: ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4.v2i32.p0i32(i32* %A) + %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4 } -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4.v2i32.p0i32(i32*) +declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*) define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v2i64_post_imm_ld4: ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64 - %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4.v2i64.p0i64(i64* %A) + %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 8 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 @@ -1254,19 +1254,19 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i64_post_reg_ld4: ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4.v2i64.p0i64(i64* %A) + %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 } -declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4.v2i64.p0i64(i64*) +declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*) define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v1i64_post_imm_ld4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32 - %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4.v1i64.p0i64(i64* %A) + %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4 @@ -1275,19 +1275,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1i64_post_reg_ld4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4.v1i64.p0i64(i64* %A) + %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> 
} %ld4 } -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4.v1i64.p0i64(i64*) +declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*) define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(float* %A, float** %ptr) { ;CHECK-LABEL: test_v4f32_post_imm_ld4: ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64 - %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4.v4f32.p0f32(float* %A) + %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 16 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 @@ -1296,19 +1296,19 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4f32_post_reg_ld4: ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4.v4f32.p0f32(float* %A) + %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 } -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4.v4f32.p0f32(float*) +declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float*) define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(float* %A, float** %ptr) { ;CHECK-LABEL: test_v2f32_post_imm_ld4: ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32 - %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4.v2f32.p0f32(float* %A) + %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 8 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 @@ -1317,19 +1317,19 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f32_post_reg_ld4: ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4.v2f32.p0f32(float* %A) + %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 } -declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4.v2f32.p0f32(float*) +declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float*) define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(double* %A, double** %ptr) { ;CHECK-LABEL: test_v2f64_post_imm_ld4: ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64 - %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4.v2f64.p0f64(double* %A) + %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 
x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 8 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4 @@ -1338,19 +1338,19 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f64_post_reg_ld4: ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4.v2f64.p0f64(double* %A) + %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4 } -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4.v2f64.p0f64(double*) +declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double*) define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(double* %A, double** %ptr) { ;CHECK-LABEL: test_v1f64_post_imm_ld4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32 - %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4.v1f64.p0f64(double* %A) + %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4 @@ -1359,18 +1359,18 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1f64_post_reg_ld4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4.v1f64.p0f64(double* %A) + %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4 } -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4.v1f64.p0f64(double*) +declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double*) define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v16i8_post_imm_ld1x2: ;CHECK: ld1.16b { v0, v1 }, [x0], #32 - %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8* %A) + %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 32 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld1x2 @@ -1379,19 +1379,19 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) { define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v16i8_post_reg_ld1x2: ;CHECK: ld1.16b { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8* %A) + %ld1x2 = tail call { <16 x i8>, <16 x i8> } 
@llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld1x2 } -declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8*) +declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*) define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v8i8_post_imm_ld1x2: ;CHECK: ld1.8b { v0, v1 }, [x0], #16 - %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8* %A) + %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 16 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld1x2 @@ -1400,19 +1400,19 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) { define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i8_post_reg_ld1x2: ;CHECK: ld1.8b { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8* %A) + %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld1x2 } -declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8*) +declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v8i16_post_imm_ld1x2: ;CHECK: ld1.8h { v0, v1 }, [x0], #32 - %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16* %A) + %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 16 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld1x2 @@ -1421,19 +1421,19 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i16_post_reg_ld1x2: ;CHECK: ld1.8h { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16* %A) + %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld1x2 } -declare { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16*) +declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*) define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v4i16_post_imm_ld1x2: ;CHECK: ld1.4h { v0, v1 }, [x0], #16 - %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16* %A) + %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 8 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16> } %ld1x2 @@ -1442,19 +1442,19 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i16_post_reg_ld1x2: ;CHECK: ld1.4h { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16* %A) + %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x 
i16>, <4 x i16> } %ld1x2 } -declare { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16*) +declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*) define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v4i32_post_imm_ld1x2: ;CHECK: ld1.4s { v0, v1 }, [x0], #32 - %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32* %A) + %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 8 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld1x2 @@ -1463,19 +1463,19 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i32_post_reg_ld1x2: ;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32* %A) + %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld1x2 } -declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32*) +declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*) define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v2i32_post_imm_ld1x2: ;CHECK: ld1.2s { v0, v1 }, [x0], #16 - %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32* %A) + %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 4 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld1x2 @@ -1484,19 +1484,19 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i32_post_reg_ld1x2: ;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32* %A) + %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld1x2 } -declare { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32*) +declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*) define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v2i64_post_imm_ld1x2: ;CHECK: ld1.2d { v0, v1 }, [x0], #32 - %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64* %A) + %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld1x2 @@ -1505,19 +1505,19 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i64_post_reg_ld1x2: ;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64* %A) + %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld1x2 } -declare { <2 x i64>, <2 x i64> } 
@llvm.arm64.neon.ld1x2.v2i64.p0i64(i64*) +declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*) define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v1i64_post_imm_ld1x2: ;CHECK: ld1.1d { v0, v1 }, [x0], #16 - %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64* %A) + %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 2 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld1x2 @@ -1526,19 +1526,19 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1i64_post_reg_ld1x2: ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64* %A) + %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld1x2 } -declare { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64*) +declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*) define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float** %ptr) { ;CHECK-LABEL: test_v4f32_post_imm_ld1x2: ;CHECK: ld1.4s { v0, v1 }, [x0], #32 - %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x2.v4f32.p0f32(float* %A) + %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 8 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } %ld1x2 @@ -1547,19 +1547,19 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float* define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4f32_post_reg_ld1x2: ;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x2.v4f32.p0f32(float* %A) + %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } %ld1x2 } -declare { <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x2.v4f32.p0f32(float*) +declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*) define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float** %ptr) { ;CHECK-LABEL: test_v2f32_post_imm_ld1x2: ;CHECK: ld1.2s { v0, v1 }, [x0], #16 - %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x2.v2f32.p0f32(float* %A) + %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 4 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float> } %ld1x2 @@ -1568,19 +1568,19 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float* define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f32_post_reg_ld1x2: ;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x2.v2f32.p0f32(float* %A) + %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { 
<2 x float>, <2 x float> } %ld1x2 } -declare { <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x2.v2f32.p0f32(float*) +declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*) define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, double** %ptr) { ;CHECK-LABEL: test_v2f64_post_imm_ld1x2: ;CHECK: ld1.2d { v0, v1 }, [x0], #32 - %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x2.v2f64.p0f64(double* %A) + %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld1x2 @@ -1589,19 +1589,19 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, dou define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f64_post_reg_ld1x2: ;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x2.v2f64.p0f64(double* %A) + %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld1x2 } -declare { <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x2.v2f64.p0f64(double*) +declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*) define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, double** %ptr) { ;CHECK-LABEL: test_v1f64_post_imm_ld1x2: ;CHECK: ld1.1d { v0, v1 }, [x0], #16 - %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x2.v1f64.p0f64(double* %A) + %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 2 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld1x2 @@ -1610,19 +1610,19 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, dou define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1f64_post_reg_ld1x2: ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x2.v1f64.p0f64(double* %A) + %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld1x2 } -declare { <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x2.v1f64.p0f64(double*) +declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*) define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v16i8_post_imm_ld1x3: ;CHECK: ld1.16b { v0, v1, v2 }, [x0], #48 - %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8* %A) + %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 48 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3 @@ -1631,19 +1631,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v16i8_post_reg_ld1x3: ;CHECK: ld1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <16 x 
i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8* %A) + %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3 } -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8*) +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*) define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v8i8_post_imm_ld1x3: ;CHECK: ld1.8b { v0, v1, v2 }, [x0], #24 - %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8* %A) + %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 24 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3 @@ -1652,19 +1652,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** % define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i8_post_reg_ld1x3: ;CHECK: ld1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8* %A) + %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3 } -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8*) +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v8i16_post_imm_ld1x3: ;CHECK: ld1.8h { v0, v1, v2 }, [x0], #48 - %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16* %A) + %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 24 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3 @@ -1673,19 +1673,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i16_post_reg_ld1x3: ;CHECK: ld1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16* %A) + %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3 } -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16*) +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*) define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v4i16_post_imm_ld1x3: ;CHECK: ld1.4h { v0, v1, v2 }, [x0], #24 - %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16* %A) + %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 12 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3 @@ -1694,19 +1694,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i 
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i16_post_reg_ld1x3: ;CHECK: ld1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16* %A) + %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3 } -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16*) +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*) define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v4i32_post_imm_ld1x3: ;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48 - %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32* %A) + %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 12 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3 @@ -1715,19 +1715,19 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i32_post_reg_ld1x3: ;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32* %A) + %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3 } -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32*) +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*) define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v2i32_post_imm_ld1x3: ;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24 - %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32* %A) + %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 6 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3 @@ -1736,19 +1736,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i32_post_reg_ld1x3: ;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32* %A) + %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3 } -declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32*) +declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*) define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v2i64_post_imm_ld1x3: ;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48 - %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64* %A) + %ld1x3 = tail call { <2 
x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 6 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3 @@ -1757,19 +1757,19 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i64_post_reg_ld1x3: ;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64* %A) + %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3 } -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64*) +declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*) define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v1i64_post_imm_ld1x3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 - %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64* %A) + %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 3 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3 @@ -1778,19 +1778,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1i64_post_reg_ld1x3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64* %A) + %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3 } -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64*) +declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*) define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(float* %A, float** %ptr) { ;CHECK-LABEL: test_v4f32_post_imm_ld1x3: ;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48 - %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x3.v4f32.p0f32(float* %A) + %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 12 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3 @@ -1799,19 +1799,19 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(floa define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4f32_post_reg_ld1x3: ;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x3.v4f32.p0f32(float* %A) + %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3 } -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x3.v4f32.p0f32(float*) +declare { <4 
x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*) define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(float* %A, float** %ptr) { ;CHECK-LABEL: test_v2f32_post_imm_ld1x3: ;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24 - %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x3.v2f32.p0f32(float* %A) + %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 6 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3 @@ -1820,19 +1820,19 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(floa define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f32_post_reg_ld1x3: ;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x3.v2f32.p0f32(float* %A) + %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3 } -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x3.v2f32.p0f32(float*) +declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*) define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(double* %A, double** %ptr) { ;CHECK-LABEL: test_v2f64_post_imm_ld1x3: ;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48 - %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x3.v2f64.p0f64(double* %A) + %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 6 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3 @@ -1841,19 +1841,19 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(d define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f64_post_reg_ld1x3: ;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x3.v2f64.p0f64(double* %A) + %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3 } -declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x3.v2f64.p0f64(double*) +declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*) define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(double* %A, double** %ptr) { ;CHECK-LABEL: test_v1f64_post_imm_ld1x3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 - %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x3.v1f64.p0f64(double* %A) + %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 3 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3 @@ -1862,19 +1862,19 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(d define { <1 x double>, <1 
x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1f64_post_reg_ld1x3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x3.v1f64.p0f64(double* %A) + %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3 } -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x3.v1f64.p0f64(double*) +declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*) define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v16i8_post_imm_ld1x4: ;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], #64 - %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8* %A) + %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 64 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4 @@ -1883,19 +1883,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v16i8_post_reg_ld1x4: ;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8* %A) + %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4 } -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8*) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*) define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v8i8_post_imm_ld1x4: ;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], #32 - %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8* %A) + %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 32 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4 @@ -1904,19 +1904,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8* define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i8_post_reg_ld1x4: ;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8* %A) + %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4 } -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8*) +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4(i16* %A, i16** %ptr) { 
;CHECK-LABEL: test_v8i16_post_imm_ld1x4: ;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], #64 - %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16* %A) + %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 32 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4 @@ -1925,19 +1925,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i16_post_reg_ld1x4: ;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16* %A) + %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4 } -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16*) +declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*) define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v4i16_post_imm_ld1x4: ;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], #32 - %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16* %A) + %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 16 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4 @@ -1946,19 +1946,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i16_post_reg_ld1x4: ;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16* %A) + %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4 } -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16*) +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*) define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v4i32_post_imm_ld1x4: ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64 - %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32* %A) + %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 16 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4 @@ -1967,19 +1967,19 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i32_post_reg_ld1x4: ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - 
%ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32* %A) + %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4 } -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32*) +declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*) define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v2i32_post_imm_ld1x4: ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32 - %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32* %A) + %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 8 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4 @@ -1988,19 +1988,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i32_post_reg_ld1x4: ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32* %A) + %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4 } -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32*) +declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*) define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v2i64_post_imm_ld1x4: ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64 - %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64* %A) + %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 8 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4 @@ -2009,19 +2009,19 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i64_post_reg_ld1x4: ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64* %A) + %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4 } -declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64*) +declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*) define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v1i64_post_imm_ld1x4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32 - %ld1x4 = tail 
call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64* %A) + %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4 @@ -2030,19 +2030,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1i64_post_reg_ld1x4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64* %A) + %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4 } -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64*) +declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*) define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x4(float* %A, float** %ptr) { ;CHECK-LABEL: test_v4f32_post_imm_ld1x4: ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64 - %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x4.v4f32.p0f32(float* %A) + %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 16 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4 @@ -2051,19 +2051,19 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4f32_post_reg_ld1x4: ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x4.v4f32.p0f32(float* %A) + %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4 } -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x4.v4f32.p0f32(float*) +declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*) define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x4(float* %A, float** %ptr) { ;CHECK-LABEL: test_v2f32_post_imm_ld1x4: ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32 - %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x4.v2f32.p0f32(float* %A) + %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 8 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4 @@ -2072,19 +2072,19 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: 
test_v2f32_post_reg_ld1x4: ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x4.v2f32.p0f32(float* %A) + %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4 } -declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x4.v2f32.p0f32(float*) +declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*) define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x4(double* %A, double** %ptr) { ;CHECK-LABEL: test_v2f64_post_imm_ld1x4: ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64 - %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x4.v2f64.p0f64(double* %A) + %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 8 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4 @@ -2093,19 +2093,19 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f64_post_reg_ld1x4: ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x4.v2f64.p0f64(double* %A) + %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4 } -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x4.v2f64.p0f64(double*) +declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*) define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x4(double* %A, double** %ptr) { ;CHECK-LABEL: test_v1f64_post_imm_ld1x4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32 - %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x4.v1f64.p0f64(double* %A) + %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld1x4 @@ -2114,19 +2114,19 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1f64_post_reg_ld1x4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x4.v1f64.p0f64(double* %A) + %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, 
<1 x double>, <1 x double> } %ld1x4 } -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x4.v1f64.p0f64(double*) +declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*) define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_ld2r: ;CHECK: ld2r.16b { v0, v1 }, [x0], #2 - %ld2 = call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2r.v16i8.p0i8(i8* %A) + %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 2 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld2 @@ -2135,19 +2135,19 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nou define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_ld2r: ;CHECK: ld2r.16b { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2r.v16i8.p0i8(i8* %A) + %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld2 } -declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly +declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_ld2r: ;CHECK: ld2r.8b { v0, v1 }, [x0], #2 - %ld2 = call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2r.v8i8.p0i8(i8* %A) + %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 2 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld2 @@ -2156,19 +2156,19 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwi define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_ld2r: ;CHECK: ld2r.8b { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2r.v8i8.p0i8(i8* %A) + %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld2 } -declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly +declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_ld2r: ;CHECK: ld2r.8h { v0, v1 }, [x0], #4 - %ld2 = call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2r.v8i16.p0i16(i16* %A) + %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 2 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld2 @@ -2177,19 +2177,19 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) n define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_ld2r: ;CHECK: ld2r.8h { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2r.v8i16.p0i16(i16* %A) + %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld2 } -declare { <8 x 
i16>, <8 x i16> } @llvm.arm64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly +declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_ld2r: ;CHECK: ld2r.4h { v0, v1 }, [x0], #4 - %ld2 = call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2r.v4i16.p0i16(i16* %A) + %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 2 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16> } %ld2 @@ -2198,19 +2198,19 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) n define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_ld2r: ;CHECK: ld2r.4h { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2r.v4i16.p0i16(i16* %A) + %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16> } %ld2 } -declare { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly +declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_ld2r: ;CHECK: ld2r.4s { v0, v1 }, [x0], #8 - %ld2 = call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2r.v4i32.p0i32(i32* %A) + %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 2 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld2 @@ -2219,18 +2219,18 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) n define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_ld2r: ;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2r.v4i32.p0i32(i32* %A) + %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld2 } -declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly +declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_ld2r: ;CHECK: ld2r.2s { v0, v1 }, [x0], #8 - %ld2 = call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2r.v2i32.p0i32(i32* %A) + %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 2 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld2 @@ -2239,19 +2239,19 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) n define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_ld2r: ;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2r.v2i32.p0i32(i32* %A) + %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld2 } -declare { <2 x i32>, <2 x i32> } 
@llvm.arm64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly +declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_ld2r: ;CHECK: ld2r.2d { v0, v1 }, [x0], #16 - %ld2 = call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2r.v2i64.p0i64(i64* %A) + %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 2 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld2 @@ -2260,18 +2260,18 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) n define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_ld2r: ;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2r.v2i64.p0i64(i64* %A) + %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld2 } -declare { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly +declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_ld2r: ;CHECK: ld2r.1d { v0, v1 }, [x0], #16 - %ld2 = call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2r.v1i64.p0i64(i64* %A) + %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 2 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld2 @@ -2280,19 +2280,19 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) n define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_ld2r: ;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2r.v1i64.p0i64(i64* %A) + %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld2 } -declare { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly +declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float** %ptr) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_ld2r: ;CHECK: ld2r.4s { v0, v1 }, [x0], #8 - %ld2 = call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2r.v4f32.p0f32(float* %A) + %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 2 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } %ld2 @@ -2301,18 +2301,18 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float** define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_ld2r: ;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2r.v4f32.p0f32(float* %A) + %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } 
%ld2 } -declare { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2r.v4f32.p0f32(float*) nounwind readonly +declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float*) nounwind readonly define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float** %ptr) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_ld2r: ;CHECK: ld2r.2s { v0, v1 }, [x0], #8 - %ld2 = call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2r.v2f32.p0f32(float* %A) + %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 2 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float> } %ld2 @@ -2321,19 +2321,19 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float** define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_ld2r: ;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2r.v2f32.p0f32(float* %A) + %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float> } %ld2 } -declare { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2r.v2f32.p0f32(float*) nounwind readonly +declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float*) nounwind readonly define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, double** %ptr) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_ld2r: ;CHECK: ld2r.2d { v0, v1 }, [x0], #16 - %ld2 = call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2r.v2f64.p0f64(double* %A) + %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 2 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld2 @@ -2342,18 +2342,18 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, doub define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_ld2r: ;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2r.v2f64.p0f64(double* %A) + %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld2 } -declare { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2r.v2f64.p0f64(double*) nounwind readonly +declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double*) nounwind readonly define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, double** %ptr) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_ld2r: ;CHECK: ld2r.1d { v0, v1 }, [x0], #16 - %ld2 = call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2r.v1f64.p0f64(double* %A) + %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 2 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld2 @@ -2362,19 +2362,19 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, doub define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_ld2r: ;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <1 x double>, <1 x 
double> } @llvm.arm64.neon.ld2r.v1f64.p0f64(double* %A) + %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld2 } -declare { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2r.v1f64.p0f64(double*) nounwind readonly +declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double*) nounwind readonly define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_ld3r: ;CHECK: ld3r.16b { v0, v1, v2 }, [x0], #3 - %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3r.v16i8.p0i8(i8* %A) + %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 3 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3 @@ -2383,19 +2383,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8* define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_ld3r: ;CHECK: ld3r.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3r.v16i8.p0i8(i8* %A) + %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3 } -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_ld3r: ;CHECK: ld3r.8b { v0, v1, v2 }, [x0], #3 - %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3r.v8i8.p0i8(i8* %A) + %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 3 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3 @@ -2404,19 +2404,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %p define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_ld3r: ;CHECK: ld3r.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3r.v8i8.p0i8(i8* %A) + %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3 } -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_ld3r: ;CHECK: ld3r.8h { v0, v1, v2 }, [x0], #6 - %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3r.v8i16.p0i16(i16* %A) + %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 3 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3 @@ -2425,19 +2425,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } 
@test_v8i16_post_imm_ld3r(i16* %A, i1 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_ld3r: ;CHECK: ld3r.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3r.v8i16.p0i16(i16* %A) + %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3 } -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_ld3r: ;CHECK: ld3r.4h { v0, v1, v2 }, [x0], #6 - %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3r.v4i16.p0i16(i16* %A) + %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 3 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3 @@ -2446,19 +2446,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i1 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_ld3r: ;CHECK: ld3r.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3r.v4i16.p0i16(i16* %A) + %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3 } -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_ld3r: ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12 - %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3r.v4i32.p0i32(i32* %A) + %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 3 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3 @@ -2467,18 +2467,18 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i3 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_ld3r: ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3r.v4i32.p0i32(i32* %A) + %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3 } -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_ld3r: ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12 - %ld3 = call { <2 x i32>, <2 x 
i32>, <2 x i32> } @llvm.arm64.neon.ld3r.v2i32.p0i32(i32* %A) + %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 3 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3 @@ -2487,19 +2487,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i3 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_ld3r: ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3r.v2i32.p0i32(i32* %A) + %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3 } -declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly +declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_ld3r: ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24 - %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3r.v2i64.p0i64(i64* %A) + %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 3 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3 @@ -2508,18 +2508,18 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i6 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_ld3r: ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3r.v2i64.p0i64(i64* %A) + %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3 } -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly +declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_ld3r: ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24 - %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3r.v1i64.p0i64(i64* %A) + %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 3 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3 @@ -2528,19 +2528,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i6 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_ld3r: ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3r.v1i64.p0i64(i64* %A) + %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3 } -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3r.v1i64.p0i64(i64*) 
nounwind readonly +declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float* %A, float** %ptr) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_ld3r: ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12 - %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3r.v4f32.p0f32(float* %A) + %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 3 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld3 @@ -2549,18 +2549,18 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_ld3r: ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3r.v4f32.p0f32(float* %A) + %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld3 } -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3r.v4f32.p0f32(float*) nounwind readonly +declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float*) nounwind readonly define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float* %A, float** %ptr) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_ld3r: ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12 - %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3r.v2f32.p0f32(float* %A) + %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 3 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld3 @@ -2569,19 +2569,19 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_ld3r: ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3r.v2f32.p0f32(float* %A) + %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld3 } -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3r.v2f32.p0f32(float*) nounwind readonly +declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float*) nounwind readonly define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(double* %A, double** %ptr) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_ld3r: ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24 - %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3r.v2f64.p0f64(double* %A) + %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 3 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld3 @@ -2590,18 +2590,18 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(do define { <2 x 
double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_ld3r: ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3r.v2f64.p0f64(double* %A) + %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld3 } -declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3r.v2f64.p0f64(double*) nounwind readonly +declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double*) nounwind readonly define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(double* %A, double** %ptr) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_ld3r: ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24 - %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3r.v1f64.p0f64(double* %A) + %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 3 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld3 @@ -2610,19 +2610,19 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(do define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_ld3r: ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3r.v1f64.p0f64(double* %A) + %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld3 } -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3r.v1f64.p0f64(double*) nounwind readonly +declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double*) nounwind readonly define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_ld4r: ;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], #4 - %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4r.v16i8.p0i8(i8* %A) + %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 4 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4 @@ -2631,19 +2631,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r( define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_ld4r: ;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4r.v16i8.p0i8(i8* %A) + %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4 } -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } 
@llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_ld4r: ;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], #4 - %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4r.v8i8.p0i8(i8* %A) + %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 4 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4 @@ -2652,19 +2652,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* % define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_ld4r: ;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4r.v8i8.p0i8(i8* %A) + %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4 } -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_ld4r: ;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], #8 - %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4r.v8i16.p0i16(i16* %A) + %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 4 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4 @@ -2673,19 +2673,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r( define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_ld4r: ;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4r.v8i16.p0i16(i16* %A) + %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4 } -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly +declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_ld4r: ;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], #8 - %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4r.v4i16.p0i16(i16* %A) + %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 4 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4 @@ -2694,19 +2694,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r( define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) 
nounwind { ;CHECK-LABEL: test_v4i16_post_reg_ld4r: ;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4r.v4i16.p0i16(i16* %A) + %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4 } -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_ld4r: ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16 - %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4r.v4i32.p0i32(i32* %A) + %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 4 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4 @@ -2715,18 +2715,18 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r( define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_ld4r: ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4r.v4i32.p0i32(i32* %A) + %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4 } -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly +declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_ld4r: ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16 - %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4r.v2i32.p0i32(i32* %A) + %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 4 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4 @@ -2735,19 +2735,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r( define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_ld4r: ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4r.v2i32.p0i32(i32* %A) + %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4 } -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly +declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } 
@test_v2i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_ld4r: ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32 - %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4r.v2i64.p0i64(i64* %A) + %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 @@ -2756,18 +2756,18 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r( define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_ld4r: ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4r.v2i64.p0i64(i64* %A) + %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 } -declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly +declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_ld4r: ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32 - %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4r.v1i64.p0i64(i64* %A) + %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4 @@ -2776,19 +2776,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r( define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_ld4r: ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4r.v1i64.p0i64(i64* %A) + %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4 } -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly +declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4r(float* %A, float** %ptr) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_ld4r: ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16 - %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4r.v4f32.p0f32(float* %A) + %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 4 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 @@ -2797,18 +2797,18 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4r(float* %A, 
float** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_ld4r: ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4r.v4f32.p0f32(float* %A) + %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 } -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4r.v4f32.p0f32(float*) nounwind readonly +declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float*) nounwind readonly define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4r(float* %A, float** %ptr) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_ld4r: ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16 - %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4r.v2f32.p0f32(float* %A) + %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 4 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 @@ -2817,19 +2817,19 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_ld4r: ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4r.v2f32.p0f32(float* %A) + %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 } -declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4r.v2f32.p0f32(float*) nounwind readonly +declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float*) nounwind readonly define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4r(double* %A, double** %ptr) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_ld4r: ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32 - %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4r.v2f64.p0f64(double* %A) + %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4 @@ -2838,18 +2838,18 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_ld4r: ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4r.v2f64.p0f64(double* %A) + %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x 
double>, <2 x double>, <2 x double> } %ld4 } -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4r.v2f64.p0f64(double*) nounwind readonly +declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double*) nounwind readonly define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4r(double* %A, double** %ptr) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_ld4r: ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32 - %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4r.v1f64.p0f64(double* %A) + %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4 @@ -2858,19 +2858,19 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_ld4r: ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4r.v1f64.p0f64(double* %A) + %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4 } -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4r.v1f64.p0f64(double*) nounwind readonly +declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double*) nounwind readonly define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_ld2lane: ;CHECK: ld2.b { v0, v1 }[0], [x0], #2 - %ld2 = call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) + %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 2 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld2 @@ -2879,19 +2879,19 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr, define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_ld2lane: ;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) + %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld2 } -declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly +declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_ld2lane: ;CHECK: ld2.b { v0, v1 }[0], [x0], #2 - %ld2 = call { <8 x i8>, <8 x i8> } 
@llvm.arm64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) + %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 2 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld2 @@ -2900,19 +2900,19 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_ld2lane: ;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) + %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld2 } -declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) nounwind readonly +declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) nounwind readonly define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_ld2lane: ;CHECK: ld2.h { v0, v1 }[0], [x0], #4 - %ld2 = call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) + %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 2 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld2 @@ -2921,19 +2921,19 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_ld2lane: ;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) + %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld2 } -declare { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly +declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_ld2lane: ;CHECK: ld2.h { v0, v1 }[0], [x0], #4 - %ld2 = call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) + %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 2 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16> } %ld2 @@ -2942,19 +2942,19 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_ld2lane: ;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <4 x i16>, <4 x i16> } 
@llvm.arm64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) + %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16> } %ld2 } -declare { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) nounwind readonly +declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) nounwind readonly define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], #8 - %ld2 = call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) + %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 2 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld2 @@ -2963,19 +2963,19 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) + %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld2 } -declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly +declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], #8 - %ld2 = call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) + %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 2 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld2 @@ -2984,19 +2984,19 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) + %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld2 } -declare { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) nounwind readonly +declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) nounwind readonly define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr, <2 
x i64> %B, <2 x i64> %C) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], #16 - %ld2 = call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) + %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i32 2 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld2 @@ -3005,19 +3005,19 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) + %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld2 } -declare { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly +declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], #16 - %ld2 = call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) + %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i32 2 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld2 @@ -3026,19 +3026,19 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) + %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld2 } -declare { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) nounwind readonly +declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) nounwind readonly define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], #8 - %ld2 = call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) + %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i32 2 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } %ld2 @@ -3047,19 +3047,19 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, floa define { <4 x float>, <4 x 
float> } @test_v4f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) + %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } %ld2 } -declare { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) nounwind readonly +declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) nounwind readonly define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], #8 - %ld2 = call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) + %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i32 2 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float> } %ld2 @@ -3068,19 +3068,19 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, floa define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) + %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float> } %ld2 } -declare { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) nounwind readonly +declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) nounwind readonly define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], #16 - %ld2 = call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) + %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i32 2 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld2 @@ -3089,19 +3089,19 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, d define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) + %ld2 = call { <2 x double>, <2 x double> } 
@llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld2 } -declare { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) nounwind readonly +declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) nounwind readonly define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], #16 - %ld2 = call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) + %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i32 2 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld2 @@ -3110,19 +3110,19 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, d define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) + %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld2 } -declare { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) nounwind readonly +declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) nounwind readonly define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_ld3lane: ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3 - %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) + %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 3 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3 @@ -3131,19 +3131,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A, define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_ld3lane: ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) + %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3 } -declare { <16 x i8>, <16 x i8>, <16 x i8> } 
@llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_ld3lane: ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3 - %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) + %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 3 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3 @@ -3152,19 +3152,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8** define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_ld3lane: ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) + %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3 } -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_ld3lane: ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6 - %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) + %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 3 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3 @@ -3173,19 +3173,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A, define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_ld3lane: ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) + %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3 } -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly define { <4 x i16>, <4 x i16>, <4 
x i16> } @test_v4i16_post_imm_ld3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_ld3lane: ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6 - %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) + %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 3 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3 @@ -3194,19 +3194,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(i16* %A, define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_ld3lane: ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) + %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3 } -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12 - %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) + %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 3 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3 @@ -3215,19 +3215,19 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A, define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) + %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3 } -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], 
[x0], #12 - %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) + %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 3 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3 @@ -3236,19 +3236,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A, define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) + %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3 } -declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly +declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24 - %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) + %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i32 3 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3 @@ -3257,19 +3257,19 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A, define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) + %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3 } -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly +declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24 - %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) + %ld3 = call { <1 x i64>, <1 x 
i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i32 3 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3 @@ -3278,19 +3278,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A, define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) + %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3 } -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly +declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12 - %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) + %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i32 3 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld3 @@ -3299,19 +3299,19 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(fl define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) + %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld3 } -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly +declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12 - %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) + %ld3 = call { <2 x float>, <2 x float>, <2 x 
float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i32 3 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld3 @@ -3320,19 +3320,19 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(fl define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) + %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld3 } -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly +declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24 - %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) + %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i32 3 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld3 @@ -3341,19 +3341,19 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) + %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld3 } -declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly +declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24 - %ld3 = call { <1 x double>, <1 x 
double>, <1 x double> } @llvm.arm64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) + %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i32 3 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld3 @@ -3362,19 +3362,19 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) + %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld3 } -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly +declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_ld4lane: ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 - %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) + %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 4 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4 @@ -3383,19 +3383,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4la define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_ld4lane: ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) + %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4 } -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } 
@test_v8i8_post_imm_ld4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_ld4lane: ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 - %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) + %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 4 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4 @@ -3404,19 +3404,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(i8 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_ld4lane: ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) + %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4 } -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_ld4lane: ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 - %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) + %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 4 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4 @@ -3425,19 +3425,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4la define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_ld4lane: ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) + %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4 } -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly +declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } 
@llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_ld4lane: ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 - %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) + %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 4 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4 @@ -3446,19 +3446,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4la define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_ld4lane: ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) + %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4 } -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 - %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) + %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 4 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4 @@ -3467,19 +3467,19 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4la define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) + %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 
x i32>, <4 x i32>, <4 x i32> } %ld4 } -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly +declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 - %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) + %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 4 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4 @@ -3488,19 +3488,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4la define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) + %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4 } -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly +declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 - %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) + %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 @@ -3509,19 +3509,19 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4la define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* 
%A) + %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 } -declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly +declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 - %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) + %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4 @@ -3530,19 +3530,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4la define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) + %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4 } -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly +declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 - %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) + %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i32 4 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 @@ -3551,19 +3551,19 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(float* %A, float** 
%ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) + %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 } -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly +declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 - %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) + %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i32 4 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 @@ -3572,19 +3572,19 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) + %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 } -declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly +declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 - %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } 
@llvm.arm64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) + %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4 @@ -3593,19 +3593,19 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) + %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4 } -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly +declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 - %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) + %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4 @@ -3614,19 +3614,19 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) + %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x 
double>, <1 x double>, <1 x double> } %ld4 } -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly +declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly define i8* @test_v16i8_post_imm_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st2: ;CHECK: st2.16b { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i32 32 ret i8* %tmp } @@ -3634,18 +3634,18 @@ define i8* @test_v16i8_post_imm_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> % define i8* @test_v16i8_post_reg_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st2: ;CHECK: st2.16b { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) define i8* @test_v8i8_post_imm_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st2: ;CHECK: st2.8b { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i32 16 ret i8* %tmp } @@ -3653,18 +3653,18 @@ define i8* @test_v8i8_post_imm_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) define i8* @test_v8i8_post_reg_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st2: ;CHECK: st2.8b { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st2: ;CHECK: st2.8h { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i32 16 ret i16* %tmp } @@ -3672,18 +3672,18 @@ define i16* @test_v8i16_post_imm_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16 define i16* @test_v8i16_post_reg_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st2: ;CHECK: st2.8h { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) +declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) define 
i16* @test_v4i16_post_imm_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st2: ;CHECK: st2.4h { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i32 8 ret i16* %tmp } @@ -3691,18 +3691,18 @@ define i16* @test_v4i16_post_imm_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16 define i16* @test_v4i16_post_reg_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st2: ;CHECK: st2.4h { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) +declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) define i32* @test_v4i32_post_imm_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st2: ;CHECK: st2.4s { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i32 8 ret i32* %tmp } @@ -3710,18 +3710,18 @@ define i32* @test_v4i32_post_imm_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32 define i32* @test_v4i32_post_reg_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st2: ;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) +declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) define i32* @test_v2i32_post_imm_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st2: ;CHECK: st2.2s { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i32 4 ret i32* %tmp } @@ -3729,18 +3729,18 @@ define i32* @test_v2i32_post_imm_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32 define i32* @test_v2i32_post_reg_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st2: ;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) +declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) define i64* @test_v2i64_post_imm_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st2: ;CHECK: st2.2d { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 4 ret 
i64* %tmp } @@ -3748,18 +3748,18 @@ define i64* @test_v2i64_post_imm_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64 define i64* @test_v2i64_post_reg_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st2: ;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) +declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) define i64* @test_v1i64_post_imm_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st2: ;CHECK: st1.1d { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 2 ret i64* %tmp } @@ -3767,18 +3767,18 @@ define i64* @test_v1i64_post_imm_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64 define i64* @test_v1i64_post_reg_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st2: ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) +declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) define float* @test_v4f32_post_imm_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st2: ;CHECK: st2.4s { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) + call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) %tmp = getelementptr float* %A, i32 8 ret float* %tmp } @@ -3786,18 +3786,18 @@ define float* @test_v4f32_post_imm_st2(float* %A, float** %ptr, <4 x float> %B, define float* @test_v4f32_post_reg_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st2: ;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) + call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st2.v4f32.p0f32(<4 x float>, <4 x float>, float*) +declare void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float>, <4 x float>, float*) define float* @test_v2f32_post_imm_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st2: ;CHECK: st2.2s { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) + call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) %tmp = getelementptr float* %A, i32 4 ret float* %tmp } @@ -3805,18 +3805,18 @@ define float* @test_v2f32_post_imm_st2(float* %A, float** %ptr, <2 x float> %B, define float* @test_v2f32_post_reg_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st2: ;CHECK: 
st2.2s { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) + call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st2.v2f32.p0f32(<2 x float>, <2 x float>, float*) +declare void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float>, <2 x float>, float*) define double* @test_v2f64_post_imm_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st2: ;CHECK: st2.2d { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) + call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 4 ret double* %tmp } @@ -3824,18 +3824,18 @@ define double* @test_v2f64_post_imm_st2(double* %A, double** %ptr, <2 x double> define double* @test_v2f64_post_reg_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st2: ;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) + call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st2.v2f64.p0f64(<2 x double>, <2 x double>, double*) +declare void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double>, <2 x double>, double*) define double* @test_v1f64_post_imm_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st2: ;CHECK: st1.1d { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) + call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 2 ret double* %tmp } @@ -3843,18 +3843,18 @@ define double* @test_v1f64_post_imm_st2(double* %A, double** %ptr, <1 x double> define double* @test_v1f64_post_reg_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st2: ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) + call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st2.v1f64.p0f64(<1 x double>, <1 x double>, double*) +declare void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double>, <1 x double>, double*) define i8* @test_v16i8_post_imm_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st3: ;CHECK: st3.16b { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i32 48 ret i8* %tmp } @@ -3862,18 +3862,18 @@ define i8* @test_v16i8_post_imm_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> % define i8* @test_v16i8_post_reg_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st3: ;CHECK: st3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x 
i8> %C, <16 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) define i8* @test_v8i8_post_imm_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st3: ;CHECK: st3.8b { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i32 24 ret i8* %tmp } @@ -3881,18 +3881,18 @@ define i8* @test_v8i8_post_imm_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, define i8* @test_v8i8_post_reg_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st3: ;CHECK: st3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st3: ;CHECK: st3.8h { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i32 24 ret i16* %tmp } @@ -3900,18 +3900,18 @@ define i16* @test_v8i16_post_imm_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16 define i16* @test_v8i16_post_reg_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st3: ;CHECK: st3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) +declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) define i16* @test_v4i16_post_imm_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st3: ;CHECK: st3.4h { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i32 12 ret i16* %tmp } @@ -3919,18 +3919,18 @@ define i16* @test_v4i16_post_imm_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16 define i16* @test_v4i16_post_reg_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st3: ;CHECK: st3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 
x i16> %C, <4 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) +declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) define i32* @test_v4i32_post_imm_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st3: ;CHECK: st3.4s { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i32 12 ret i32* %tmp } @@ -3938,18 +3938,18 @@ define i32* @test_v4i32_post_imm_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32 define i32* @test_v4i32_post_reg_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st3: ;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) +declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) define i32* @test_v2i32_post_imm_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st3: ;CHECK: st3.2s { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i32 6 ret i32* %tmp } @@ -3957,18 +3957,18 @@ define i32* @test_v2i32_post_imm_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32 define i32* @test_v2i32_post_reg_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st3: ;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) +declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) define i64* @test_v2i64_post_imm_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st3: ;CHECK: st3.2d { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) %tmp = getelementptr i64* %A, i64 6 ret i64* %tmp } @@ -3976,18 +3976,18 @@ define i64* @test_v2i64_post_imm_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64 define i64* @test_v2i64_post_reg_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st3: ;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* 
%A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) +declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) define i64* @test_v1i64_post_imm_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) %tmp = getelementptr i64* %A, i64 3 ret i64* %tmp } @@ -3995,18 +3995,18 @@ define i64* @test_v1i64_post_imm_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64 define i64* @test_v1i64_post_reg_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) +declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) define float* @test_v4f32_post_imm_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st3: ;CHECK: st3.4s { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) + call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) %tmp = getelementptr float* %A, i32 12 ret float* %tmp } @@ -4014,18 +4014,18 @@ define float* @test_v4f32_post_imm_st3(float* %A, float** %ptr, <4 x float> %B, define float* @test_v4f32_post_reg_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st3: ;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) + call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) +declare void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) define float* @test_v2f32_post_imm_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st3: ;CHECK: st3.2s { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) + call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) %tmp = getelementptr float* %A, i32 6 ret float* %tmp } @@ -4033,18 +4033,18 @@ define float* @test_v2f32_post_imm_st3(float* %A, float** %ptr, <2 x float> %B, define float* @test_v2f32_post_reg_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st3: ;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* 
%A) + call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) +declare void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) define double* @test_v2f64_post_imm_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st3: ;CHECK: st3.2d { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) + call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 6 ret double* %tmp } @@ -4052,18 +4052,18 @@ define double* @test_v2f64_post_imm_st3(double* %A, double** %ptr, <2 x double> define double* @test_v2f64_post_reg_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st3: ;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) + call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) +declare void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) define double* @test_v1f64_post_imm_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) + call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 3 ret double* %tmp } @@ -4071,18 +4071,18 @@ define double* @test_v1f64_post_imm_st3(double* %A, double** %ptr, <1 x double> define double* @test_v1f64_post_reg_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) + call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) +declare void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) define i8* @test_v16i8_post_imm_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st4: ;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i32 64 ret i8* %tmp } @@ -4090,18 +4090,18 @@ define i8* @test_v16i8_post_imm_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> % define i8* 
@test_v16i8_post_reg_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st4: ;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) define i8* @test_v8i8_post_imm_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st4: ;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i32 32 ret i8* %tmp } @@ -4109,18 +4109,18 @@ define i8* @test_v8i8_post_imm_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, define i8* @test_v8i8_post_reg_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st4: ;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st4: ;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) + call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) %tmp = getelementptr i16* %A, i32 32 ret i16* %tmp } @@ -4128,18 +4128,18 @@ define i16* @test_v8i16_post_imm_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16 define i16* @test_v8i16_post_reg_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st4: ;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) + call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) +declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) define i16* @test_v4i16_post_imm_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st4: ;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) + call void 
@llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) %tmp = getelementptr i16* %A, i32 16 ret i16* %tmp } @@ -4147,18 +4147,18 @@ define i16* @test_v4i16_post_imm_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16 define i16* @test_v4i16_post_reg_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st4: ;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) + call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>, i16*) +declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>, i16*) define i32* @test_v4i32_post_imm_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st4: ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i32 16 ret i32* %tmp } @@ -4166,18 +4166,18 @@ define i32* @test_v4i32_post_imm_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32 define i32* @test_v4i32_post_reg_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st4: ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>, i32*) +declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>, i32*) define i32* @test_v2i32_post_imm_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st4: ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i32 8 ret i32* %tmp } @@ -4185,18 +4185,18 @@ define i32* @test_v2i32_post_imm_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32 define i32* @test_v2i32_post_reg_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st4: ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) +declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) define i64* 
@test_v2i64_post_imm_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st4: ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 8 ret i64* %tmp } @@ -4204,18 +4204,18 @@ define i64* @test_v2i64_post_imm_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64 define i64* @test_v2i64_post_reg_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st4: ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>, i64*) +declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>, i64*) define i64* @test_v1i64_post_imm_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 4 ret i64* %tmp } @@ -4223,18 +4223,18 @@ define i64* @test_v1i64_post_imm_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64 define i64* @test_v1i64_post_reg_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>, i64*) +declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>, i64*) define float* @test_v4f32_post_imm_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st4: ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) + call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) %tmp = getelementptr float* %A, i32 16 ret float* %tmp } @@ -4242,18 +4242,18 @@ define float* @test_v4f32_post_imm_st4(float* %A, float** %ptr, <4 x float> %B, define float* @test_v4f32_post_reg_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st4: ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) + call 
void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) +declare void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) define float* @test_v2f32_post_imm_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st4: ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) + call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) %tmp = getelementptr float* %A, i32 8 ret float* %tmp } @@ -4261,18 +4261,18 @@ define float* @test_v2f32_post_imm_st4(float* %A, float** %ptr, <2 x float> %B, define float* @test_v2f32_post_reg_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st4: ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) + call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) +declare void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) define double* @test_v2f64_post_imm_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st4: ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) + call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 8 ret double* %tmp } @@ -4280,18 +4280,18 @@ define double* @test_v2f64_post_imm_st4(double* %A, double** %ptr, <2 x double> define double* @test_v2f64_post_reg_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st4: ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) + call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>,<2 x double>, double*) +declare void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>,<2 x double>, double*) define double* @test_v1f64_post_imm_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* 
%A) + call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 4 ret double* %tmp } @@ -4299,18 +4299,18 @@ define double* @test_v1f64_post_imm_st4(double* %A, double** %ptr, <1 x double> define double* @test_v1f64_post_reg_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) + call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) +declare void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) define i8* @test_v16i8_post_imm_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st1x2: ;CHECK: st1.16b { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i32 32 ret i8* %tmp } @@ -4318,18 +4318,18 @@ define i8* @test_v16i8_post_imm_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> define i8* @test_v16i8_post_reg_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st1x2: ;CHECK: st1.16b { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) define i8* @test_v8i8_post_imm_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st1x2: ;CHECK: st1.8b { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i32 16 ret i8* %tmp } @@ -4337,18 +4337,18 @@ define i8* @test_v8i8_post_imm_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C define i8* @test_v8i8_post_reg_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st1x2: ;CHECK: st1.8b { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st1x2: ;CHECK: st1.8h { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i32 
16 ret i16* %tmp } @@ -4356,18 +4356,18 @@ define i16* @test_v8i16_post_imm_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i define i16* @test_v8i16_post_reg_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st1x2: ;CHECK: st1.8h { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) +declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) define i16* @test_v4i16_post_imm_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st1x2: ;CHECK: st1.4h { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i32 8 ret i16* %tmp } @@ -4375,18 +4375,18 @@ define i16* @test_v4i16_post_imm_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i define i16* @test_v4i16_post_reg_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st1x2: ;CHECK: st1.4h { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) +declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) define i32* @test_v4i32_post_imm_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st1x2: ;CHECK: st1.4s { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i32 8 ret i32* %tmp } @@ -4394,18 +4394,18 @@ define i32* @test_v4i32_post_imm_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i define i32* @test_v4i32_post_reg_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st1x2: ;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) +declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) define i32* @test_v2i32_post_imm_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st1x2: ;CHECK: st1.2s { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i32 4 ret i32* %tmp } @@ -4413,18 +4413,18 @@ define i32* @test_v2i32_post_imm_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i define i32* @test_v2i32_post_reg_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st1x2: ;CHECK: st1.2s { v0, v1 }, [x0], 
x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) +declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) define i64* @test_v2i64_post_imm_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st1x2: ;CHECK: st1.2d { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 4 ret i64* %tmp } @@ -4432,18 +4432,18 @@ define i64* @test_v2i64_post_imm_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i define i64* @test_v2i64_post_reg_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st1x2: ;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) +declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) define i64* @test_v1i64_post_imm_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st1x2: ;CHECK: st1.1d { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 2 ret i64* %tmp } @@ -4451,18 +4451,18 @@ define i64* @test_v1i64_post_imm_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i define i64* @test_v1i64_post_reg_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st1x2: ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) +declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) define float* @test_v4f32_post_imm_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st1x2: ;CHECK: st1.4s { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) + call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) %tmp = getelementptr float* %A, i32 8 ret float* %tmp } @@ -4470,18 +4470,18 @@ define float* @test_v4f32_post_imm_st1x2(float* %A, float** %ptr, <4 x float> %B define float* @test_v4f32_post_reg_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st1x2: ;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) + call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void 
@llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) +declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) define float* @test_v2f32_post_imm_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st1x2: ;CHECK: st1.2s { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) + call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) %tmp = getelementptr float* %A, i32 4 ret float* %tmp } @@ -4489,18 +4489,18 @@ define float* @test_v2f32_post_imm_st1x2(float* %A, float** %ptr, <2 x float> %B define float* @test_v2f32_post_reg_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st1x2: ;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) + call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) +declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) define double* @test_v2f64_post_imm_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st1x2: ;CHECK: st1.2d { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) + call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 4 ret double* %tmp } @@ -4508,18 +4508,18 @@ define double* @test_v2f64_post_imm_st1x2(double* %A, double** %ptr, <2 x double define double* @test_v2f64_post_reg_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st1x2: ;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) + call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) +declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) define double* @test_v1f64_post_imm_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st1x2: ;CHECK: st1.1d { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) + call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 2 ret double* %tmp } @@ -4527,18 +4527,18 @@ define double* @test_v1f64_post_imm_st1x2(double* %A, double** %ptr, <1 x double define double* @test_v1f64_post_reg_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st1x2: ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) + call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, 
double*) +declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) define i8* @test_v16i8_post_imm_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st1x3: ;CHECK: st1.16b { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i32 48 ret i8* %tmp } @@ -4546,18 +4546,18 @@ define i8* @test_v16i8_post_imm_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> define i8* @test_v16i8_post_reg_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st1x3: ;CHECK: st1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) define i8* @test_v8i8_post_imm_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st1x3: ;CHECK: st1.8b { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i32 24 ret i8* %tmp } @@ -4565,18 +4565,18 @@ define i8* @test_v8i8_post_imm_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C define i8* @test_v8i8_post_reg_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st1x3: ;CHECK: st1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st1x3: ;CHECK: st1.8h { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i32 24 ret i16* %tmp } @@ -4584,18 +4584,18 @@ define i16* @test_v8i16_post_imm_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i define i16* @test_v8i16_post_reg_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st1x3: ;CHECK: st1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) +declare void 
@llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) define i16* @test_v4i16_post_imm_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st1x3: ;CHECK: st1.4h { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i32 12 ret i16* %tmp } @@ -4603,18 +4603,18 @@ define i16* @test_v4i16_post_imm_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i define i16* @test_v4i16_post_reg_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st1x3: ;CHECK: st1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) +declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) define i32* @test_v4i32_post_imm_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st1x3: ;CHECK: st1.4s { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i32 12 ret i32* %tmp } @@ -4622,18 +4622,18 @@ define i32* @test_v4i32_post_imm_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i define i32* @test_v4i32_post_reg_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st1x3: ;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) +declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) define i32* @test_v2i32_post_imm_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st1x3: ;CHECK: st1.2s { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i32 6 ret i32* %tmp } @@ -4641,18 +4641,18 @@ define i32* @test_v2i32_post_imm_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i define i32* @test_v2i32_post_reg_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st1x3: ;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, 
<2 x i32>, i32*) +declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) define i64* @test_v2i64_post_imm_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st1x3: ;CHECK: st1.2d { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) %tmp = getelementptr i64* %A, i64 6 ret i64* %tmp } @@ -4660,18 +4660,18 @@ define i64* @test_v2i64_post_imm_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i define i64* @test_v2i64_post_reg_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st1x3: ;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) +declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) define i64* @test_v1i64_post_imm_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st1x3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) %tmp = getelementptr i64* %A, i64 3 ret i64* %tmp } @@ -4679,18 +4679,18 @@ define i64* @test_v1i64_post_imm_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i define i64* @test_v1i64_post_reg_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st1x3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) +declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) define float* @test_v4f32_post_imm_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st1x3: ;CHECK: st1.4s { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) + call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) %tmp = getelementptr float* %A, i32 12 ret float* %tmp } @@ -4698,18 +4698,18 @@ define float* @test_v4f32_post_imm_st1x3(float* %A, float** %ptr, <4 x float> %B define float* @test_v4f32_post_reg_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st1x3: ;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) + call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) %tmp = getelementptr float* %A, i64 
%inc ret float* %tmp } -declare void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) +declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) define float* @test_v2f32_post_imm_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st1x3: ;CHECK: st1.2s { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) + call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) %tmp = getelementptr float* %A, i32 6 ret float* %tmp } @@ -4717,18 +4717,18 @@ define float* @test_v2f32_post_imm_st1x3(float* %A, float** %ptr, <2 x float> %B define float* @test_v2f32_post_reg_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st1x3: ;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) + call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) +declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) define double* @test_v2f64_post_imm_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st1x3: ;CHECK: st1.2d { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) + call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 6 ret double* %tmp } @@ -4736,18 +4736,18 @@ define double* @test_v2f64_post_imm_st1x3(double* %A, double** %ptr, <2 x double define double* @test_v2f64_post_reg_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st1x3: ;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) + call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) +declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) define double* @test_v1f64_post_imm_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st1x3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) + call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 3 ret double* %tmp } @@ -4755,18 +4755,18 @@ define double* @test_v1f64_post_imm_st1x3(double* %A, double** %ptr, <1 x double define double* @test_v1f64_post_reg_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { 
;CHECK-LABEL: test_v1f64_post_reg_st1x3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) + call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) +declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) define i8* @test_v16i8_post_imm_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st1x4: ;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i32 64 ret i8* %tmp } @@ -4774,18 +4774,18 @@ define i8* @test_v16i8_post_imm_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> define i8* @test_v16i8_post_reg_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st1x4: ;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) define i8* @test_v8i8_post_imm_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st1x4: ;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i32 32 ret i8* %tmp } @@ -4793,18 +4793,18 @@ define i8* @test_v8i8_post_imm_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C define i8* @test_v8i8_post_reg_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st1x4: ;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st1x4: ;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) + call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) %tmp = 
getelementptr i16* %A, i32 32 ret i16* %tmp } @@ -4812,18 +4812,18 @@ define i16* @test_v8i16_post_imm_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i define i16* @test_v8i16_post_reg_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st1x4: ;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) + call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) +declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) define i16* @test_v4i16_post_imm_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st1x4: ;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) + call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) %tmp = getelementptr i16* %A, i32 16 ret i16* %tmp } @@ -4831,18 +4831,18 @@ define i16* @test_v4i16_post_imm_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i define i16* @test_v4i16_post_reg_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st1x4: ;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) + call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>, i16*) +declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>, i16*) define i32* @test_v4i32_post_imm_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st1x4: ;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i32 16 ret i32* %tmp } @@ -4850,18 +4850,18 @@ define i32* @test_v4i32_post_imm_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i define i32* @test_v4i32_post_reg_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st1x4: ;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>, i32*) +declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>, i32*) define i32* @test_v2i32_post_imm_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 
x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st1x4: ;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i32 8 ret i32* %tmp } @@ -4869,18 +4869,18 @@ define i32* @test_v2i32_post_imm_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i define i32* @test_v2i32_post_reg_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st1x4: ;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) +declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) define i64* @test_v2i64_post_imm_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st1x4: ;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 8 ret i64* %tmp } @@ -4888,18 +4888,18 @@ define i64* @test_v2i64_post_imm_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i define i64* @test_v2i64_post_reg_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st1x4: ;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>, i64*) +declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>, i64*) define i64* @test_v1i64_post_imm_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st1x4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 4 ret i64* %tmp } @@ -4907,18 +4907,18 @@ define i64* @test_v1i64_post_imm_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i define i64* @test_v1i64_post_reg_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st1x4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, 
<1 x i64> %D, <1 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>, i64*) +declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>, i64*) define float* @test_v4f32_post_imm_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st1x4: ;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) + call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) %tmp = getelementptr float* %A, i32 16 ret float* %tmp } @@ -4926,18 +4926,18 @@ define float* @test_v4f32_post_imm_st1x4(float* %A, float** %ptr, <4 x float> %B define float* @test_v4f32_post_reg_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st1x4: ;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) + call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) +declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) define float* @test_v2f32_post_imm_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st1x4: ;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) + call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) %tmp = getelementptr float* %A, i32 8 ret float* %tmp } @@ -4945,18 +4945,18 @@ define float* @test_v2f32_post_imm_st1x4(float* %A, float** %ptr, <2 x float> %B define float* @test_v2f32_post_reg_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st1x4: ;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) + call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) +declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) define double* @test_v2f64_post_imm_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st1x4: ;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) + call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x 
double> %D, <2 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 8 ret double* %tmp } @@ -4964,18 +4964,18 @@ define double* @test_v2f64_post_imm_st1x4(double* %A, double** %ptr, <2 x double define double* @test_v2f64_post_reg_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st1x4: ;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) + call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>,<2 x double>, double*) +declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>,<2 x double>, double*) define double* @test_v1f64_post_imm_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st1x4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) + call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 4 ret double* %tmp } @@ -4983,33 +4983,33 @@ define double* @test_v1f64_post_imm_st1x4(double* %A, double** %ptr, <1 x double define double* @test_v1f64_post_reg_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st1x4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) + call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) +declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) define i8* @test_v16i8_post_imm_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) { - call void @llvm.arm64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A) + call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A) %tmp = getelementptr i8* %A, i32 2 ret i8* %tmp } define i8* @test_v16i8_post_reg_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) { - call void @llvm.arm64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A) + call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st2lanelane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i64, i8*) nounwind readnone +declare void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i64, i8*) nounwind readnone define i8* @test_v16i8_post_imm_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st2lane: ;CHECK: st2.b { v0, v1 }[0], 
[x0], #2 - call void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) + call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 2 ret i8* %tmp } @@ -5017,18 +5017,18 @@ define i8* @test_v16i8_post_imm_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i define i8* @test_v16i8_post_reg_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st2lane: ;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) + call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) +declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) define i8* @test_v8i8_post_imm_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st2lane: ;CHECK: st2.b { v0, v1 }[0], [x0], #2 - call void @llvm.arm64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) + call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 2 ret i8* %tmp } @@ -5036,18 +5036,18 @@ define i8* @test_v8i8_post_imm_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> define i8* @test_v8i8_post_reg_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st2lane: ;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) + call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) +declare void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) define i16* @test_v8i16_post_imm_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st2lane: ;CHECK: st2.h { v0, v1 }[0], [x0], #4 - call void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) + call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 2 ret i16* %tmp } @@ -5055,18 +5055,18 @@ define i16* @test_v8i16_post_imm_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x define i16* @test_v8i16_post_reg_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st2lane: ;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) + call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) +declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) define i16* @test_v4i16_post_imm_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st2lane: ;CHECK: st2.h { v0, v1 }[0], [x0], #4 - call void @llvm.arm64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) + call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) 
%tmp = getelementptr i16* %A, i32 2 ret i16* %tmp } @@ -5074,18 +5074,18 @@ define i16* @test_v4i16_post_imm_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x define i16* @test_v4i16_post_reg_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st2lane: ;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) + call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) +declare void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) define i32* @test_v4i32_post_imm_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], #8 - call void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) + call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 2 ret i32* %tmp } @@ -5093,18 +5093,18 @@ define i32* @test_v4i32_post_imm_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x define i32* @test_v4i32_post_reg_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) + call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) +declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) define i32* @test_v2i32_post_imm_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], #8 - call void @llvm.arm64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) + call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 2 ret i32* %tmp } @@ -5112,18 +5112,18 @@ define i32* @test_v2i32_post_imm_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x define i32* @test_v2i32_post_reg_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) + call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) +declare void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) define i64* @test_v2i64_post_imm_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], #16 - call void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) + call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 2 ret i64* %tmp } @@ -5131,18 +5131,18 @@ define i64* @test_v2i64_post_imm_st2lane(i64* %A, 
i64** %ptr, <2 x i64> %B, <2 x define i64* @test_v2i64_post_reg_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) + call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) define i64* @test_v1i64_post_imm_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], #16 - call void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) + call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 2 ret i64* %tmp } @@ -5150,18 +5150,18 @@ define i64* @test_v1i64_post_imm_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x define i64* @test_v1i64_post_reg_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) + call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) define float* @test_v4f32_post_imm_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], #8 - call void @llvm.arm64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) + call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i32 2 ret float* %tmp } @@ -5169,18 +5169,18 @@ define float* @test_v4f32_post_imm_st2lane(float* %A, float** %ptr, <4 x float> define float* @test_v4f32_post_reg_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) + call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) +declare void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) define float* @test_v2f32_post_imm_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], #8 - call void @llvm.arm64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) + call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i32 2 ret float* %tmp } @@ -5188,18 +5188,18 @@ define float* @test_v2f32_post_imm_st2lane(float* %A, float** %ptr, <2 x float> define 
float* @test_v2f32_post_reg_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) + call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) +declare void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) define double* @test_v2f64_post_imm_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], #16 - call void @llvm.arm64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) + call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i64 2 ret double* %tmp } @@ -5207,18 +5207,18 @@ define double* @test_v2f64_post_imm_st2lane(double* %A, double** %ptr, <2 x doub define double* @test_v2f64_post_reg_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) + call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) +declare void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) define double* @test_v1f64_post_imm_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], #16 - call void @llvm.arm64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) + call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i64 2 ret double* %tmp } @@ -5226,18 +5226,18 @@ define double* @test_v1f64_post_imm_st2lane(double* %A, double** %ptr, <1 x doub define double* @test_v1f64_post_reg_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) + call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) +declare void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) define i8* @test_v16i8_post_imm_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st3lane: ;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3 - call void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) + call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* 
%A, i32 3 ret i8* %tmp } @@ -5245,18 +5245,18 @@ define i8* @test_v16i8_post_imm_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i define i8* @test_v16i8_post_reg_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st3lane: ;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) + call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) +declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) define i8* @test_v8i8_post_imm_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st3lane: ;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3 - call void @llvm.arm64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) + call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 3 ret i8* %tmp } @@ -5264,18 +5264,18 @@ define i8* @test_v8i8_post_imm_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> define i8* @test_v8i8_post_reg_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st3lane: ;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) + call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) +declare void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) define i16* @test_v8i16_post_imm_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st3lane: ;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6 - call void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) + call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 3 ret i16* %tmp } @@ -5283,18 +5283,18 @@ define i16* @test_v8i16_post_imm_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x define i16* @test_v8i16_post_reg_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st3lane: ;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) + call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) +declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) define i16* @test_v4i16_post_imm_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st3lane: ;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6 - call void @llvm.arm64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* 
%A) + call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 3 ret i16* %tmp } @@ -5302,18 +5302,18 @@ define i16* @test_v4i16_post_imm_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x define i16* @test_v4i16_post_reg_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st3lane: ;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) + call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) +declare void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) define i32* @test_v4i32_post_imm_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12 - call void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) + call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 3 ret i32* %tmp } @@ -5321,18 +5321,18 @@ define i32* @test_v4i32_post_imm_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x define i32* @test_v4i32_post_reg_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) + call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) +declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) define i32* @test_v2i32_post_imm_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12 - call void @llvm.arm64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) + call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 3 ret i32* %tmp } @@ -5340,18 +5340,18 @@ define i32* @test_v2i32_post_imm_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x define i32* @test_v2i32_post_reg_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) + call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) +declare void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) define i64* @test_v2i64_post_imm_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, 
<2 x i64> %D) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24 - call void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) + call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 3 ret i64* %tmp } @@ -5359,18 +5359,18 @@ define i64* @test_v2i64_post_imm_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x define i64* @test_v2i64_post_reg_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) + call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) define i64* @test_v1i64_post_imm_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24 - call void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) + call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 3 ret i64* %tmp } @@ -5378,18 +5378,18 @@ define i64* @test_v1i64_post_imm_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x define i64* @test_v1i64_post_reg_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) + call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) define float* @test_v4f32_post_imm_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12 - call void @llvm.arm64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) + call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i32 3 ret float* %tmp } @@ -5397,18 +5397,18 @@ define float* @test_v4f32_post_imm_st3lane(float* %A, float** %ptr, <4 x float> define float* @test_v4f32_post_reg_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) + call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } 
-declare void @llvm.arm64.neon.st3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) +declare void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) define float* @test_v2f32_post_imm_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12 - call void @llvm.arm64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) + call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i32 3 ret float* %tmp } @@ -5416,18 +5416,18 @@ define float* @test_v2f32_post_imm_st3lane(float* %A, float** %ptr, <2 x float> define float* @test_v2f32_post_reg_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) + call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) +declare void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) define double* @test_v2f64_post_imm_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24 - call void @llvm.arm64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) + call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i64 3 ret double* %tmp } @@ -5435,18 +5435,18 @@ define double* @test_v2f64_post_imm_st3lane(double* %A, double** %ptr, <2 x doub define double* @test_v2f64_post_reg_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) + call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) +declare void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) define double* @test_v1f64_post_imm_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24 - call void @llvm.arm64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) + call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i64 3 ret double* %tmp } @@ -5454,18 +5454,18 @@ define double* @test_v1f64_post_imm_st3lane(double* %A, double** %ptr, <1 x doub define 
double* @test_v1f64_post_reg_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) + call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) +declare void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) define i8* @test_v16i8_post_imm_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st4lane: ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4 - call void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) + call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 4 ret i8* %tmp } @@ -5473,18 +5473,18 @@ define i8* @test_v16i8_post_imm_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i define i8* @test_v16i8_post_reg_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st4lane: ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) + call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) +declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) define i8* @test_v8i8_post_imm_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st4lane: ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4 - call void @llvm.arm64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) + call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 4 ret i8* %tmp } @@ -5492,18 +5492,18 @@ define i8* @test_v8i8_post_imm_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> define i8* @test_v8i8_post_reg_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st4lane: ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) + call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) +declare void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) define i16* @test_v8i16_post_imm_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ;CHECK-LABEL: 
test_v8i16_post_imm_st4lane: ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8 - call void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) + call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 4 ret i16* %tmp } @@ -5511,18 +5511,18 @@ define i16* @test_v8i16_post_imm_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x define i16* @test_v8i16_post_reg_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st4lane: ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) + call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) +declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) define i16* @test_v4i16_post_imm_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st4lane: ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8 - call void @llvm.arm64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) + call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 4 ret i16* %tmp } @@ -5530,18 +5530,18 @@ define i16* @test_v4i16_post_imm_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x define i16* @test_v4i16_post_reg_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st4lane: ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) + call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) +declare void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) define i32* @test_v4i32_post_imm_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 - call void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) + call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 4 ret i32* %tmp } @@ -5549,18 +5549,18 @@ define i32* @test_v4i32_post_imm_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x define i32* @test_v4i32_post_reg_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> 
%E, i64 0, i32* %A) + call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) +declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) define i32* @test_v2i32_post_imm_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 - call void @llvm.arm64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) + call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 4 ret i32* %tmp } @@ -5568,18 +5568,18 @@ define i32* @test_v2i32_post_imm_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x define i32* @test_v2i32_post_reg_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) + call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) +declare void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) define i64* @test_v2i64_post_imm_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 - call void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) + call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 4 ret i64* %tmp } @@ -5587,18 +5587,18 @@ define i64* @test_v2i64_post_imm_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x define i64* @test_v2i64_post_reg_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) + call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) define i64* @test_v1i64_post_imm_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 - call void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) + call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> 
%B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 4 ret i64* %tmp } @@ -5606,18 +5606,18 @@ define i64* @test_v1i64_post_imm_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x define i64* @test_v1i64_post_reg_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) + call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) define float* @test_v4f32_post_imm_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 - call void @llvm.arm64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) + call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i32 4 ret float* %tmp } @@ -5625,18 +5625,18 @@ define float* @test_v4f32_post_imm_st4lane(float* %A, float** %ptr, <4 x float> define float* @test_v4f32_post_reg_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) + call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) +declare void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) define float* @test_v2f32_post_imm_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 - call void @llvm.arm64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) + call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i32 4 ret float* %tmp } @@ -5644,18 +5644,18 @@ define float* @test_v2f32_post_imm_st4lane(float* %A, float** %ptr, <2 x float> define float* @test_v2f32_post_reg_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) + call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> 
%C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) +declare void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) define double* @test_v2f64_post_imm_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 - call void @llvm.arm64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) + call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i64 4 ret double* %tmp } @@ -5663,18 +5663,18 @@ define double* @test_v2f64_post_imm_st4lane(double* %A, double** %ptr, <2 x doub define double* @test_v2f64_post_reg_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) + call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) +declare void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) define double* @test_v1f64_post_imm_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 - call void @llvm.arm64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) + call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i64 4 ret double* %tmp } @@ -5682,12 +5682,12 @@ define double* @test_v1f64_post_imm_st4lane(double* %A, double** %ptr, <1 x doub define double* @test_v1f64_post_reg_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) + call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) +declare void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) { ; CHECK-LABEL: test_v16i8_post_imm_ld1r: diff --git a/test/CodeGen/ARM64/inline-asm-error-I.ll 
b/test/CodeGen/AArch64/arm64-inline-asm-error-I.ll similarity index 100% rename from test/CodeGen/ARM64/inline-asm-error-I.ll rename to test/CodeGen/AArch64/arm64-inline-asm-error-I.ll diff --git a/test/CodeGen/ARM64/inline-asm-error-J.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-J.ll similarity index 100% rename from test/CodeGen/ARM64/inline-asm-error-J.ll rename to test/CodeGen/AArch64/arm64-inline-asm-error-J.ll diff --git a/test/CodeGen/ARM64/inline-asm-error-K.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-K.ll similarity index 100% rename from test/CodeGen/ARM64/inline-asm-error-K.ll rename to test/CodeGen/AArch64/arm64-inline-asm-error-K.ll diff --git a/test/CodeGen/ARM64/inline-asm-error-L.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-L.ll similarity index 100% rename from test/CodeGen/ARM64/inline-asm-error-L.ll rename to test/CodeGen/AArch64/arm64-inline-asm-error-L.ll diff --git a/test/CodeGen/ARM64/inline-asm-error-M.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-M.ll similarity index 100% rename from test/CodeGen/ARM64/inline-asm-error-M.ll rename to test/CodeGen/AArch64/arm64-inline-asm-error-M.ll diff --git a/test/CodeGen/ARM64/inline-asm-error-N.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-N.ll similarity index 100% rename from test/CodeGen/ARM64/inline-asm-error-N.ll rename to test/CodeGen/AArch64/arm64-inline-asm-error-N.ll diff --git a/test/CodeGen/ARM64/inline-asm-zero-reg-error.ll b/test/CodeGen/AArch64/arm64-inline-asm-zero-reg-error.ll similarity index 100% rename from test/CodeGen/ARM64/inline-asm-zero-reg-error.ll rename to test/CodeGen/AArch64/arm64-inline-asm-zero-reg-error.ll diff --git a/test/CodeGen/ARM64/inline-asm.ll b/test/CodeGen/AArch64/arm64-inline-asm.ll similarity index 98% rename from test/CodeGen/ARM64/inline-asm.ll rename to test/CodeGen/AArch64/arm64-inline-asm.ll index e64507870fbe..d76cca3f21c6 100644 --- a/test/CodeGen/ARM64/inline-asm.ll +++ b/test/CodeGen/AArch64/arm64-inline-asm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -no-integrated-as | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as | FileCheck %s ; rdar://9167275 diff --git a/test/CodeGen/ARM64/join-reserved.ll b/test/CodeGen/AArch64/arm64-join-reserved.ll similarity index 100% rename from test/CodeGen/ARM64/join-reserved.ll rename to test/CodeGen/AArch64/arm64-join-reserved.ll diff --git a/test/CodeGen/ARM64/jumptable.ll b/test/CodeGen/AArch64/arm64-jumptable.ll similarity index 100% rename from test/CodeGen/ARM64/jumptable.ll rename to test/CodeGen/AArch64/arm64-jumptable.ll diff --git a/test/CodeGen/ARM64/aarch64-large-frame.ll b/test/CodeGen/AArch64/arm64-large-frame.ll similarity index 100% rename from test/CodeGen/ARM64/aarch64-large-frame.ll rename to test/CodeGen/AArch64/arm64-large-frame.ll diff --git a/test/CodeGen/ARM64/ld1.ll b/test/CodeGen/AArch64/arm64-ld1.ll similarity index 67% rename from test/CodeGen/ARM64/ld1.ll rename to test/CodeGen/AArch64/arm64-ld1.ll index 61836a10a806..72d808ccc347 100644 --- a/test/CodeGen/ARM64/ld1.ll +++ b/test/CodeGen/AArch64/arm64-ld1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } @@ -10,7 +10,7 @@ define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind { ; and from the argument 
of the function also defined by ABI (i.e., x0) ; CHECK ld2.8b { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld2.v8i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A) ret %struct.__neon_int8x8x2_t %tmp2 } @@ -19,7 +19,7 @@ define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3.8b { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld3.v8i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A) ret %struct.__neon_int8x8x3_t %tmp2 } @@ -28,13 +28,13 @@ define %struct.__neon_int8x8x4_t @ld4_8b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4.8b { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4.v8i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A) ret %struct.__neon_int8x8x4_t %tmp2 } -declare %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld2.v8i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld3.v8i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*) nounwind readonly %struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> } %struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> } @@ -45,7 +45,7 @@ define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld2.16b { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A) ret %struct.__neon_int8x16x2_t %tmp2 } @@ -54,7 +54,7 @@ define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3.16b { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3.v16i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A) ret %struct.__neon_int8x16x3_t %tmp2 } @@ -63,13 +63,13 @@ define %struct.__neon_int8x16x4_t @ld4_16b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4.16b { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4.v16i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A) ret %struct.__neon_int8x16x4_t %tmp2 } -declare %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2.v16i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3.v16i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*) nounwind readonly %struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } 
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } @@ -80,7 +80,7 @@ define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld2.4h { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld2.v4i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A) ret %struct.__neon_int16x4x2_t %tmp2 } @@ -89,7 +89,7 @@ define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3.4h { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld3.v4i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A) ret %struct.__neon_int16x4x3_t %tmp2 } @@ -98,13 +98,13 @@ define %struct.__neon_int16x4x4_t @ld4_4h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4.4h { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4.v4i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A) ret %struct.__neon_int16x4x4_t %tmp2 } -declare %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld2.v4i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld3.v4i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*) nounwind readonly %struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } %struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> } @@ -115,7 +115,7 @@ define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld2.8h { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2.v8i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A) ret %struct.__neon_int16x8x2_t %tmp2 } @@ -124,7 +124,7 @@ define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3.8h { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3.v8i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A) ret %struct.__neon_int16x8x3_t %tmp2 } @@ -133,13 +133,13 @@ define %struct.__neon_int16x8x4_t @ld4_8h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4.8h { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4.v8i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A) ret %struct.__neon_int16x8x4_t %tmp2 } -declare %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2.v8i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3.v8i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*) nounwind readonly +declare 
%struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*) nounwind readonly %struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } %struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } @@ -150,7 +150,7 @@ define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld2.2s { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld2.v2i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A) ret %struct.__neon_int32x2x2_t %tmp2 } @@ -159,7 +159,7 @@ define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3.2s { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld3.v2i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A) ret %struct.__neon_int32x2x3_t %tmp2 } @@ -168,13 +168,13 @@ define %struct.__neon_int32x2x4_t @ld4_2s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4.2s { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4.v2i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A) ret %struct.__neon_int32x2x4_t %tmp2 } -declare %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld2.v2i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld3.v2i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*) nounwind readonly %struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } %struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> } @@ -185,7 +185,7 @@ define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld2.4s { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2.v4i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A) ret %struct.__neon_int32x4x2_t %tmp2 } @@ -194,7 +194,7 @@ define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3.4s { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3.v4i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A) ret %struct.__neon_int32x4x3_t %tmp2 } @@ -203,13 +203,13 @@ define %struct.__neon_int32x4x4_t @ld4_4s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4.4s { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4.v4i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A) ret %struct.__neon_int32x4x4_t %tmp2 } -declare %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2.v4i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x4x3_t 
@llvm.arm64.neon.ld3.v4i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*) nounwind readonly %struct.__neon_int64x2x2_t = type { <2 x i64>, <2 x i64> } %struct.__neon_int64x2x3_t = type { <2 x i64>, <2 x i64>, <2 x i64> } @@ -220,7 +220,7 @@ define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld2.2d { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2.v2i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A) ret %struct.__neon_int64x2x2_t %tmp2 } @@ -229,7 +229,7 @@ define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3.2d { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3.v2i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A) ret %struct.__neon_int64x2x3_t %tmp2 } @@ -238,13 +238,13 @@ define %struct.__neon_int64x2x4_t @ld4_2d(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4.2d { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4.v2i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A) ret %struct.__neon_int64x2x4_t %tmp2 } -declare %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2.v2i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3.v2i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*) nounwind readonly %struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> } %struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> } @@ -256,7 +256,7 @@ define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld2.v1i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A) ret %struct.__neon_int64x1x2_t %tmp2 } @@ -265,7 +265,7 @@ define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld3.v1i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A) ret %struct.__neon_int64x1x3_t %tmp2 } @@ -274,14 +274,14 @@ define %struct.__neon_int64x1x4_t @ld4_1di64(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4.v1i64.p0i64(i64* %A) + 
%tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A) ret %struct.__neon_int64x1x4_t %tmp2 } -declare %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld2.v1i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld3.v1i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*) nounwind readonly %struct.__neon_float64x1x2_t = type { <1 x double>, <1 x double> } %struct.__neon_float64x1x3_t = type { <1 x double>, <1 x double>, <1 x double> } @@ -293,7 +293,7 @@ define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_float64x1x2_t @llvm.arm64.neon.ld2.v1f64.p0f64(double* %A) + %tmp2 = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A) ret %struct.__neon_float64x1x2_t %tmp2 } @@ -302,7 +302,7 @@ define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_float64x1x3_t @llvm.arm64.neon.ld3.v1f64.p0f64(double* %A) + %tmp2 = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A) ret %struct.__neon_float64x1x3_t %tmp2 } @@ -311,13 +311,13 @@ define %struct.__neon_float64x1x4_t @ld4_1df64(double* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_float64x1x4_t @llvm.arm64.neon.ld4.v1f64.p0f64(double* %A) + %tmp2 = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A) ret %struct.__neon_float64x1x4_t %tmp2 } -declare %struct.__neon_float64x1x2_t @llvm.arm64.neon.ld2.v1f64.p0f64(double*) nounwind readonly -declare %struct.__neon_float64x1x3_t @llvm.arm64.neon.ld3.v1f64.p0f64(double*) nounwind readonly -declare %struct.__neon_float64x1x4_t @llvm.arm64.neon.ld4.v1f64.p0f64(double*) nounwind readonly +declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double*) nounwind readonly +declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double*) nounwind readonly +declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double*) nounwind readonly define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, i8* %A) nounwind { @@ -325,7 +325,7 @@ define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, i8* ; CHECK: ld2lane_16b ; CHECK ld2.b { v0, v1 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, i64 1, i8* %A) + %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, i64 1, i8* %A) ret %struct.__neon_int8x16x2_t %tmp2 } @@ -334,7 +334,7 @@ define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 ; CHECK: ld3lane_16b ; CHECK ld3.b { v0, v1, v2 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 
1, i8* %A) + %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, i8* %A) ret %struct.__neon_int8x16x3_t %tmp2 } @@ -343,20 +343,20 @@ define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 ; CHECK: ld4lane_16b ; CHECK ld4.b { v0, v1, v2, v3 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, i8* %A) + %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, i8* %A) ret %struct.__neon_int8x16x4_t %tmp2 } -declare %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly -declare %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly -declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly +declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly +declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly +declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK: ld2lane_8h ; CHECK ld2.h { v0, v1 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, i64 1, i16* %A) + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, i64 1, i16* %A) ret %struct.__neon_int16x8x2_t %tmp2 } @@ -365,7 +365,7 @@ define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x ; CHECK: ld3lane_8h ; CHECK ld3.h { v0, v1, v3 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, i16* %A) + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, i16* %A) ret %struct.__neon_int16x8x3_t %tmp2 } @@ -374,20 +374,20 @@ define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x ; CHECK: ld4lane_8h ; CHECK ld4.h { v0, v1, v2, v3 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, i16* %A) + %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, i16* %A) ret %struct.__neon_int16x8x4_t %tmp2 } -declare %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly -declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x 
i16>, <8 x i16>, i64, i16*) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK: ld2lane_4s ; CHECK ld2.s { v0, v1 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, i64 1, i32* %A) + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, i64 1, i32* %A) ret %struct.__neon_int32x4x2_t %tmp2 } @@ -396,7 +396,7 @@ define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x ; CHECK: ld3lane_4s ; CHECK ld3.s { v0, v1, v2 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, i32* %A) + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, i32* %A) ret %struct.__neon_int32x4x3_t %tmp2 } @@ -405,20 +405,20 @@ define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x ; CHECK: ld4lane_4s ; CHECK ld4.s { v0, v1, v2, v3 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, i32* %A) + %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, i32* %A) ret %struct.__neon_int32x4x4_t %tmp2 } -declare %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly -declare %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly -declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK: ld2lane_2d ; CHECK ld2.d { v0, v1 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, i64 1, i64* %A) + %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, i64 1, i64* %A) ret %struct.__neon_int64x2x2_t %tmp2 } @@ -427,7 +427,7 @@ define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x ; CHECK: ld3lane_2d ; CHECK ld3.d { v0, v1, v3 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, i64* %A) + %tmp2 = call 
%struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, i64* %A) ret %struct.__neon_int64x2x3_t %tmp2 } @@ -436,13 +436,13 @@ define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x ; CHECK: ld4lane_2d ; CHECK ld4.d { v0, v1, v2, v3 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, i64* %A) + %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, i64* %A) ret %struct.__neon_int64x2x4_t %tmp2 } -declare %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly -declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly -declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly +declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly +declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly +declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly define <8 x i8> @ld1r_8b(i8* %bar) { ; CHECK: ld1r_8b @@ -556,7 +556,7 @@ define %struct.__neon_int8x8x2_t @ld2r_8b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.8b { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld2r.v8i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A) ret %struct.__neon_int8x8x2_t %tmp2 } @@ -565,7 +565,7 @@ define %struct.__neon_int8x8x3_t @ld3r_8b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.8b { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld3r.v8i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A) ret %struct.__neon_int8x8x3_t %tmp2 } @@ -574,20 +574,20 @@ define %struct.__neon_int8x8x4_t @ld4r_8b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.8b { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4r.v8i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A) ret %struct.__neon_int8x8x4_t %tmp2 } -declare %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly define %struct.__neon_int8x16x2_t @ld2r_16b(i8* %A) nounwind { ; CHECK: ld2r_16b ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.16b { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x2_t 
@llvm.arm64.neon.ld2r.v16i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A) ret %struct.__neon_int8x16x2_t %tmp2 } @@ -596,7 +596,7 @@ define %struct.__neon_int8x16x3_t @ld3r_16b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.16b { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3r.v16i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A) ret %struct.__neon_int8x16x3_t %tmp2 } @@ -605,20 +605,20 @@ define %struct.__neon_int8x16x4_t @ld4r_16b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.16b { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4r.v16i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A) ret %struct.__neon_int8x16x4_t %tmp2 } -declare %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly define %struct.__neon_int16x4x2_t @ld2r_4h(i16* %A) nounwind { ; CHECK: ld2r_4h ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.4h { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld2r.v4i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A) ret %struct.__neon_int16x4x2_t %tmp2 } @@ -627,7 +627,7 @@ define %struct.__neon_int16x4x3_t @ld3r_4h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.4h { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld3r.v4i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A) ret %struct.__neon_int16x4x3_t %tmp2 } @@ -636,20 +636,20 @@ define %struct.__neon_int16x4x4_t @ld4r_4h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.4h { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4r.v4i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A) ret %struct.__neon_int16x4x4_t %tmp2 } -declare %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly define %struct.__neon_int16x8x2_t @ld2r_8h(i16* %A) nounwind { ; CHECK: ld2r_8h ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.8h { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x2_t 
@llvm.arm64.neon.ld2r.v8i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A) ret %struct.__neon_int16x8x2_t %tmp2 } @@ -658,7 +658,7 @@ define %struct.__neon_int16x8x3_t @ld3r_8h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.8h { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3r.v8i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A) ret %struct.__neon_int16x8x3_t %tmp2 } @@ -667,20 +667,20 @@ define %struct.__neon_int16x8x4_t @ld4r_8h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.8h { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4r.v8i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A) ret %struct.__neon_int16x8x4_t %tmp2 } -declare %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly define %struct.__neon_int32x2x2_t @ld2r_2s(i32* %A) nounwind { ; CHECK: ld2r_2s ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.2s { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld2r.v2i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A) ret %struct.__neon_int32x2x2_t %tmp2 } @@ -689,7 +689,7 @@ define %struct.__neon_int32x2x3_t @ld3r_2s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.2s { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld3r.v2i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A) ret %struct.__neon_int32x2x3_t %tmp2 } @@ -698,20 +698,20 @@ define %struct.__neon_int32x2x4_t @ld4r_2s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.2s { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4r.v2i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A) ret %struct.__neon_int32x2x4_t %tmp2 } -declare %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly define %struct.__neon_int32x4x2_t @ld2r_4s(i32* %A) nounwind { ; CHECK: ld2r_4s ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.4s { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call 
%struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2r.v4i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A) ret %struct.__neon_int32x4x2_t %tmp2 } @@ -720,7 +720,7 @@ define %struct.__neon_int32x4x3_t @ld3r_4s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.4s { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3r.v4i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A) ret %struct.__neon_int32x4x3_t %tmp2 } @@ -729,20 +729,20 @@ define %struct.__neon_int32x4x4_t @ld4r_4s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.4s { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4r.v4i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A) ret %struct.__neon_int32x4x4_t %tmp2 } -declare %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly define %struct.__neon_int64x1x2_t @ld2r_1d(i64* %A) nounwind { ; CHECK: ld2r_1d ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.1d { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld2r.v1i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A) ret %struct.__neon_int64x1x2_t %tmp2 } @@ -751,7 +751,7 @@ define %struct.__neon_int64x1x3_t @ld3r_1d(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.1d { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld3r.v1i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A) ret %struct.__neon_int64x1x3_t %tmp2 } @@ -760,20 +760,20 @@ define %struct.__neon_int64x1x4_t @ld4r_1d(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.1d { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4r.v1i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A) ret %struct.__neon_int64x1x4_t %tmp2 } -declare %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly define %struct.__neon_int64x2x2_t @ld2r_2d(i64* %A) nounwind { ; CHECK: ld2r_2d ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.2d { v0, v1 }, [x0] ; CHECK-NEXT ret - 
%tmp2 = call %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2r.v2i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A) ret %struct.__neon_int64x2x2_t %tmp2 } @@ -782,7 +782,7 @@ define %struct.__neon_int64x2x3_t @ld3r_2d(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.2d { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3r.v2i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A) ret %struct.__neon_int64x2x3_t %tmp2 } @@ -791,13 +791,13 @@ define %struct.__neon_int64x2x4_t @ld4r_2d(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.2d { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4r.v2i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A) ret %struct.__neon_int64x2x4_t %tmp2 } -declare %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) { ; CHECK-LABEL: ld1_16b @@ -1041,52 +1041,52 @@ entry: %struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } %struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } -declare %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_float32x2x2_t @llvm.arm64.neon.ld1x2.v2f32.p0f32(float*) nounwind readonly -declare %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_float64x1x2_t @llvm.arm64.neon.ld1x2.v1f64.p0f64(double*) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*) nounwind readonly +declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*) nounwind readonly define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(i8* %addr) { ; CHECK-LABEL: ld1_x2_v8i8: ; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8* %addr) + %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %addr) ret %struct.__neon_int8x8x2_t %val } define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(i16* %addr) { ; CHECK-LABEL: ld1_x2_v4i16: ; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call 
%struct.__neon_int16x4x2_t @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16* %addr) + %val = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %addr) ret %struct.__neon_int16x4x2_t %val } define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(i32* %addr) { ; CHECK-LABEL: ld1_x2_v2i32: ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32* %addr) + %val = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %addr) ret %struct.__neon_int32x2x2_t %val } define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(float* %addr) { ; CHECK-LABEL: ld1_x2_v2f32: ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float32x2x2_t @llvm.arm64.neon.ld1x2.v2f32.p0f32(float* %addr) + %val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %addr) ret %struct.__neon_float32x2x2_t %val } define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(i64* %addr) { ; CHECK-LABEL: ld1_x2_v1i64: ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64* %addr) + %val = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %addr) ret %struct.__neon_int64x1x2_t %val } define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(double* %addr) { ; CHECK-LABEL: ld1_x2_v1f64: ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float64x1x2_t @llvm.arm64.neon.ld1x2.v1f64.p0f64(double* %addr) + %val = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %addr) ret %struct.__neon_float64x1x2_t %val } @@ -1099,247 +1099,247 @@ define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(double* %addr) { %struct.__neon_float64x2x3_t = type { <2 x double>, <2 x double>, <2 x double> } %struct.__neon_float64x2x4_t = type { <2 x double>, <2 x double>, <2 x double>, <2 x double> } -declare %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_float32x4x2_t @llvm.arm64.neon.ld1x2.v4f32.p0f32(float*) nounwind readonly -declare %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_float64x2x2_t @llvm.arm64.neon.ld1x2.v2f64.p0f64(double*) nounwind readonly +declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*) nounwind readonly +declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*) nounwind readonly define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(i8* %addr) { ; CHECK-LABEL: ld1_x2_v16i8: ; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8* %addr) + %val = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %addr) ret %struct.__neon_int8x16x2_t %val } define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(i16* %addr) { ; CHECK-LABEL: ld1_x2_v8i16: ; CHECK: 
ld1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16* %addr) + %val = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %addr) ret %struct.__neon_int16x8x2_t %val } define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(i32* %addr) { ; CHECK-LABEL: ld1_x2_v4i32: ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32* %addr) + %val = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %addr) ret %struct.__neon_int32x4x2_t %val } define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(float* %addr) { ; CHECK-LABEL: ld1_x2_v4f32: ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float32x4x2_t @llvm.arm64.neon.ld1x2.v4f32.p0f32(float* %addr) + %val = call %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %addr) ret %struct.__neon_float32x4x2_t %val } define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(i64* %addr) { ; CHECK-LABEL: ld1_x2_v2i64: ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64* %addr) + %val = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %addr) ret %struct.__neon_int64x2x2_t %val } define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(double* %addr) { ; CHECK-LABEL: ld1_x2_v2f64: ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float64x2x2_t @llvm.arm64.neon.ld1x2.v2f64.p0f64(double* %addr) + %val = call %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %addr) ret %struct.__neon_float64x2x2_t %val } -declare %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_float32x2x3_t @llvm.arm64.neon.ld1x3.v2f32.p0f32(float*) nounwind readonly -declare %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_float64x1x3_t @llvm.arm64.neon.ld1x3.v1f64.p0f64(double*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*) nounwind readonly define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(i8* %addr) { ; CHECK-LABEL: ld1_x3_v8i8: ; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8* %addr) + %val = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %addr) ret %struct.__neon_int8x8x3_t %val } define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(i16* %addr) { ; CHECK-LABEL: ld1_x3_v4i16: ; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16* %addr) + %val = call %struct.__neon_int16x4x3_t 
@llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %addr) ret %struct.__neon_int16x4x3_t %val } define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(i32* %addr) { ; CHECK-LABEL: ld1_x3_v2i32: ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32* %addr) + %val = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %addr) ret %struct.__neon_int32x2x3_t %val } define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(float* %addr) { ; CHECK-LABEL: ld1_x3_v2f32: ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float32x2x3_t @llvm.arm64.neon.ld1x3.v2f32.p0f32(float* %addr) + %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %addr) ret %struct.__neon_float32x2x3_t %val } define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(i64* %addr) { ; CHECK-LABEL: ld1_x3_v1i64: ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64* %addr) + %val = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %addr) ret %struct.__neon_int64x1x3_t %val } define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(double* %addr) { ; CHECK-LABEL: ld1_x3_v1f64: ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float64x1x3_t @llvm.arm64.neon.ld1x3.v1f64.p0f64(double* %addr) + %val = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %addr) ret %struct.__neon_float64x1x3_t %val } -declare %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_float32x4x3_t @llvm.arm64.neon.ld1x3.v4f32.p0f32(float*) nounwind readonly -declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_float64x2x3_t @llvm.arm64.neon.ld1x3.v2f64.p0f64(double*) nounwind readonly +declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*) nounwind readonly +declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*) nounwind readonly define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(i8* %addr) { ; CHECK-LABEL: ld1_x3_v16i8: ; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8* %addr) + %val = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %addr) ret %struct.__neon_int8x16x3_t %val } define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(i16* %addr) { ; CHECK-LABEL: ld1_x3_v8i16: ; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16* %addr) + %val = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %addr) ret %struct.__neon_int16x8x3_t %val } define %struct.__neon_int32x4x3_t 
@ld1_x3_v4i32(i32* %addr) { ; CHECK-LABEL: ld1_x3_v4i32: ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32* %addr) + %val = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %addr) ret %struct.__neon_int32x4x3_t %val } define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(float* %addr) { ; CHECK-LABEL: ld1_x3_v4f32: ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float32x4x3_t @llvm.arm64.neon.ld1x3.v4f32.p0f32(float* %addr) + %val = call %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %addr) ret %struct.__neon_float32x4x3_t %val } define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(i64* %addr) { ; CHECK-LABEL: ld1_x3_v2i64: ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64* %addr) + %val = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %addr) ret %struct.__neon_int64x2x3_t %val } define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(double* %addr) { ; CHECK-LABEL: ld1_x3_v2f64: ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float64x2x3_t @llvm.arm64.neon.ld1x3.v2f64.p0f64(double* %addr) + %val = call %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %addr) ret %struct.__neon_float64x2x3_t %val } -declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_float32x2x4_t @llvm.arm64.neon.ld1x4.v2f32.p0f32(float*) nounwind readonly -declare %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_float64x1x4_t @llvm.arm64.neon.ld1x4.v1f64.p0f64(double*) nounwind readonly +declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*) nounwind readonly define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(i8* %addr) { ; CHECK-LABEL: ld1_x4_v8i8: ; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8* %addr) + %val = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %addr) ret %struct.__neon_int8x8x4_t %val } define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(i16* %addr) { ; CHECK-LABEL: ld1_x4_v4i16: ; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16* %addr) + %val = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %addr) ret %struct.__neon_int16x4x4_t %val } define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(i32* %addr) { ; CHECK-LABEL: ld1_x4_v2i32: ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 
{{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32* %addr) + %val = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %addr) ret %struct.__neon_int32x2x4_t %val } define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(float* %addr) { ; CHECK-LABEL: ld1_x4_v2f32: ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float32x2x4_t @llvm.arm64.neon.ld1x4.v2f32.p0f32(float* %addr) + %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %addr) ret %struct.__neon_float32x2x4_t %val } define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(i64* %addr) { ; CHECK-LABEL: ld1_x4_v1i64: ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64* %addr) + %val = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %addr) ret %struct.__neon_int64x1x4_t %val } define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(double* %addr) { ; CHECK-LABEL: ld1_x4_v1f64: ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float64x1x4_t @llvm.arm64.neon.ld1x4.v1f64.p0f64(double* %addr) + %val = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %addr) ret %struct.__neon_float64x1x4_t %val } -declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_float32x4x4_t @llvm.arm64.neon.ld1x4.v4f32.p0f32(float*) nounwind readonly -declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_float64x2x4_t @llvm.arm64.neon.ld1x4.v2f64.p0f64(double*) nounwind readonly +declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*) nounwind readonly +declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*) nounwind readonly define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(i8* %addr) { ; CHECK-LABEL: ld1_x4_v16i8: ; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8* %addr) + %val = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %addr) ret %struct.__neon_int8x16x4_t %val } define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(i16* %addr) { ; CHECK-LABEL: ld1_x4_v8i16: ; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16* %addr) + %val = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %addr) ret %struct.__neon_int16x8x4_t %val } define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(i32* %addr) { ; CHECK-LABEL: ld1_x4_v4i32: ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call 
%struct.__neon_int32x4x4_t @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32* %addr) + %val = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %addr) ret %struct.__neon_int32x4x4_t %val } define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(float* %addr) { ; CHECK-LABEL: ld1_x4_v4f32: ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float32x4x4_t @llvm.arm64.neon.ld1x4.v4f32.p0f32(float* %addr) + %val = call %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %addr) ret %struct.__neon_float32x4x4_t %val } define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(i64* %addr) { ; CHECK-LABEL: ld1_x4_v2i64: ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64* %addr) + %val = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %addr) ret %struct.__neon_int64x2x4_t %val } define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(double* %addr) { ; CHECK-LABEL: ld1_x4_v2f64: ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float64x2x4_t @llvm.arm64.neon.ld1x4.v2f64.p0f64(double* %addr) + %val = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %addr) ret %struct.__neon_float64x2x4_t %val } diff --git a/test/CodeGen/ARM64/ldp.ll b/test/CodeGen/AArch64/arm64-ldp.ll similarity index 98% rename from test/CodeGen/ARM64/ldp.ll rename to test/CodeGen/AArch64/arm64-ldp.ll index 9444385f8ab3..5a986261b31b 100644 --- a/test/CodeGen/ARM64/ldp.ll +++ b/test/CodeGen/AArch64/arm64-ldp.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=arm64 -arm64-unscaled-mem-op=true\ +; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\ ; RUN: -verify-machineinstrs | FileCheck -check-prefix=LDUR_CHK %s ; CHECK: ldp_int diff --git a/test/CodeGen/ARM64/ldur.ll b/test/CodeGen/AArch64/arm64-ldur.ll similarity index 100% rename from test/CodeGen/ARM64/ldur.ll rename to test/CodeGen/AArch64/arm64-ldur.ll diff --git a/test/CodeGen/ARM64/ldxr-stxr.ll b/test/CodeGen/AArch64/arm64-ldxr-stxr.ll similarity index 69% rename from test/CodeGen/ARM64/ldxr-stxr.ll rename to test/CodeGen/AArch64/arm64-ldxr-stxr.ll index ed53a14ca8cc..9093df27cddc 100644 --- a/test/CodeGen/ARM64/ldxr-stxr.ll +++ b/test/CodeGen/AArch64/arm64-ldxr-stxr.ll @@ -6,7 +6,7 @@ define i128 @f0(i8* %p) nounwind readonly { ; CHECK-LABEL: f0: ; CHECK: ldxp {{x[0-9]+}}, {{x[0-9]+}}, [x0] entry: - %ldrexd = tail call %0 @llvm.arm64.ldxp(i8* %p) + %ldrexd = tail call %0 @llvm.aarch64.ldxp(i8* %p) %0 = extractvalue %0 %ldrexd, 1 %1 = extractvalue %0 %ldrexd, 0 %2 = zext i64 %0 to i128 @@ -23,12 +23,12 @@ entry: %tmp4 = trunc i128 %val to i64 %tmp6 = lshr i128 %val, 64 %tmp7 = trunc i128 %tmp6 to i64 - %strexd = tail call i32 @llvm.arm64.stxp(i64 %tmp4, i64 %tmp7, i8* %ptr) + %strexd = tail call i32 @llvm.aarch64.stxp(i64 %tmp4, i64 %tmp7, i8* %ptr) ret i32 %strexd } -declare %0 @llvm.arm64.ldxp(i8*) nounwind -declare i32 @llvm.arm64.stxp(i64, i64, i8*) nounwind +declare %0 @llvm.aarch64.ldxp(i8*) nounwind +declare i32 @llvm.aarch64.stxp(i64, i64, i8*) nounwind @var = global i64 0, align 8 @@ -39,7 +39,7 @@ define void @test_load_i8(i8* %addr) { ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldxr.p0i8(i8* %addr) + %val = call i64 @llvm.aarch64.ldxr.p0i8(i8* %addr) %shortval = trunc 
i64 %val to i8 %extval = zext i8 %shortval to i64 store i64 %extval, i64* @var, align 8 @@ -53,7 +53,7 @@ define void @test_load_i16(i16* %addr) { ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldxr.p0i16(i16* %addr) + %val = call i64 @llvm.aarch64.ldxr.p0i16(i16* %addr) %shortval = trunc i64 %val to i16 %extval = zext i16 %shortval to i64 store i64 %extval, i64* @var, align 8 @@ -67,7 +67,7 @@ define void @test_load_i32(i32* %addr) { ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldxr.p0i32(i32* %addr) + %val = call i64 @llvm.aarch64.ldxr.p0i32(i32* %addr) %shortval = trunc i64 %val to i32 %extval = zext i32 %shortval to i64 store i64 %extval, i64* @var, align 8 @@ -79,16 +79,16 @@ define void @test_load_i64(i64* %addr) { ; CHECK: ldxr x[[LOADVAL:[0-9]+]], [x0] ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldxr.p0i64(i64* %addr) + %val = call i64 @llvm.aarch64.ldxr.p0i64(i64* %addr) store i64 %val, i64* @var, align 8 ret void } -declare i64 @llvm.arm64.ldxr.p0i8(i8*) nounwind -declare i64 @llvm.arm64.ldxr.p0i16(i16*) nounwind -declare i64 @llvm.arm64.ldxr.p0i32(i32*) nounwind -declare i64 @llvm.arm64.ldxr.p0i64(i64*) nounwind +declare i64 @llvm.aarch64.ldxr.p0i8(i8*) nounwind +declare i64 @llvm.aarch64.ldxr.p0i16(i16*) nounwind +declare i64 @llvm.aarch64.ldxr.p0i32(i32*) nounwind +declare i64 @llvm.aarch64.ldxr.p0i64(i64*) nounwind define i32 @test_store_i8(i32, i8 %val, i8* %addr) { ; CHECK-LABEL: test_store_i8: @@ -96,7 +96,7 @@ define i32 @test_store_i8(i32, i8 %val, i8* %addr) { ; CHECK-NOT: and ; CHECK: stxrb w0, w1, [x2] %extval = zext i8 %val to i64 - %res = call i32 @llvm.arm64.stxr.p0i8(i64 %extval, i8* %addr) + %res = call i32 @llvm.aarch64.stxr.p0i8(i64 %extval, i8* %addr) ret i32 %res } @@ -106,7 +106,7 @@ define i32 @test_store_i16(i32, i16 %val, i16* %addr) { ; CHECK-NOT: and ; CHECK: stxrh w0, w1, [x2] %extval = zext i16 %val to i64 - %res = call i32 @llvm.arm64.stxr.p0i16(i64 %extval, i16* %addr) + %res = call i32 @llvm.aarch64.stxr.p0i16(i64 %extval, i16* %addr) ret i32 %res } @@ -116,36 +116,36 @@ define i32 @test_store_i32(i32, i32 %val, i32* %addr) { ; CHECK-NOT: and ; CHECK: stxr w0, w1, [x2] %extval = zext i32 %val to i64 - %res = call i32 @llvm.arm64.stxr.p0i32(i64 %extval, i32* %addr) + %res = call i32 @llvm.aarch64.stxr.p0i32(i64 %extval, i32* %addr) ret i32 %res } define i32 @test_store_i64(i32, i64 %val, i64* %addr) { ; CHECK-LABEL: test_store_i64: ; CHECK: stxr w0, x1, [x2] - %res = call i32 @llvm.arm64.stxr.p0i64(i64 %val, i64* %addr) + %res = call i32 @llvm.aarch64.stxr.p0i64(i64 %val, i64* %addr) ret i32 %res } -declare i32 @llvm.arm64.stxr.p0i8(i64, i8*) nounwind -declare i32 @llvm.arm64.stxr.p0i16(i64, i16*) nounwind -declare i32 @llvm.arm64.stxr.p0i32(i64, i32*) nounwind -declare i32 @llvm.arm64.stxr.p0i64(i64, i64*) nounwind +declare i32 @llvm.aarch64.stxr.p0i8(i64, i8*) nounwind +declare i32 @llvm.aarch64.stxr.p0i16(i64, i16*) nounwind +declare i32 @llvm.aarch64.stxr.p0i32(i64, i32*) nounwind +declare i32 @llvm.aarch64.stxr.p0i64(i64, i64*) nounwind ; CHECK: test_clear: ; CHECK: clrex define void @test_clear() { - call void @llvm.arm64.clrex() + call void @llvm.aarch64.clrex() ret void } -declare void @llvm.arm64.clrex() nounwind +declare void @llvm.aarch64.clrex() nounwind define i128 @test_load_acquire_i128(i8* %p) nounwind readonly { ; CHECK-LABEL: test_load_acquire_i128: ; CHECK: ldaxp {{x[0-9]+}}, {{x[0-9]+}}, 
[x0] entry: - %ldrexd = tail call %0 @llvm.arm64.ldaxp(i8* %p) + %ldrexd = tail call %0 @llvm.aarch64.ldaxp(i8* %p) %0 = extractvalue %0 %ldrexd, 1 %1 = extractvalue %0 %ldrexd, 0 %2 = zext i64 %0 to i128 @@ -162,12 +162,12 @@ entry: %tmp4 = trunc i128 %val to i64 %tmp6 = lshr i128 %val, 64 %tmp7 = trunc i128 %tmp6 to i64 - %strexd = tail call i32 @llvm.arm64.stlxp(i64 %tmp4, i64 %tmp7, i8* %ptr) + %strexd = tail call i32 @llvm.aarch64.stlxp(i64 %tmp4, i64 %tmp7, i8* %ptr) ret i32 %strexd } -declare %0 @llvm.arm64.ldaxp(i8*) nounwind -declare i32 @llvm.arm64.stlxp(i64, i64, i8*) nounwind +declare %0 @llvm.aarch64.ldaxp(i8*) nounwind +declare i32 @llvm.aarch64.stlxp(i64, i64, i8*) nounwind define void @test_load_acquire_i8(i8* %addr) { ; CHECK-LABEL: test_load_acquire_i8: @@ -176,7 +176,7 @@ define void @test_load_acquire_i8(i8* %addr) { ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldaxr.p0i8(i8* %addr) + %val = call i64 @llvm.aarch64.ldaxr.p0i8(i8* %addr) %shortval = trunc i64 %val to i8 %extval = zext i8 %shortval to i64 store i64 %extval, i64* @var, align 8 @@ -190,7 +190,7 @@ define void @test_load_acquire_i16(i16* %addr) { ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldaxr.p0i16(i16* %addr) + %val = call i64 @llvm.aarch64.ldaxr.p0i16(i16* %addr) %shortval = trunc i64 %val to i16 %extval = zext i16 %shortval to i64 store i64 %extval, i64* @var, align 8 @@ -204,7 +204,7 @@ define void @test_load_acquire_i32(i32* %addr) { ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldaxr.p0i32(i32* %addr) + %val = call i64 @llvm.aarch64.ldaxr.p0i32(i32* %addr) %shortval = trunc i64 %val to i32 %extval = zext i32 %shortval to i64 store i64 %extval, i64* @var, align 8 @@ -216,16 +216,16 @@ define void @test_load_acquire_i64(i64* %addr) { ; CHECK: ldaxr x[[LOADVAL:[0-9]+]], [x0] ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldaxr.p0i64(i64* %addr) + %val = call i64 @llvm.aarch64.ldaxr.p0i64(i64* %addr) store i64 %val, i64* @var, align 8 ret void } -declare i64 @llvm.arm64.ldaxr.p0i8(i8*) nounwind -declare i64 @llvm.arm64.ldaxr.p0i16(i16*) nounwind -declare i64 @llvm.arm64.ldaxr.p0i32(i32*) nounwind -declare i64 @llvm.arm64.ldaxr.p0i64(i64*) nounwind +declare i64 @llvm.aarch64.ldaxr.p0i8(i8*) nounwind +declare i64 @llvm.aarch64.ldaxr.p0i16(i16*) nounwind +declare i64 @llvm.aarch64.ldaxr.p0i32(i32*) nounwind +declare i64 @llvm.aarch64.ldaxr.p0i64(i64*) nounwind define i32 @test_store_release_i8(i32, i8 %val, i8* %addr) { ; CHECK-LABEL: test_store_release_i8: @@ -233,7 +233,7 @@ define i32 @test_store_release_i8(i32, i8 %val, i8* %addr) { ; CHECK-NOT: and ; CHECK: stlxrb w0, w1, [x2] %extval = zext i8 %val to i64 - %res = call i32 @llvm.arm64.stlxr.p0i8(i64 %extval, i8* %addr) + %res = call i32 @llvm.aarch64.stlxr.p0i8(i64 %extval, i8* %addr) ret i32 %res } @@ -243,7 +243,7 @@ define i32 @test_store_release_i16(i32, i16 %val, i16* %addr) { ; CHECK-NOT: and ; CHECK: stlxrh w0, w1, [x2] %extval = zext i16 %val to i64 - %res = call i32 @llvm.arm64.stlxr.p0i16(i64 %extval, i16* %addr) + %res = call i32 @llvm.aarch64.stlxr.p0i16(i64 %extval, i16* %addr) ret i32 %res } @@ -253,18 +253,18 @@ define i32 @test_store_release_i32(i32, i32 %val, i32* %addr) { ; CHECK-NOT: and ; CHECK: stlxr w0, w1, [x2] %extval = zext i32 %val to i64 - %res = call i32 @llvm.arm64.stlxr.p0i32(i64 %extval, i32* %addr) + %res = call i32 
@llvm.aarch64.stlxr.p0i32(i64 %extval, i32* %addr) ret i32 %res } define i32 @test_store_release_i64(i32, i64 %val, i64* %addr) { ; CHECK-LABEL: test_store_release_i64: ; CHECK: stlxr w0, x1, [x2] - %res = call i32 @llvm.arm64.stlxr.p0i64(i64 %val, i64* %addr) + %res = call i32 @llvm.aarch64.stlxr.p0i64(i64 %val, i64* %addr) ret i32 %res } -declare i32 @llvm.arm64.stlxr.p0i8(i64, i8*) nounwind -declare i32 @llvm.arm64.stlxr.p0i16(i64, i16*) nounwind -declare i32 @llvm.arm64.stlxr.p0i32(i64, i32*) nounwind -declare i32 @llvm.arm64.stlxr.p0i64(i64, i64*) nounwind +declare i32 @llvm.aarch64.stlxr.p0i8(i64, i8*) nounwind +declare i32 @llvm.aarch64.stlxr.p0i16(i64, i16*) nounwind +declare i32 @llvm.aarch64.stlxr.p0i32(i64, i32*) nounwind +declare i32 @llvm.aarch64.stlxr.p0i64(i64, i64*) nounwind diff --git a/test/CodeGen/ARM64/leaf.ll b/test/CodeGen/AArch64/arm64-leaf.ll similarity index 100% rename from test/CodeGen/ARM64/leaf.ll rename to test/CodeGen/AArch64/arm64-leaf.ll diff --git a/test/CodeGen/ARM64/long-shift.ll b/test/CodeGen/AArch64/arm64-long-shift.ll similarity index 100% rename from test/CodeGen/ARM64/long-shift.ll rename to test/CodeGen/AArch64/arm64-long-shift.ll diff --git a/test/CodeGen/ARM64/memcpy-inline.ll b/test/CodeGen/AArch64/arm64-memcpy-inline.ll similarity index 100% rename from test/CodeGen/ARM64/memcpy-inline.ll rename to test/CodeGen/AArch64/arm64-memcpy-inline.ll diff --git a/test/CodeGen/ARM64/memset-inline.ll b/test/CodeGen/AArch64/arm64-memset-inline.ll similarity index 100% rename from test/CodeGen/ARM64/memset-inline.ll rename to test/CodeGen/AArch64/arm64-memset-inline.ll diff --git a/test/CodeGen/ARM64/memset-to-bzero.ll b/test/CodeGen/AArch64/arm64-memset-to-bzero.ll similarity index 100% rename from test/CodeGen/ARM64/memset-to-bzero.ll rename to test/CodeGen/AArch64/arm64-memset-to-bzero.ll diff --git a/test/CodeGen/ARM64/misched-basic-A53.ll b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll similarity index 96% rename from test/CodeGen/ARM64/misched-basic-A53.ll rename to test/CodeGen/AArch64/arm64-misched-basic-A53.ll index d69b097a9b58..f88bd6a4fe32 100644 --- a/test/CodeGen/ARM64/misched-basic-A53.ll +++ b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll @@ -115,10 +115,10 @@ attributes #1 = { nounwind } ; ; Nothing explicit to check other than llc not crashing. 
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) { - %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %A) + %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 32 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld2 } -declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8*) +declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) diff --git a/test/CodeGen/ARM64/misched-forwarding-A53.ll b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll similarity index 100% rename from test/CodeGen/ARM64/misched-forwarding-A53.ll rename to test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll diff --git a/test/CodeGen/ARM64/movi.ll b/test/CodeGen/AArch64/arm64-movi.ll similarity index 100% rename from test/CodeGen/ARM64/movi.ll rename to test/CodeGen/AArch64/arm64-movi.ll diff --git a/test/CodeGen/ARM64/mul.ll b/test/CodeGen/AArch64/arm64-mul.ll similarity index 100% rename from test/CodeGen/ARM64/mul.ll rename to test/CodeGen/AArch64/arm64-mul.ll diff --git a/test/CodeGen/ARM64/named-reg-alloc.ll b/test/CodeGen/AArch64/arm64-named-reg-alloc.ll similarity index 100% rename from test/CodeGen/ARM64/named-reg-alloc.ll rename to test/CodeGen/AArch64/arm64-named-reg-alloc.ll diff --git a/test/CodeGen/ARM64/named-reg-notareg.ll b/test/CodeGen/AArch64/arm64-named-reg-notareg.ll similarity index 100% rename from test/CodeGen/ARM64/named-reg-notareg.ll rename to test/CodeGen/AArch64/arm64-named-reg-notareg.ll diff --git a/test/CodeGen/ARM64/neg.ll b/test/CodeGen/AArch64/arm64-neg.ll similarity index 100% rename from test/CodeGen/ARM64/neg.ll rename to test/CodeGen/AArch64/arm64-neg.ll diff --git a/test/CodeGen/ARM64/aarch64-neon-2velem-high.ll b/test/CodeGen/AArch64/arm64-neon-2velem-high.ll similarity index 82% rename from test/CodeGen/ARM64/aarch64-neon-2velem-high.ll rename to test/CodeGen/AArch64/arm64-neon-2velem-high.ll index 2013747713b9..58df094d1922 100644 --- a/test/CodeGen/ARM64/aarch64-neon-2velem-high.ll +++ b/test/CodeGen/AArch64/arm64-neon-2velem-high.ll @@ -4,25 +4,25 @@ declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) -declare <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) -declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) -declare <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) -declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) -declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) -declare <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> 
@llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) { ; CHECK-LABEL: test_vmull_high_n_s16: @@ -34,7 +34,7 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vmull15.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) ret <4 x i32> %vmull15.i.i } @@ -46,7 +46,7 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vmull9.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) ret <2 x i64> %vmull9.i.i } @@ -60,7 +60,7 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vmull15.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) ret <4 x i32> %vmull15.i.i } @@ -72,7 +72,7 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vmull9.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) ret <2 x i64> %vmull9.i.i } @@ -86,7 +86,7 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vqdmull15.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vqdmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) ret <4 x i32> %vqdmull15.i.i } @@ -98,7 +98,7 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vqdmull9.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vqdmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) ret <2 x i64> %vqdmull9.i.i } @@ -112,7 +112,7 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> 
%shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) %add.i.i = add <4 x i32> %vmull2.i.i.i, %a ret <4 x i32> %add.i.i } @@ -125,7 +125,7 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) %add.i.i = add <2 x i64> %vmull2.i.i.i, %a ret <2 x i64> %add.i.i } @@ -140,7 +140,7 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) %add.i.i = add <4 x i32> %vmull2.i.i.i, %a ret <4 x i32> %add.i.i } @@ -153,7 +153,7 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) %add.i.i = add <2 x i64> %vmull2.i.i.i, %a ret <2 x i64> %add.i.i } @@ -167,8 +167,8 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vqdmlal15.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %vqdmlal17.i.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) + %vqdmlal15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vqdmlal17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) ret <4 x i32> %vqdmlal17.i.i } @@ -179,8 +179,8 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vqdmlal9.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %vqdmlal11.i.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) + %vqdmlal9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vqdmlal11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) ret <2 x i64> %vqdmlal11.i.i } @@ -193,7 +193,7 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x 
i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i ret <4 x i32> %sub.i.i } @@ -205,7 +205,7 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i ret <2 x i64> %sub.i.i } @@ -219,7 +219,7 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i ret <4 x i32> %sub.i.i } @@ -231,7 +231,7 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i ret <2 x i64> %sub.i.i } @@ -245,8 +245,8 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vqdmlsl15.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %vqdmlsl17.i.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) + %vqdmlsl15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vqdmlsl17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) ret <4 x i32> %vqdmlsl17.i.i } @@ -257,8 +257,8 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vqdmlsl9.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %vqdmlsl11.i.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) + %vqdmlsl9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vqdmlsl11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) ret <2 x i64> %vqdmlsl11.i.i } diff --git a/test/CodeGen/ARM64/aarch64-neon-2velem.ll b/test/CodeGen/AArch64/arm64-neon-2velem.ll similarity index 84% rename from test/CodeGen/ARM64/aarch64-neon-2velem.ll rename to test/CodeGen/AArch64/arm64-neon-2velem.ll index 4c6b72d55c34..869966caa3ae 100644 --- 
a/test/CodeGen/ARM64/aarch64-neon-2velem.ll +++ b/test/CodeGen/AArch64/arm64-neon-2velem.ll @@ -1,46 +1,46 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -declare <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double>, <2 x double>) +declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) -declare <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float>, <4 x float>) +declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>) -declare <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) -declare <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) -declare <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) -declare <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) -declare <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) -declare <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) -declare <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) -declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) -declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) -declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) -declare <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmla_lane_s16: @@ -563,7 +563,7 @@ define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = 
shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -574,7 +574,7 @@ define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -585,7 +585,7 @@ define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -596,7 +596,7 @@ define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -608,7 +608,7 @@ define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -620,7 +620,7 @@ define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -632,7 +632,7 @@ define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -644,7 +644,7 @@ define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> 
@llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -655,7 +655,7 @@ define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -666,7 +666,7 @@ define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -677,7 +677,7 @@ define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -688,7 +688,7 @@ define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -700,7 +700,7 @@ define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -712,7 +712,7 @@ define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -724,7 +724,7 @@ define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -736,7 +736,7 @@ define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32 entry: %shuffle.i = 
shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -747,7 +747,7 @@ define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -758,7 +758,7 @@ define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -769,7 +769,7 @@ define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -780,7 +780,7 @@ define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -792,7 +792,7 @@ define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -804,7 +804,7 @@ define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -816,7 +816,7 @@ define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call 
<4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -828,7 +828,7 @@ define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -839,7 +839,7 @@ define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -850,7 +850,7 @@ define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -861,7 +861,7 @@ define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -872,7 +872,7 @@ define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -884,7 +884,7 @@ define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -896,7 +896,7 @@ define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -908,7 +908,7 @@ define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16 entry: 
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -920,7 +920,7 @@ define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -931,7 +931,7 @@ define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -941,7 +941,7 @@ define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -951,7 +951,7 @@ define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -961,7 +961,7 @@ define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -972,7 +972,7 @@ define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -983,7 +983,7 @@ define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -994,7 +994,7 @@ define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) { 
entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -1005,7 +1005,7 @@ define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -1015,7 +1015,7 @@ define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -1025,7 +1025,7 @@ define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -1035,7 +1035,7 @@ define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -1045,7 +1045,7 @@ define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -1056,7 +1056,7 @@ define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -1067,7 +1067,7 @@ define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -1078,7 +1078,7 @@ define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) { entry: %shuffle.i = shufflevector <8 x 
i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -1089,7 +1089,7 @@ define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -1099,8 +1099,8 @@ define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) ret <4 x i32> %vqdmlal4.i } @@ -1110,8 +1110,8 @@ define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) ret <2 x i64> %vqdmlal4.i } @@ -1122,8 +1122,8 @@ define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i1 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) ret <4 x i32> %vqdmlal4.i } @@ -1134,8 +1134,8 @@ define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i3 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) ret <2 x i64> %vqdmlal4.i } @@ -1145,8 +1145,8 @@ define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 
x i16> %b, <4 x i16> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) ret <4 x i32> %vqdmlsl4.i } @@ -1156,8 +1156,8 @@ define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) ret <2 x i64> %vqdmlsl4.i } @@ -1168,8 +1168,8 @@ define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i1 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) ret <4 x i32> %vqdmlsl4.i } @@ -1180,8 +1180,8 @@ define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i3 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) ret <2 x i64> %vqdmlsl4.i } @@ -1191,7 +1191,7 @@ define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -1201,7 +1201,7 @@ define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -1211,7 +1211,7 @@ define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector 
<8 x i16> %v, <8 x i16> undef, <4 x i32> - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -1221,7 +1221,7 @@ define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -1232,7 +1232,7 @@ define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -1243,7 +1243,7 @@ define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -1254,7 +1254,7 @@ define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -1265,7 +1265,7 @@ define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -1275,7 +1275,7 @@ define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmulh2.i = tail call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) + %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i16> %vqdmulh2.i } @@ -1285,7 +1285,7 @@ define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> - %vqdmulh2.i = tail call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) + %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) ret <8 x i16> %vqdmulh2.i } @@ -1295,7 +1295,7 @@ define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> 
%v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmulh2.i = tail call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) + %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i32> %vqdmulh2.i } @@ -1305,7 +1305,7 @@ define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> - %vqdmulh2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) + %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) ret <4 x i32> %vqdmulh2.i } @@ -1315,7 +1315,7 @@ define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqrdmulh2.i = tail call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) + %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i16> %vqrdmulh2.i } @@ -1325,7 +1325,7 @@ define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> - %vqrdmulh2.i = tail call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) + %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) ret <8 x i16> %vqrdmulh2.i } @@ -1335,7 +1335,7 @@ define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqrdmulh2.i = tail call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) + %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i32> %vqrdmulh2.i } @@ -1345,7 +1345,7 @@ define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> - %vqrdmulh2.i = tail call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) + %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) ret <4 x i32> %vqrdmulh2.i } @@ -1441,7 +1441,7 @@ define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> - %vmulx2.i = tail call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) + %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) ret <2 x float> %vmulx2.i } @@ -1451,7 +1451,7 @@ define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> - %vmulx2.i = tail call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) + %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) ret <4 x float> %vmulx2.i } @@ -1461,7 +1461,7 @@ define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x double> 
@llvm.arm64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) + %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) ret <2 x double> %vmulx2.i } @@ -1471,7 +1471,7 @@ define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> - %vmulx2.i = tail call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) + %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) ret <2 x float> %vmulx2.i } @@ -1481,7 +1481,7 @@ define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> - %vmulx2.i = tail call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) + %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) ret <4 x float> %vmulx2.i } @@ -1491,7 +1491,7 @@ define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> - %vmulx2.i = tail call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) + %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) ret <2 x double> %vmulx2.i } @@ -1942,7 +1942,7 @@ define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -1953,7 +1953,7 @@ define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -1964,7 +1964,7 @@ define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -1975,7 +1975,7 @@ define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -1987,7 +1987,7 @@ define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i1 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector 
<4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -1999,7 +1999,7 @@ define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i3 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -2011,7 +2011,7 @@ define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -2023,7 +2023,7 @@ define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -2034,7 +2034,7 @@ define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2045,7 +2045,7 @@ define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2056,7 +2056,7 @@ define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2067,7 +2067,7 @@ define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> 
@llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2079,7 +2079,7 @@ define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i1 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2091,7 +2091,7 @@ define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i3 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2103,7 +2103,7 @@ define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2115,7 +2115,7 @@ define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2126,7 +2126,7 @@ define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -2137,7 +2137,7 @@ define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -2148,7 +2148,7 @@ define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> 
%shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -2159,7 +2159,7 @@ define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -2171,7 +2171,7 @@ define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i1 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -2183,7 +2183,7 @@ define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i3 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -2195,7 +2195,7 @@ define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -2207,7 +2207,7 @@ define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -2218,7 +2218,7 @@ define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2229,7 +2229,7 @@ define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> 
@llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2240,7 +2240,7 @@ define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2251,7 +2251,7 @@ define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2263,7 +2263,7 @@ define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i1 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2275,7 +2275,7 @@ define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i3 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2287,7 +2287,7 @@ define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2299,7 +2299,7 @@ define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2310,7 +2310,7 @@ define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 
ret <4 x i32> %vmull2.i } @@ -2320,7 +2320,7 @@ define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2330,7 +2330,7 @@ define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -2340,7 +2340,7 @@ define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2351,7 +2351,7 @@ define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -2362,7 +2362,7 @@ define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2373,7 +2373,7 @@ define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -2384,7 +2384,7 @@ define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2394,7 +2394,7 @@ define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -2404,7 +2404,7 @@ define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2414,7 +2414,7 @@ define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -2424,7 +2424,7 @@ define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2435,7 +2435,7 @@ define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -2446,7 +2446,7 @@ define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2457,7 +2457,7 @@ define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -2468,7 +2468,7 @@ define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2478,8 +2478,8 @@ define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x 
i32> zeroinitializer - %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) ret <4 x i32> %vqdmlal4.i } @@ -2489,8 +2489,8 @@ define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) ret <2 x i64> %vqdmlal4.i } @@ -2501,8 +2501,8 @@ define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) ret <4 x i32> %vqdmlal4.i } @@ -2513,8 +2513,8 @@ define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) ret <2 x i64> %vqdmlal4.i } @@ -2524,8 +2524,8 @@ define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) ret <4 x i32> %vqdmlsl4.i } @@ -2535,8 +2535,8 @@ define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> 
@llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) ret <2 x i64> %vqdmlsl4.i } @@ -2547,8 +2547,8 @@ define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) ret <4 x i32> %vqdmlsl4.i } @@ -2559,8 +2559,8 @@ define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) ret <2 x i64> %vqdmlsl4.i } @@ -2570,7 +2570,7 @@ define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -2580,7 +2580,7 @@ define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -2590,7 +2590,7 @@ define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -2600,7 +2600,7 @@ define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -2611,7 +2611,7 @@ define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> 
%v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -2622,7 +2622,7 @@ define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -2633,7 +2633,7 @@ define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -2644,7 +2644,7 @@ define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -2654,7 +2654,7 @@ define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmulh2.i = tail call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) + %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i16> %vqdmulh2.i } @@ -2664,7 +2664,7 @@ define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %vqdmulh2.i = tail call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) + %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) ret <8 x i16> %vqdmulh2.i } @@ -2674,7 +2674,7 @@ define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmulh2.i = tail call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) + %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i32> %vqdmulh2.i } @@ -2684,7 +2684,7 @@ define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %vqdmulh2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) + %vqdmulh2.i = tail call 
<4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) ret <4 x i32> %vqdmulh2.i } @@ -2694,7 +2694,7 @@ define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqrdmulh2.i = tail call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) + %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i16> %vqrdmulh2.i } @@ -2704,7 +2704,7 @@ define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %vqrdmulh2.i = tail call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) + %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) ret <8 x i16> %vqrdmulh2.i } @@ -2714,7 +2714,7 @@ define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqrdmulh2.i = tail call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) + %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i32> %vqrdmulh2.i } @@ -2724,7 +2724,7 @@ define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %vqrdmulh2.i = tail call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) + %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) ret <4 x i32> %vqrdmulh2.i } @@ -2797,7 +2797,7 @@ define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) + %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) ret <2 x float> %vmulx2.i } @@ -2807,7 +2807,7 @@ define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer - %vmulx2.i = tail call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) + %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) ret <4 x float> %vmulx2.i } @@ -2817,7 +2817,7 @@ define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) + %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) ret <2 x double> %vmulx2.i } @@ -2827,7 +2827,7 @@ define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) + %vmulx2.i = tail call <2 x 
float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) ret <2 x float> %vmulx2.i } @@ -2837,7 +2837,7 @@ define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer - %vmulx2.i = tail call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) + %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) ret <4 x float> %vmulx2.i } @@ -2847,7 +2847,7 @@ define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) + %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) ret <2 x double> %vmulx2.i } diff --git a/test/CodeGen/ARM64/aarch64-neon-3vdiff.ll b/test/CodeGen/AArch64/arm64-neon-3vdiff.ll similarity index 85% rename from test/CodeGen/ARM64/aarch64-neon-3vdiff.ll rename to test/CodeGen/AArch64/arm64-neon-3vdiff.ll index a479844de8d6..cb9b36c4c183 100644 --- a/test/CodeGen/ARM64/aarch64-neon-3vdiff.ll +++ b/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -1,54 +1,54 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s -declare <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) +declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) -declare <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) -declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) -declare <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) -declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) -declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8>, <8 x i8>) +declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) -declare <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8>, <8 x i8>) +declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>) -declare <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) +declare <4 x 
i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) -declare <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) -declare <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>) +declare <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>) -declare <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>) +declare <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>) -declare <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) +declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) -declare <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>) +declare <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>) -declare <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) +declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) -declare <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>) +declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>) define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vaddl_s8: @@ -690,7 +690,7 @@ define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vraddhn_s16: ; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vraddhn2.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) ret <8 x i8> %vraddhn2.i } @@ -698,7 +698,7 @@ define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vraddhn_s32: ; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vraddhn2.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) ret <4 x i16> %vraddhn2.i } @@ -706,7 +706,7 @@ define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vraddhn_s64: ; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vraddhn2.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) ret <2 x i32> %vraddhn2.i } @@ -714,7 +714,7 @@ define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vraddhn_u16: ; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vraddhn2.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) ret <8 x i8> %vraddhn2.i } @@ -722,7 +722,7 @@ define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vraddhn_u32: ; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vraddhn2.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) ret <4 x i16> 
%vraddhn2.i } @@ -730,7 +730,7 @@ define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vraddhn_u64: ; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vraddhn2.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) ret <2 x i32> %vraddhn2.i } @@ -738,7 +738,7 @@ define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; CHECK-LABEL: test_vraddhn_high_s16: ; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vraddhn2.i.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) %0 = bitcast <8 x i8> %r to <1 x i64> %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -750,7 +750,7 @@ define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b ; CHECK-LABEL: test_vraddhn_high_s32: ; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vraddhn2.i.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) %0 = bitcast <4 x i16> %r to <1 x i64> %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -762,7 +762,7 @@ define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b ; CHECK-LABEL: test_vraddhn_high_s64: ; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vraddhn2.i.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) %0 = bitcast <2 x i32> %r to <1 x i64> %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -774,7 +774,7 @@ define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; CHECK-LABEL: test_vraddhn_high_u16: ; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vraddhn2.i.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) %0 = bitcast <8 x i8> %r to <1 x i64> %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -786,7 +786,7 @@ define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b ; CHECK-LABEL: test_vraddhn_high_u32: ; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vraddhn2.i.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) %0 = bitcast <4 x i16> %r to <1 x i64> %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -798,7 +798,7 @@ define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b ; CHECK-LABEL: test_vraddhn_high_u64: ; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vraddhn2.i.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vraddhn2.i.i = tail call <2 x i32> 
@llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) %0 = bitcast <2 x i32> %r to <1 x i64> %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -954,7 +954,7 @@ define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vrsubhn_s16: ; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vrsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) ret <8 x i8> %vrsubhn2.i } @@ -962,7 +962,7 @@ define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vrsubhn_s32: ; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vrsubhn2.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) ret <4 x i16> %vrsubhn2.i } @@ -970,7 +970,7 @@ define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vrsubhn_s64: ; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vrsubhn2.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) ret <2 x i32> %vrsubhn2.i } @@ -978,7 +978,7 @@ define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vrsubhn_u16: ; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vrsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) ret <8 x i8> %vrsubhn2.i } @@ -986,7 +986,7 @@ define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vrsubhn_u32: ; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vrsubhn2.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) ret <4 x i16> %vrsubhn2.i } @@ -994,7 +994,7 @@ define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vrsubhn_u64: ; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vrsubhn2.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) ret <2 x i32> %vrsubhn2.i } @@ -1002,7 +1002,7 @@ define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; CHECK-LABEL: test_vrsubhn_high_s16: ; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) %0 = bitcast <8 x i8> %r to <1 x i64> %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -1014,7 +1014,7 @@ define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b ; CHECK-LABEL: test_vrsubhn_high_s32: ; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vrsubhn2.i.i = tail call <4 x i16> 
@llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) %0 = bitcast <4 x i16> %r to <1 x i64> %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -1026,7 +1026,7 @@ define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b ; CHECK-LABEL: test_vrsubhn_high_s64: ; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) %0 = bitcast <2 x i32> %r to <1 x i64> %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -1038,7 +1038,7 @@ define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; CHECK-LABEL: test_vrsubhn_high_u16: ; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) %0 = bitcast <8 x i8> %r to <1 x i64> %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -1050,7 +1050,7 @@ define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b ; CHECK-LABEL: test_vrsubhn_high_u32: ; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) %0 = bitcast <4 x i16> %r to <1 x i64> %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -1062,7 +1062,7 @@ define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b ; CHECK-LABEL: test_vrsubhn_high_u64: ; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) %0 = bitcast <2 x i32> %r to <1 x i64> %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -1074,7 +1074,7 @@ define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vabdl_s8: ; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vabd.i.i = tail call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) + %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> ret <8 x i16> %vmovl.i.i } @@ -1083,7 +1083,7 @@ define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vabdl_s16: ; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vabd2.i.i = tail call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b) + %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b) %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> ret <4 x i32> %vmovl.i.i } @@ -1092,7 +1092,7 @@ define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vabdl_s32: ; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vabd2.i.i = tail call <2 x 
i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b) + %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b) %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64> ret <2 x i64> %vmovl.i.i } @@ -1101,7 +1101,7 @@ define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vabdl_u8: ; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vabd.i.i = tail call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) + %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> ret <8 x i16> %vmovl.i.i } @@ -1110,7 +1110,7 @@ define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vabdl_u16: ; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vabd2.i.i = tail call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b) + %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b) %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> ret <4 x i32> %vmovl.i.i } @@ -1119,7 +1119,7 @@ define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vabdl_u32: ; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vabd2.i.i = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b) + %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b) %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64> ret <2 x i64> %vmovl.i.i } @@ -1128,7 +1128,7 @@ define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: test_vabal_s8: ; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vabd.i.i.i = tail call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) + %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> %add.i = add <8 x i16> %vmovl.i.i.i, %a ret <8 x i16> %add.i @@ -1138,7 +1138,7 @@ define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vabal_s16: ; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c) + %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c) %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> %add.i = add <4 x i32> %vmovl.i.i.i, %a ret <4 x i32> %add.i @@ -1148,7 +1148,7 @@ define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vabal_s32: ; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c) + %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c) %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> %add.i = add <2 x i64> %vmovl.i.i.i, %a ret <2 x i64> %add.i @@ -1158,7 +1158,7 @@ define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: test_vabal_u8: ; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vabd.i.i.i = tail call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) + %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> %add.i = add <8 x i16> %vmovl.i.i.i, %a ret <8 x i16> %add.i @@ 
-1168,7 +1168,7 @@ define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vabal_u16: ; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c) + %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c) %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> %add.i = add <4 x i32> %vmovl.i.i.i, %a ret <4 x i32> %add.i @@ -1178,7 +1178,7 @@ define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vabal_u32: ; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c) + %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c) %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> %add.i = add <2 x i64> %vmovl.i.i.i, %a ret <2 x i64> %add.i @@ -1190,7 +1190,7 @@ define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) { entry: %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %vabd.i.i.i = tail call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> ret <8 x i16> %vmovl.i.i.i } @@ -1201,7 +1201,7 @@ define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) { entry: %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> ret <4 x i32> %vmovl.i.i.i } @@ -1212,7 +1212,7 @@ define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) { entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> ret <2 x i64> %vmovl.i.i.i } @@ -1223,7 +1223,7 @@ define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) { entry: %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %vabd.i.i.i = tail call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> ret <8 x i16> %vmovl.i.i.i } @@ -1234,7 +1234,7 @@ define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) { entry: %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + 
%vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> ret <4 x i32> %vmovl.i.i.i } @@ -1245,7 +1245,7 @@ define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) { entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> ret <2 x i64> %vmovl.i.i.i } @@ -1256,7 +1256,7 @@ define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { entry: %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16> %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a ret <8 x i16> %add.i.i @@ -1268,7 +1268,7 @@ define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vabd2.i.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32> %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a ret <4 x i32> %add.i.i @@ -1280,7 +1280,7 @@ define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vabd2.i.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64> %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a ret <2 x i64> %add.i.i @@ -1292,7 +1292,7 @@ define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { entry: %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16> %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a ret <8 x i16> %add.i.i @@ -1304,7 +1304,7 @@ define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + 
%vabd2.i.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32> %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a ret <4 x i32> %add.i.i @@ -1316,7 +1316,7 @@ define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vabd2.i.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64> %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a ret <2 x i64> %add.i.i @@ -1326,7 +1326,7 @@ define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vmull_s8: ; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vmull.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) + %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) ret <8 x i16> %vmull.i } @@ -1334,7 +1334,7 @@ define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vmull_s16: ; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b) ret <4 x i32> %vmull2.i } @@ -1342,7 +1342,7 @@ define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vmull_s32: ; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b) ret <2 x i64> %vmull2.i } @@ -1350,7 +1350,7 @@ define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vmull_u8: ; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vmull.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) + %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) ret <8 x i16> %vmull.i } @@ -1358,7 +1358,7 @@ define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vmull_u16: ; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b) ret <4 x i32> %vmull2.i } @@ -1366,7 +1366,7 @@ define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vmull_u32: ; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b) ret <2 x i64> %vmull2.i } @@ -1376,7 +1376,7 @@ define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) { entry: %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + 
%vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) ret <8 x i16> %vmull.i.i } @@ -1386,7 +1386,7 @@ define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) { entry: %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) ret <4 x i32> %vmull2.i.i } @@ -1396,7 +1396,7 @@ define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) { entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) ret <2 x i64> %vmull2.i.i } @@ -1406,7 +1406,7 @@ define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) { entry: %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) ret <8 x i16> %vmull.i.i } @@ -1416,7 +1416,7 @@ define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) { entry: %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) ret <4 x i32> %vmull2.i.i } @@ -1426,7 +1426,7 @@ define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) { entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) ret <2 x i64> %vmull2.i.i } @@ -1434,7 +1434,7 @@ define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: test_vmlal_s8: ; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) %add.i = add <8 x i16> %vmull.i.i, %a ret <8 x i16> %add.i } @@ -1443,7 +1443,7 @@ define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vmlal_s16: ; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) %add.i = add <4 x i32> %vmull2.i.i, %a ret <4 x i32> %add.i } @@ -1452,7 +1452,7 @@ define <2 x i64> @test_vmlal_s32(<2 
x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vmlal_s32: ; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) %add.i = add <2 x i64> %vmull2.i.i, %a ret <2 x i64> %add.i } @@ -1461,7 +1461,7 @@ define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: test_vmlal_u8: ; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) %add.i = add <8 x i16> %vmull.i.i, %a ret <8 x i16> %add.i } @@ -1470,7 +1470,7 @@ define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vmlal_u16: ; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) %add.i = add <4 x i32> %vmull2.i.i, %a ret <4 x i32> %add.i } @@ -1479,7 +1479,7 @@ define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vmlal_u32: ; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) %add.i = add <2 x i64> %vmull2.i.i, %a ret <2 x i64> %add.i } @@ -1490,7 +1490,7 @@ define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { entry: %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %add.i.i = add <8 x i16> %vmull.i.i.i, %a ret <8 x i16> %add.i.i } @@ -1501,7 +1501,7 @@ define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %add.i.i = add <4 x i32> %vmull2.i.i.i, %a ret <4 x i32> %add.i.i } @@ -1512,7 +1512,7 @@ define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %add.i.i = add <2 x i64> %vmull2.i.i.i, %a ret <2 x i64> %add.i.i } @@ -1523,7 +1523,7 @@ define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { entry: %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x 
i32> %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %add.i.i = add <8 x i16> %vmull.i.i.i, %a ret <8 x i16> %add.i.i } @@ -1534,7 +1534,7 @@ define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %add.i.i = add <4 x i32> %vmull2.i.i.i, %a ret <4 x i32> %add.i.i } @@ -1545,7 +1545,7 @@ define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %add.i.i = add <2 x i64> %vmull2.i.i.i, %a ret <2 x i64> %add.i.i } @@ -1554,7 +1554,7 @@ define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: test_vmlsl_s8: ; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) %sub.i = sub <8 x i16> %a, %vmull.i.i ret <8 x i16> %sub.i } @@ -1563,7 +1563,7 @@ define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vmlsl_s16: ; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) %sub.i = sub <4 x i32> %a, %vmull2.i.i ret <4 x i32> %sub.i } @@ -1572,7 +1572,7 @@ define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vmlsl_s32: ; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) %sub.i = sub <2 x i64> %a, %vmull2.i.i ret <2 x i64> %sub.i } @@ -1581,7 +1581,7 @@ define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: test_vmlsl_u8: ; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) %sub.i = sub <8 x i16> %a, %vmull.i.i ret <8 x i16> %sub.i } @@ -1590,7 +1590,7 @@ define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vmlsl_u16: ; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) + %vmull2.i.i 
= tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) %sub.i = sub <4 x i32> %a, %vmull2.i.i ret <4 x i32> %sub.i } @@ -1599,7 +1599,7 @@ define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vmlsl_u32: ; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) %sub.i = sub <2 x i64> %a, %vmull2.i.i ret <2 x i64> %sub.i } @@ -1610,7 +1610,7 @@ define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { entry: %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i ret <8 x i16> %sub.i.i } @@ -1621,7 +1621,7 @@ define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i ret <4 x i32> %sub.i.i } @@ -1632,7 +1632,7 @@ define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i ret <2 x i64> %sub.i.i } @@ -1643,7 +1643,7 @@ define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { entry: %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i ret <8 x i16> %sub.i.i } @@ -1654,7 +1654,7 @@ define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i ret <4 x i32> %sub.i.i } @@ -1665,7 +1665,7 @@ define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x 
i32> %c, <4 x i32> undef, <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i ret <2 x i64> %sub.i.i } @@ -1674,7 +1674,7 @@ define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vqdmull_s16: ; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) ret <4 x i32> %vqdmull2.i } @@ -1682,7 +1682,7 @@ define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vqdmull_s32: ; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) ret <2 x i64> %vqdmull2.i } @@ -1690,8 +1690,8 @@ define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vqdmlal_s16: ; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) + %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) ret <4 x i32> %vqdmlal4.i } @@ -1699,8 +1699,8 @@ define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vqdmlal_s32: ; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) + %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) ret <2 x i64> %vqdmlal4.i } @@ -1708,8 +1708,8 @@ define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vqdmlsl_s16: ; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) + %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) ret <4 x i32> %vqdmlsl4.i } @@ -1717,8 +1717,8 @@ define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vqdmlsl_s32: ; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) + 
%vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) ret <2 x i64> %vqdmlsl4.i } @@ -1728,7 +1728,7 @@ define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) { entry: %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vqdmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vqdmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) ret <4 x i32> %vqdmull2.i.i } @@ -1738,7 +1738,7 @@ define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) { entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vqdmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vqdmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) ret <2 x i64> %vqdmull2.i.i } @@ -1748,8 +1748,8 @@ define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vqdmlal2.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vqdmlal4.i.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i) + %vqdmlal2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vqdmlal4.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i) ret <4 x i32> %vqdmlal4.i.i } @@ -1759,8 +1759,8 @@ define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vqdmlal2.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vqdmlal4.i.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i) + %vqdmlal2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vqdmlal4.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i) ret <2 x i64> %vqdmlal4.i.i } @@ -1770,8 +1770,8 @@ define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vqdmlsl2.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vqdmlsl4.i.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i) + %vqdmlsl2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vqdmlsl4.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i) ret <4 x i32> %vqdmlsl4.i.i } @@ -1781,8 +1781,8 @@ define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector 
<4 x i32> %c, <4 x i32> undef, <2 x i32> - %vqdmlsl2.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vqdmlsl4.i.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i) + %vqdmlsl2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vqdmlsl4.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i) ret <2 x i64> %vqdmlsl4.i.i } @@ -1790,7 +1790,7 @@ define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vmull_p8: ; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vmull.i = tail call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) + %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) ret <8 x i16> %vmull.i } @@ -1800,7 +1800,7 @@ define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) { entry: %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) ret <8 x i16> %vmull.i.i } @@ -1808,7 +1808,7 @@ define i128 @test_vmull_p64(i64 %a, i64 %b) #4 { ; CHECK-LABEL: test_vmull_p64 ; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d entry: - %vmull2.i = tail call <16 x i8> @llvm.arm64.neon.pmull64(i64 %a, i64 %b) + %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b) %vmull3.i = bitcast <16 x i8> %vmull2.i to i128 ret i128 %vmull3.i } @@ -1819,11 +1819,11 @@ define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 { entry: %0 = extractelement <2 x i64> %a, i32 1 %1 = extractelement <2 x i64> %b, i32 1 - %vmull2.i.i = tail call <16 x i8> @llvm.arm64.neon.pmull64(i64 %0, i64 %1) #1 + %vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %0, i64 %1) #1 %vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128 ret i128 %vmull3.i.i } -declare <16 x i8> @llvm.arm64.neon.pmull64(i64, i64) #5 +declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5 diff --git a/test/CodeGen/ARM64/aarch64-neon-aba-abd.ll b/test/CodeGen/AArch64/arm64-neon-aba-abd.ll similarity index 58% rename from test/CodeGen/ARM64/aarch64-neon-aba-abd.ll rename to test/CodeGen/AArch64/arm64-neon-aba-abd.ll index 9d321b239763..6404ab728011 100644 --- a/test/CodeGen/ARM64/aarch64-neon-aba-abd.ll +++ b/test/CodeGen/AArch64/arm64-neon-aba-abd.ll @@ -1,18 +1,18 @@ ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -declare <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; CHECK: test_uabd_v8i8: - %abd = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) + %abd = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ; CHECK: uabd v0.8b, v0.8b, v1.8b ret <8 x i8> %abd } define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; CHECK: test_uaba_v8i8: - %abd = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) + %abd = call <8 x i8> 
@llvm.aarch64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) %aba = add <8 x i8> %lhs, %abd ; CHECK: uaba v0.8b, v0.8b, v1.8b ret <8 x i8> %aba @@ -20,32 +20,32 @@ define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; CHECK: test_sabd_v8i8: - %abd = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) + %abd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ; CHECK: sabd v0.8b, v0.8b, v1.8b ret <8 x i8> %abd } define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; CHECK: test_saba_v8i8: - %abd = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) + %abd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) %aba = add <8 x i8> %lhs, %abd ; CHECK: saba v0.8b, v0.8b, v1.8b ret <8 x i8> %aba } -declare <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>) define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; CHECK: test_uabd_v16i8: - %abd = call <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) + %abd = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ; CHECK: uabd v0.16b, v0.16b, v1.16b ret <16 x i8> %abd } define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; CHECK: test_uaba_v16i8: - %abd = call <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) + %abd = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) %aba = add <16 x i8> %lhs, %abd ; CHECK: uaba v0.16b, v0.16b, v1.16b ret <16 x i8> %aba @@ -53,32 +53,32 @@ define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; CHECK: test_sabd_v16i8: - %abd = call <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) + %abd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ; CHECK: sabd v0.16b, v0.16b, v1.16b ret <16 x i8> %abd } define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; CHECK: test_saba_v16i8: - %abd = call <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) + %abd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) %aba = add <16 x i8> %lhs, %abd ; CHECK: saba v0.16b, v0.16b, v1.16b ret <16 x i8> %aba } -declare <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK: test_uabd_v4i16: - %abd = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) + %abd = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; CHECK: uabd v0.4h, v0.4h, v1.4h ret <4 x i16> %abd } define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK: test_uaba_v4i16: - %abd = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) + %abd = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) %aba = add <4 x i16> %lhs, %abd ; CHECK: uaba v0.4h, v0.4h, v1.4h ret <4 x i16> %aba @@ -86,32 +86,32 @@ 
define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK: test_sabd_v4i16: - %abd = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) + %abd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; CHECK: sabd v0.4h, v0.4h, v1.4h ret <4 x i16> %abd } define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK: test_saba_v4i16: - %abd = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) + %abd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) %aba = add <4 x i16> %lhs, %abd ; CHECK: saba v0.4h, v0.4h, v1.4h ret <4 x i16> %aba } -declare <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>) define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK: test_uabd_v8i16: - %abd = call <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) + %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; CHECK: uabd v0.8h, v0.8h, v1.8h ret <8 x i16> %abd } define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK: test_uaba_v8i16: - %abd = call <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) + %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) %aba = add <8 x i16> %lhs, %abd ; CHECK: uaba v0.8h, v0.8h, v1.8h ret <8 x i16> %aba @@ -119,32 +119,32 @@ define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK: test_sabd_v8i16: - %abd = call <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) + %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; CHECK: sabd v0.8h, v0.8h, v1.8h ret <8 x i16> %abd } define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK: test_saba_v8i16: - %abd = call <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) + %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) %aba = add <8 x i16> %lhs, %abd ; CHECK: saba v0.8h, v0.8h, v1.8h ret <8 x i16> %aba } -declare <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: test_uabd_v2i32: - %abd = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + %abd = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; CHECK: uabd v0.2s, v0.2s, v1.2s ret <2 x i32> %abd } define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: test_uaba_v2i32: - %abd = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + %abd = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) %aba = add <2 x i32> %lhs, %abd ; CHECK: uaba v0.2s, v0.2s, v1.2s ret <2 x i32> %aba @@ -152,7 +152,7 @@ define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; 
CHECK: test_sabd_v2i32: - %abd = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + %abd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; CHECK: sabd v0.2s, v0.2s, v1.2s ret <2 x i32> %abd } @@ -161,7 +161,7 @@ define <2 x i32> @test_sabd_v2i32_const() { ; CHECK: test_sabd_v2i32_const: ; CHECK: movi d1, #0x00ffffffff0000 ; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s - %1 = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32( + %1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32( <2 x i32> , <2 x i32> ) ret <2 x i32> %1 @@ -169,25 +169,25 @@ define <2 x i32> @test_sabd_v2i32_const() { define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: test_saba_v2i32: - %abd = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + %abd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) %aba = add <2 x i32> %lhs, %abd ; CHECK: saba v0.2s, v0.2s, v1.2s ret <2 x i32> %aba } -declare <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK: test_uabd_v4i32: - %abd = call <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) + %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; CHECK: uabd v0.4s, v0.4s, v1.4s ret <4 x i32> %abd } define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK: test_uaba_v4i32: - %abd = call <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) + %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) %aba = add <4 x i32> %lhs, %abd ; CHECK: uaba v0.4s, v0.4s, v1.4s ret <4 x i32> %aba @@ -195,42 +195,42 @@ define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK: test_sabd_v4i32: - %abd = call <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) + %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; CHECK: sabd v0.4s, v0.4s, v1.4s ret <4 x i32> %abd } define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK: test_saba_v4i32: - %abd = call <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) + %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) %aba = add <4 x i32> %lhs, %abd ; CHECK: saba v0.4s, v0.4s, v1.4s ret <4 x i32> %aba } -declare <2 x float> @llvm.arm64.neon.fabd.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>) define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) { ; CHECK: test_fabd_v2f32: - %abd = call <2 x float> @llvm.arm64.neon.fabd.v2f32(<2 x float> %lhs, <2 x float> %rhs) + %abd = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %lhs, <2 x float> %rhs) ; CHECK: fabd v0.2s, v0.2s, v1.2s ret <2 x float> %abd } -declare <4 x float> @llvm.arm64.neon.fabd.v4f32(<4 x float>, <4 x float>) +declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>) define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) { ; CHECK: test_fabd_v4f32: - %abd = call <4 x float> @llvm.arm64.neon.fabd.v4f32(<4 x float> %lhs, <4 x float> %rhs) + %abd = call <4 x 
float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %lhs, <4 x float> %rhs) ; CHECK: fabd v0.4s, v0.4s, v1.4s ret <4 x float> %abd } -declare <2 x double> @llvm.arm64.neon.fabd.v2f64(<2 x double>, <2 x double>) +declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>) define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) { ; CHECK: test_fabd_v2f64: - %abd = call <2 x double> @llvm.arm64.neon.fabd.v2f64(<2 x double> %lhs, <2 x double> %rhs) + %abd = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %lhs, <2 x double> %rhs) ; CHECK: fabd v0.2d, v0.2d, v1.2d ret <2 x double> %abd } diff --git a/test/CodeGen/ARM64/aarch64-neon-across.ll b/test/CodeGen/AArch64/arm64-neon-across.ll similarity index 56% rename from test/CodeGen/ARM64/aarch64-neon-across.ll rename to test/CodeGen/AArch64/arm64-neon-across.ll index 5a3538ba88ec..3a63673f1209 100644 --- a/test/CodeGen/ARM64/aarch64-neon-across.ll +++ b/test/CodeGen/AArch64/arm64-neon-across.ll @@ -1,88 +1,88 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s -declare float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>) -declare float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float>) -declare float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float>) -declare float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float>) -declare i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>) -declare i32 @llvm.arm64.neon.uminv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.uminv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.sminv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.sminv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.sminv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.uminv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>) -declare i32 @llvm.arm64.neon.sminv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.sminv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>) -declare i32 @llvm.arm64.neon.umaxv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.umaxv.i32.v8i16(<8 x i16>) +declare i32 
@llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.smaxv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.smaxv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.smaxv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.umaxv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>) -declare i32 @llvm.arm64.neon.smaxv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>) -declare i64 @llvm.arm64.neon.uaddlv.i64.v4i32(<4 x i32>) +declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.uaddlv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.uaddlv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>) -declare i64 @llvm.arm64.neon.saddlv.i64.v4i32(<4 x i32>) +declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.saddlv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.saddlv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.uaddlv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.uaddlv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>) -declare i32 @llvm.arm64.neon.saddlv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.saddlv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8>) define i16 @test_vaddlv_s8(<8 x i8> %a) { ; CHECK: test_vaddlv_s8: ; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %saddlvv.i = tail call i32 @llvm.arm64.neon.saddlv.i32.v8i8(<8 x i8> %a) + %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %saddlvv.i to i16 ret i16 %0 } @@ -91,7 +91,7 @@ define i32 @test_vaddlv_s16(<4 x i16> %a) { ; CHECK: test_vaddlv_s16: ; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %saddlvv.i = tail call i32 @llvm.arm64.neon.saddlv.i32.v4i16(<4 x i16> %a) + %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a) ret i32 %saddlvv.i } @@ -99,7 +99,7 @@ define i16 @test_vaddlv_u8(<8 x i8> %a) { ; CHECK: test_vaddlv_u8: ; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %uaddlvv.i = tail call i32 @llvm.arm64.neon.uaddlv.i32.v8i8(<8 x i8> %a) + %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %uaddlvv.i to i16 ret i16 %0 } @@ -108,7 +108,7 @@ define i32 @test_vaddlv_u16(<4 x i16> %a) { ; CHECK: test_vaddlv_u16: ; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %uaddlvv.i = tail call i32 @llvm.arm64.neon.uaddlv.i32.v4i16(<4 x i16> %a) + %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) ret i32 %uaddlvv.i } @@ -116,7 +116,7 @@ define i16 @test_vaddlvq_s8(<16 x i8> %a) { ; CHECK: test_vaddlvq_s8: ; CHECK: saddlv h{{[0-9]+}}, 
{{v[0-9]+}}.16b entry: - %saddlvv.i = tail call i32 @llvm.arm64.neon.saddlv.i32.v16i8(<16 x i8> %a) + %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %saddlvv.i to i16 ret i16 %0 } @@ -125,7 +125,7 @@ define i32 @test_vaddlvq_s16(<8 x i16> %a) { ; CHECK: test_vaddlvq_s16: ; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %saddlvv.i = tail call i32 @llvm.arm64.neon.saddlv.i32.v8i16(<8 x i16> %a) + %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) ret i32 %saddlvv.i } @@ -133,7 +133,7 @@ define i64 @test_vaddlvq_s32(<4 x i32> %a) { ; CHECK: test_vaddlvq_s32: ; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %saddlvv.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v4i32(<4 x i32> %a) + %saddlvv.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) ret i64 %saddlvv.i } @@ -141,7 +141,7 @@ define i16 @test_vaddlvq_u8(<16 x i8> %a) { ; CHECK: test_vaddlvq_u8: ; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b entry: - %uaddlvv.i = tail call i32 @llvm.arm64.neon.uaddlv.i32.v16i8(<16 x i8> %a) + %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %uaddlvv.i to i16 ret i16 %0 } @@ -150,7 +150,7 @@ define i32 @test_vaddlvq_u16(<8 x i16> %a) { ; CHECK: test_vaddlvq_u16: ; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %uaddlvv.i = tail call i32 @llvm.arm64.neon.uaddlv.i32.v8i16(<8 x i16> %a) + %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) ret i32 %uaddlvv.i } @@ -158,7 +158,7 @@ define i64 @test_vaddlvq_u32(<4 x i32> %a) { ; CHECK: test_vaddlvq_u32: ; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %uaddlvv.i = tail call i64 @llvm.arm64.neon.uaddlv.i64.v4i32(<4 x i32> %a) + %uaddlvv.i = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) ret i64 %uaddlvv.i } @@ -166,7 +166,7 @@ define i8 @test_vmaxv_s8(<8 x i8> %a) { ; CHECK: test_vmaxv_s8: ; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8> %a) + %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %smaxv.i to i8 ret i8 %0 } @@ -175,7 +175,7 @@ define i16 @test_vmaxv_s16(<4 x i16> %a) { ; CHECK: test_vmaxv_s16: ; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v4i16(<4 x i16> %a) + %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) %0 = trunc i32 %smaxv.i to i16 ret i16 %0 } @@ -184,7 +184,7 @@ define i8 @test_vmaxv_u8(<8 x i8> %a) { ; CHECK: test_vmaxv_u8: ; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %a) + %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %umaxv.i to i8 ret i8 %0 } @@ -193,7 +193,7 @@ define i16 @test_vmaxv_u16(<4 x i16> %a) { ; CHECK: test_vmaxv_u16: ; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v4i16(<4 x i16> %a) + %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) %0 = trunc i32 %umaxv.i to i16 ret i16 %0 } @@ -202,7 +202,7 @@ define i8 @test_vmaxvq_s8(<16 x i8> %a) { ; CHECK: test_vmaxvq_s8: ; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b entry: - %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v16i8(<16 x i8> %a) + %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %smaxv.i to i8 ret i8 %0 } @@ -211,7 +211,7 @@ define i16 @test_vmaxvq_s16(<8 x i16> %a) { ; 
CHECK: test_vmaxvq_s16: ; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v8i16(<8 x i16> %a) + %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) %0 = trunc i32 %smaxv.i to i16 ret i16 %0 } @@ -220,7 +220,7 @@ define i32 @test_vmaxvq_s32(<4 x i32> %a) { ; CHECK: test_vmaxvq_s32: ; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v4i32(<4 x i32> %a) + %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a) ret i32 %smaxv.i } @@ -228,7 +228,7 @@ define i8 @test_vmaxvq_u8(<16 x i8> %a) { ; CHECK: test_vmaxvq_u8: ; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b entry: - %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8> %a) + %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %umaxv.i to i8 ret i8 %0 } @@ -237,7 +237,7 @@ define i16 @test_vmaxvq_u16(<8 x i16> %a) { ; CHECK: test_vmaxvq_u16: ; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i16(<8 x i16> %a) + %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) %0 = trunc i32 %umaxv.i to i16 ret i16 %0 } @@ -246,7 +246,7 @@ define i32 @test_vmaxvq_u32(<4 x i32> %a) { ; CHECK: test_vmaxvq_u32: ; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v4i32(<4 x i32> %a) + %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) ret i32 %umaxv.i } @@ -254,7 +254,7 @@ define i8 @test_vminv_s8(<8 x i8> %a) { ; CHECK: test_vminv_s8: ; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v8i8(<8 x i8> %a) + %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %sminv.i to i8 ret i8 %0 } @@ -263,7 +263,7 @@ define i16 @test_vminv_s16(<4 x i16> %a) { ; CHECK: test_vminv_s16: ; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v4i16(<4 x i16> %a) + %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) %0 = trunc i32 %sminv.i to i16 ret i16 %0 } @@ -272,7 +272,7 @@ define i8 @test_vminv_u8(<8 x i8> %a) { ; CHECK: test_vminv_u8: ; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %a) + %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %uminv.i to i8 ret i8 %0 } @@ -281,7 +281,7 @@ define i16 @test_vminv_u16(<4 x i16> %a) { ; CHECK: test_vminv_u16: ; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v4i16(<4 x i16> %a) + %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) %0 = trunc i32 %uminv.i to i16 ret i16 %0 } @@ -290,7 +290,7 @@ define i8 @test_vminvq_s8(<16 x i8> %a) { ; CHECK: test_vminvq_s8: ; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b entry: - %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v16i8(<16 x i8> %a) + %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %sminv.i to i8 ret i8 %0 } @@ -299,7 +299,7 @@ define i16 @test_vminvq_s16(<8 x i16> %a) { ; CHECK: test_vminvq_s16: ; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v8i16(<8 x i16> %a) + %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) %0 = trunc i32 %sminv.i to i16 ret i16 %0 } @@ -308,7 +308,7 @@ define i32 
@test_vminvq_s32(<4 x i32> %a) { ; CHECK: test_vminvq_s32: ; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v4i32(<4 x i32> %a) + %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) ret i32 %sminv.i } @@ -316,7 +316,7 @@ define i8 @test_vminvq_u8(<16 x i8> %a) { ; CHECK: test_vminvq_u8: ; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b entry: - %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8> %a) + %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %uminv.i to i8 ret i8 %0 } @@ -325,7 +325,7 @@ define i16 @test_vminvq_u16(<8 x i16> %a) { ; CHECK: test_vminvq_u16: ; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i16(<8 x i16> %a) + %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) %0 = trunc i32 %uminv.i to i16 ret i16 %0 } @@ -334,7 +334,7 @@ define i32 @test_vminvq_u32(<4 x i32> %a) { ; CHECK: test_vminvq_u32: ; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v4i32(<4 x i32> %a) + %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) ret i32 %uminv.i } @@ -342,7 +342,7 @@ define i8 @test_vaddv_s8(<8 x i8> %a) { ; CHECK: test_vaddv_s8: ; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -351,7 +351,7 @@ define i16 @test_vaddv_s16(<4 x i16> %a) { ; CHECK: test_vaddv_s16: ; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -360,7 +360,7 @@ define i8 @test_vaddv_u8(<8 x i8> %a) { ; CHECK: test_vaddv_u8: ; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -369,7 +369,7 @@ define i16 @test_vaddv_u16(<4 x i16> %a) { ; CHECK: test_vaddv_u16: ; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -378,7 +378,7 @@ define i8 @test_vaddvq_s8(<16 x i8> %a) { ; CHECK: test_vaddvq_s8: ; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -387,7 +387,7 @@ define i16 @test_vaddvq_s16(<8 x i16> %a) { ; CHECK: test_vaddvq_s16: ; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -396,7 +396,7 @@ define i32 @test_vaddvq_s32(<4 x i32> %a) { ; CHECK: test_vaddvq_s32: ; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) ret i32 %vaddv.i } @@ -404,7 +404,7 @@ define i8 
@test_vaddvq_u8(<16 x i8> %a) { ; CHECK: test_vaddvq_u8: ; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -413,7 +413,7 @@ define i16 @test_vaddvq_u16(<8 x i16> %a) { ; CHECK: test_vaddvq_u16: ; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -422,7 +422,7 @@ define i32 @test_vaddvq_u32(<4 x i32> %a) { ; CHECK: test_vaddvq_u32: ; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) ret i32 %vaddv.i } @@ -430,7 +430,7 @@ define float @test_vmaxvq_f32(<4 x float> %a) { ; CHECK: test_vmaxvq_f32: ; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %0 = call float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float> %a) + %0 = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) ret float %0 } @@ -438,7 +438,7 @@ define float @test_vminvq_f32(<4 x float> %a) { ; CHECK: test_vminvq_f32: ; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %0 = call float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float> %a) + %0 = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) ret float %0 } @@ -446,7 +446,7 @@ define float @test_vmaxnmvq_f32(<4 x float> %a) { ; CHECK: test_vmaxnmvq_f32: ; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %0 = call float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) + %0 = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) ret float %0 } @@ -454,7 +454,7 @@ define float @test_vminnmvq_f32(<4 x float> %a) { ; CHECK: test_vminnmvq_f32: ; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %0 = call float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float> %a) + %0 = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) ret float %0 } diff --git a/test/CodeGen/ARM64/aarch64-neon-add-pairwise.ll b/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll similarity index 51% rename from test/CodeGen/ARM64/aarch64-neon-add-pairwise.ll rename to test/CodeGen/AArch64/arm64-neon-add-pairwise.ll index 9cd76ff1b7e9..d3dc1b8d010f 100644 --- a/test/CodeGen/ARM64/aarch64-neon-add-pairwise.ll +++ b/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll @@ -1,91 +1,91 @@ ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -declare <8 x i8> @llvm.arm64.neon.addp.v8i8(<8 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8>, <8 x i8>) define <8 x i8> @test_addp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; Using registers other than v0, v1 are possible, but would be odd. 
; CHECK: test_addp_v8i8: - %tmp1 = call <8 x i8> @llvm.arm64.neon.addp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) + %tmp1 = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ; CHECK: addp v0.8b, v0.8b, v1.8b ret <8 x i8> %tmp1 } -declare <16 x i8> @llvm.arm64.neon.addp.v16i8(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>) define <16 x i8> @test_addp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; CHECK: test_addp_v16i8: - %tmp1 = call <16 x i8> @llvm.arm64.neon.addp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) + %tmp1 = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ; CHECK: addp v0.16b, v0.16b, v1.16b ret <16 x i8> %tmp1 } -declare <4 x i16> @llvm.arm64.neon.addp.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16>, <4 x i16>) define <4 x i16> @test_addp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK: test_addp_v4i16: - %tmp1 = call <4 x i16> @llvm.arm64.neon.addp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) + %tmp1 = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; CHECK: addp v0.4h, v0.4h, v1.4h ret <4 x i16> %tmp1 } -declare <8 x i16> @llvm.arm64.neon.addp.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16>, <8 x i16>) define <8 x i16> @test_addp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK: test_addp_v8i16: - %tmp1 = call <8 x i16> @llvm.arm64.neon.addp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) + %tmp1 = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; CHECK: addp v0.8h, v0.8h, v1.8h ret <8 x i16> %tmp1 } -declare <2 x i32> @llvm.arm64.neon.addp.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32>, <2 x i32>) define <2 x i32> @test_addp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: test_addp_v2i32: - %tmp1 = call <2 x i32> @llvm.arm64.neon.addp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + %tmp1 = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; CHECK: addp v0.2s, v0.2s, v1.2s ret <2 x i32> %tmp1 } -declare <4 x i32> @llvm.arm64.neon.addp.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32>, <4 x i32>) define <4 x i32> @test_addp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK: test_addp_v4i32: - %tmp1 = call <4 x i32> @llvm.arm64.neon.addp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) + %tmp1 = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; CHECK: addp v0.4s, v0.4s, v1.4s ret <4 x i32> %tmp1 } -declare <2 x i64> @llvm.arm64.neon.addp.v2i64(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) define <2 x i64> @test_addp_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; CHECK: test_addp_v2i64: - %val = call <2 x i64> @llvm.arm64.neon.addp.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) + %val = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) ; CHECK: addp v0.2d, v0.2d, v1.2d ret <2 x i64> %val } -declare <2 x float> @llvm.arm64.neon.addp.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm64.neon.addp.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.arm64.neon.addp.v2f64(<2 x double>, <2 x double>) +declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>) +declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>) define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) { ; 
CHECK: test_faddp_v2f32: - %val = call <2 x float> @llvm.arm64.neon.addp.v2f32(<2 x float> %lhs, <2 x float> %rhs) + %val = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %lhs, <2 x float> %rhs) ; CHECK: faddp v0.2s, v0.2s, v1.2s ret <2 x float> %val } define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) { ; CHECK: test_faddp_v4f32: - %val = call <4 x float> @llvm.arm64.neon.addp.v4f32(<4 x float> %lhs, <4 x float> %rhs) + %val = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %lhs, <4 x float> %rhs) ; CHECK: faddp v0.4s, v0.4s, v1.4s ret <4 x float> %val } define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { ; CHECK: test_faddp_v2f64: - %val = call <2 x double> @llvm.arm64.neon.addp.v2f64(<2 x double> %lhs, <2 x double> %rhs) + %val = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %lhs, <2 x double> %rhs) ; CHECK: faddp v0.2d, v0.2d, v1.2d ret <2 x double> %val } @@ -93,8 +93,8 @@ define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { define i32 @test_vaddv.v2i32(<2 x i32> %a) { ; CHECK-LABEL: test_vaddv.v2i32 ; CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %1 = tail call i32 @llvm.arm64.neon.saddv.i32.v2i32(<2 x i32> %a) + %1 = tail call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a) ret i32 %1 } -declare i32 @llvm.arm64.neon.saddv.i32.v2i32(<2 x i32>) +declare i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32>) diff --git a/test/CodeGen/ARM64/aarch64-neon-add-sub.ll b/test/CodeGen/AArch64/arm64-neon-add-sub.ll similarity index 89% rename from test/CodeGen/ARM64/aarch64-neon-add-sub.ll rename to test/CodeGen/AArch64/arm64-neon-add-sub.ll index 241025eca339..fbde606538ca 100644 --- a/test/CodeGen/ARM64/aarch64-neon-add-sub.ll +++ b/test/CodeGen/AArch64/arm64-neon-add-sub.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -arm64-simd-scalar| FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -aarch64-simd-scalar| FileCheck %s define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) { ;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b @@ -182,35 +182,35 @@ define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) { define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) { ; CHECK-LABEL: test_vabd_f64 ; CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b) + %1 = tail call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b) ret <1 x double> %1 } define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) { ; CHECK-LABEL: test_vmax_f64 ; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b) + %1 = tail call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b) ret <1 x double> %1 } define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) { ; CHECK-LABEL: test_vmin_f64 ; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b) + %1 = tail call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b) ret <1 x double> %1 } define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) { ; CHECK-LABEL: test_vmaxnm_f64 ; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> 
@llvm.arm64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b) + %1 = tail call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b) ret <1 x double> %1 } define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) { ; CHECK-LABEL: test_vminnm_f64 ; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b) + %1 = tail call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b) ret <1 x double> %1 } @@ -229,9 +229,9 @@ define <1 x double> @test_vneg_f64(<1 x double> %a) { } declare <1 x double> @llvm.fabs.v1f64(<1 x double>) -declare <1 x double> @llvm.arm64.neon.fminnm.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm64.neon.fmaxnm.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm64.neon.fmin.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm64.neon.fmax.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm64.neon.fabd.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double>, <1 x double>) declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>) diff --git a/test/CodeGen/ARM64/neon-compare-instructions.ll b/test/CodeGen/AArch64/arm64-neon-compare-instructions.ll similarity index 100% rename from test/CodeGen/ARM64/neon-compare-instructions.ll rename to test/CodeGen/AArch64/arm64-neon-compare-instructions.ll diff --git a/test/CodeGen/ARM64/aarch64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll similarity index 99% rename from test/CodeGen/ARM64/aarch64-neon-copy.ll rename to test/CodeGen/AArch64/arm64-neon-copy.ll index 9493cad33452..cfc2ebf0a2e9 100644 --- a/test/CodeGen/ARM64/aarch64-neon-copy.ll +++ b/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -1030,7 +1030,7 @@ define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) { ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s ; CHECK-NEXT: ret entry: - %0 = call float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float> %a) + %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) %1 = insertelement <1 x float> undef, float %0, i32 0 %2 = extractelement <1 x float> %1, i32 0 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0 @@ -1042,14 +1042,14 @@ define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) { ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s ; CHECK-NEXT: ret entry: - %0 = call float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float> %a) + %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) %1 = insertelement <1 x float> undef, float %0, i32 0 %2 = extractelement <1 x float> %1, i32 0 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0 ret <4 x float> %vecinit1.i } -declare float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float>) +declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>) define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) { ; CHECK-LABEL: test_concat_undef_v1i32: @@ -1060,14 +1060,14 @@ entry: ret <2 x i32> %vecinit1.i } -declare i32 @llvm.arm64.neon.sqabs.i32(i32) #4 +declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4 define <2 x i32> @test_concat_v1i32_undef(i32 %a) { ; 
CHECK-LABEL: test_concat_v1i32_undef: ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} ; CHECK-NEXT: ret entry: - %b = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %a) + %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0 ret <2 x i32> %vecinit.i432 } @@ -1088,9 +1088,9 @@ define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) { ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} ; CHECK-NEXT: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %c = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %a) + %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) %d = insertelement <2 x i32> undef, i32 %c, i32 0 - %e = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %b) + %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b) %f = insertelement <2 x i32> undef, i32 %e, i32 0 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> ret <2 x i32> %h diff --git a/test/CodeGen/ARM64/aarch64-neon-copyPhysReg-tuple.ll b/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll similarity index 56% rename from test/CodeGen/ARM64/aarch64-neon-copyPhysReg-tuple.ll rename to test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll index f24392bb8fcc..276ac13da40e 100644 --- a/test/CodeGen/ARM64/aarch64-neon-copyPhysReg-tuple.ll +++ b/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll @@ -7,9 +7,9 @@ define <4 x i32> @copyTuple.QPair(i32* %a, i32* %b) { ; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b ; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}] entry: - %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> , <4 x i32> , i64 1, i32* %a) + %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> , <4 x i32> , i64 1, i32* %a) %extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0 - %vld1 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , i64 1, i32* %b) + %vld1 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , i64 1, i32* %b) %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld1, 0 ret <4 x i32> %vld1.fca.0.extract } @@ -21,9 +21,9 @@ define <4 x i32> @copyTuple.QTriple(i32* %a, i32* %b, <4 x i32> %c) { ; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b ; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}] entry: - %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> , <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a) + %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> , <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a) %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0 - %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , <4 x i32> %c, i64 1, i32* %b) + %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , <4 x i32> %c, i64 1, i32* %b) %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0 ret <4 x i32> %vld1.fca.0.extract } @@ -36,13 +36,13 @@ define <4 x i32> @copyTuple.QQuad(i32* %a, i32* %b, <4 x i32> %c) { ; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b ; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}] entry: - %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } 
@llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> , <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a) + %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> , <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a) %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0 - %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , <4 x i32> %c, <4 x i32> %c, i64 1, i32* %b) + %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , <4 x i32> %c, <4 x i32> %c, i64 1, i32* %b) %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0 ret <4 x i32> %vld1.fca.0.extract } -declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) +declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) +declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) diff --git a/test/CodeGen/ARM64/aarch64-neon-mul-div.ll b/test/CodeGen/AArch64/arm64-neon-mul-div.ll similarity index 92% rename from test/CodeGen/ARM64/aarch64-neon-mul-div.ll rename to test/CodeGen/AArch64/arm64-neon-mul-div.ll index f3a976631977..720f3eb6a4bf 100644 --- a/test/CodeGen/ARM64/aarch64-neon-mul-div.ll +++ b/test/CodeGen/AArch64/arm64-neon-mul-div.ll @@ -684,98 +684,98 @@ define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) { ret <2 x double> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.pmul.v8i8(<8 x i8>, <8 x i8>) -declare <16 x i8> @llvm.arm64.neon.pmul.v16i8(<16 x i8>, <16 x i8>) +declare <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8>, <8 x i8>) +declare <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8>, <16 x i8>) define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; CHECK-LABEL: poly_mulv8i8: - %prod = call <8 x i8> @llvm.arm64.neon.pmul.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) + %prod = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ; CHECK: pmul v0.8b, v0.8b, v1.8b ret <8 x i8> %prod } define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; CHECK-LABEL: poly_mulv16i8: - %prod = call <16 x i8> @llvm.arm64.neon.pmul.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) + %prod = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ; CHECK: pmul v0.16b, v0.16b, v1.16b ret <16 x i8> %prod } -declare <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) -declare <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> 
@llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK-LABEL: test_sqdmulh_v4i16: - %prod = call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) + %prod = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; CHECK: sqdmulh v0.4h, v0.4h, v1.4h ret <4 x i16> %prod } define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK-LABEL: test_sqdmulh_v8i16: - %prod = call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) + %prod = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; CHECK: sqdmulh v0.8h, v0.8h, v1.8h ret <8 x i16> %prod } define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: test_sqdmulh_v2i32: - %prod = call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + %prod = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; CHECK: sqdmulh v0.2s, v0.2s, v1.2s ret <2 x i32> %prod } define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK-LABEL: test_sqdmulh_v4i32: - %prod = call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) + %prod = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; CHECK: sqdmulh v0.4s, v0.4s, v1.4s ret <4 x i32> %prod } -declare <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) -declare <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK-LABEL: test_sqrdmulh_v4i16: - %prod = call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) + %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h ret <4 x i16> %prod } define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK-LABEL: test_sqrdmulh_v8i16: - %prod = call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) + %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h ret <8 x i16> %prod } define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: test_sqrdmulh_v2i32: - %prod = call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s ret <2 x i32> %prod } define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK-LABEL: test_sqrdmulh_v4i32: - %prod = call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) + %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s ret <4 x i32> %prod } -declare <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float>, <4 x 
float>) -declare <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double>, <2 x double>) +declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) +declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) { ; CHECK-LABEL: fmulx_v2f32: ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: fmulx v0.2s, v0.2s, v1.2s - %val = call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs) + %val = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs) ret <2 x float> %val } @@ -783,7 +783,7 @@ define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) { ; CHECK-LABEL: fmulx_v4f32: ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: fmulx v0.4s, v0.4s, v1.4s - %val = call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs) + %val = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs) ret <4 x float> %val } @@ -791,7 +791,7 @@ define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) { ; CHECK-LABEL: fmulx_v2f64: ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: fmulx v0.2d, v0.2d, v1.2d - %val = call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs) + %val = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs) ret <2 x double> %val } diff --git a/test/CodeGen/ARM64/aarch64-neon-scalar-by-elem-mul.ll b/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll similarity index 85% rename from test/CodeGen/ARM64/aarch64-neon-scalar-by-elem-mul.ll rename to test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll index 18e6e0fe8b65..92ed23995098 100644 --- a/test/CodeGen/ARM64/aarch64-neon-scalar-by-elem-mul.ll +++ b/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll @@ -61,13 +61,13 @@ define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) { ret double %tmp2; } -declare float @llvm.arm64.neon.fmulx.f32(float, float) +declare float @llvm.aarch64.neon.fmulx.f32(float, float) define float @test_fmulx_lane_f32(float %a, <2 x float> %v) { ; CHECK-LABEL: test_fmulx_lane_f32 ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 - %tmp2 = call float @llvm.arm64.neon.fmulx.f32(float %a, float %tmp1) + %tmp2 = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %tmp1) ret float %tmp2; } @@ -75,7 +75,7 @@ define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) { ; CHECK-LABEL: test_fmulx_laneq_f32 ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 - %tmp2 = call float @llvm.arm64.neon.fmulx.f32(float %a, float %tmp1) + %tmp2 = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %tmp1) ret float %tmp2; } @@ -83,17 +83,17 @@ define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) { ; CHECK-LABEL: test_fmulx_laneq_f32_swap ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 - %tmp2 = call float @llvm.arm64.neon.fmulx.f32(float %tmp1, float %a) + %tmp2 = call float @llvm.aarch64.neon.fmulx.f32(float %tmp1, float %a) ret float %tmp2; } -declare double @llvm.arm64.neon.fmulx.f64(double, double) +declare double @llvm.aarch64.neon.fmulx.f64(double, double) define double 
@test_fmulx_lane_f64(double %a, <1 x double> %v) { ; CHECK-LABEL: test_fmulx_lane_f64 ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0]|d[0-9]+}} %tmp1 = extractelement <1 x double> %v, i32 0 - %tmp2 = call double @llvm.arm64.neon.fmulx.f64(double %a, double %tmp1) + %tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %tmp1) ret double %tmp2; } @@ -101,7 +101,7 @@ define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) { ; CHECK-LABEL: test_fmulx_laneq_f64_0 ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] %tmp1 = extractelement <2 x double> %v, i32 0 - %tmp2 = call double @llvm.arm64.neon.fmulx.f64(double %a, double %tmp1) + %tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %tmp1) ret double %tmp2; } @@ -110,7 +110,7 @@ define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) { ; CHECK-LABEL: test_fmulx_laneq_f64_1 ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 - %tmp2 = call double @llvm.arm64.neon.fmulx.f64(double %a, double %tmp1) + %tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %tmp1) ret double %tmp2; } @@ -118,7 +118,7 @@ define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) { ; CHECK-LABEL: test_fmulx_laneq_f64_1_swap ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 - %tmp2 = call double @llvm.arm64.neon.fmulx.f64(double %tmp1, double %a) + %tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %tmp1, double %a) ret double %tmp2; } diff --git a/test/CodeGen/ARM64/aarch64-neon-select_cc.ll b/test/CodeGen/AArch64/arm64-neon-select_cc.ll similarity index 100% rename from test/CodeGen/ARM64/aarch64-neon-select_cc.ll rename to test/CodeGen/AArch64/arm64-neon-select_cc.ll diff --git a/test/CodeGen/ARM64/aarch64-neon-simd-ldst-one.ll b/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll similarity index 100% rename from test/CodeGen/ARM64/aarch64-neon-simd-ldst-one.ll rename to test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll diff --git a/test/CodeGen/ARM64/aarch64-neon-simd-shift.ll b/test/CodeGen/AArch64/arm64-neon-simd-shift.ll similarity index 81% rename from test/CodeGen/ARM64/aarch64-neon-simd-shift.ll rename to test/CodeGen/AArch64/arm64-neon-simd-shift.ll index 2fd2c1e35ce5..447fb6307f21 100644 --- a/test/CodeGen/ARM64/aarch64-neon-simd-shift.ll +++ b/test/CodeGen/AArch64/arm64-neon-simd-shift.ll @@ -333,7 +333,7 @@ define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; CHECK: test_vqshrun_high_n_s16 ; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqshrun = tail call <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3) + %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3) %1 = bitcast <8 x i8> %a to <1 x i64> %2 = bitcast <8 x i8> %vqshrun to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -344,7 +344,7 @@ define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; CHECK: test_vqshrun_high_n_s32 ; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqshrun = tail call <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9) + %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9) %1 = bitcast <4 x i16> %a to <1 x i64> %2 = bitcast <4 x i16> %vqshrun to <1 x i64> %shuffle.i = shufflevector 
<1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -356,7 +356,7 @@ define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; CHECK: test_vqshrun_high_n_s64 ; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 %1 = bitcast <2 x i32> %a to <1 x i64> - %vqshrun = tail call <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19) + %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19) %2 = bitcast <2 x i32> %vqshrun to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> @@ -366,7 +366,7 @@ define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; CHECK: test_vrshrn_high_n_s16 ; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vrshrn = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %b, i32 3) + %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b, i32 3) %1 = bitcast <8 x i8> %a to <1 x i64> %2 = bitcast <8 x i8> %vrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -377,7 +377,7 @@ define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; CHECK: test_vrshrn_high_n_s32 ; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vrshrn = tail call <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32> %b, i32 9) + %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 9) %1 = bitcast <4 x i16> %a to <1 x i64> %2 = bitcast <4 x i16> %vrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -389,7 +389,7 @@ define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; CHECK: test_vrshrn_high_n_s64 ; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 %1 = bitcast <2 x i32> %a to <1 x i64> - %vrshrn = tail call <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64> %b, i32 19) + %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %b, i32 19) %2 = bitcast <2 x i32> %vrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> @@ -399,7 +399,7 @@ define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; CHECK: test_vqrshrun_high_n_s16 ; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqrshrun = tail call <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3) + %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3) %1 = bitcast <8 x i8> %a to <1 x i64> %2 = bitcast <8 x i8> %vqrshrun to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -410,7 +410,7 @@ define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; CHECK: test_vqrshrun_high_n_s32 ; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqrshrun = tail call <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9) + %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9) %1 = bitcast <4 x i16> %a to <1 x i64> %2 = bitcast <4 x i16> %vqrshrun to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -422,7 +422,7 @@ define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; CHECK: test_vqrshrun_high_n_s64 ; CHECK: sqrshrun2 {{v[0-9]+}}.4s, 
{{v[0-9]+}}.2d, #19 %1 = bitcast <2 x i32> %a to <1 x i64> - %vqrshrun = tail call <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19) + %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19) %2 = bitcast <2 x i32> %vqrshrun to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> @@ -432,7 +432,7 @@ define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; CHECK: test_vqshrn_high_n_s16 ; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqshrn = tail call <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3) + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3) %1 = bitcast <8 x i8> %a to <1 x i64> %2 = bitcast <8 x i8> %vqshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -443,7 +443,7 @@ define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; CHECK: test_vqshrn_high_n_s32 ; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqshrn = tail call <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9) + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9) %1 = bitcast <4 x i16> %a to <1 x i64> %2 = bitcast <4 x i16> %vqshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -455,7 +455,7 @@ define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; CHECK: test_vqshrn_high_n_s64 ; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 %1 = bitcast <2 x i32> %a to <1 x i64> - %vqshrn = tail call <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19) + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19) %2 = bitcast <2 x i32> %vqshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> @@ -465,7 +465,7 @@ define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { ; CHECK: test_vqshrn_high_n_u16 ; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqshrn = tail call <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3) + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3) %1 = bitcast <8 x i8> %a to <1 x i64> %2 = bitcast <8 x i8> %vqshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -476,7 +476,7 @@ define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { ; CHECK: test_vqshrn_high_n_u32 ; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqshrn = tail call <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9) + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9) %1 = bitcast <4 x i16> %a to <1 x i64> %2 = bitcast <4 x i16> %vqshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -488,7 +488,7 @@ define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { ; CHECK: test_vqshrn_high_n_u64 ; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 %1 = bitcast <2 x i32> %a to <1 x i64> - %vqshrn = tail call <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64> %b, i32 19) + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %b, i32 
19) %2 = bitcast <2 x i32> %vqshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> @@ -498,7 +498,7 @@ define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; CHECK: test_vqrshrn_high_n_s16 ; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqrshrn = tail call <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3) + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3) %1 = bitcast <8 x i8> %a to <1 x i64> %2 = bitcast <8 x i8> %vqrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -509,7 +509,7 @@ define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; CHECK: test_vqrshrn_high_n_s32 ; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqrshrn = tail call <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9) + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9) %1 = bitcast <4 x i16> %a to <1 x i64> %2 = bitcast <4 x i16> %vqrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -521,7 +521,7 @@ define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; CHECK: test_vqrshrn_high_n_s64 ; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 %1 = bitcast <2 x i32> %a to <1 x i64> - %vqrshrn = tail call <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19) + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19) %2 = bitcast <2 x i32> %vqrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> @@ -531,7 +531,7 @@ define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { ; CHECK: test_vqrshrn_high_n_u16 ; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqrshrn = tail call <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3) + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3) %1 = bitcast <8 x i8> %a to <1 x i64> %2 = bitcast <8 x i8> %vqrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -542,7 +542,7 @@ define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { ; CHECK: test_vqrshrn_high_n_u32 ; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqrshrn = tail call <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9) + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9) %1 = bitcast <4 x i16> %a to <1 x i64> %2 = bitcast <4 x i16> %vqrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -554,7 +554,7 @@ define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { ; CHECK: test_vqrshrn_high_n_u64 ; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 %1 = bitcast <2 x i32> %a to <1 x i64> - %vqrshrn = tail call <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19) + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19) %2 = bitcast <2 x i32> %vqrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> @@ -563,101 +563,101 @@ 
define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { -declare <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16>, i32) +declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) -declare <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32>, i32) +declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) -declare <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64>, i32) +declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) -declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32) +declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) -declare <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32>, i32) +declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) -declare <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64>, i32) +declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) -declare <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16>, i32) +declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) -declare <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32>, i32) +declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) -declare <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64>, i32) +declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) -declare <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16>, i32) +declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) -declare <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32>, i32) +declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) -declare <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64>, i32) +declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) -declare <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16>, i32) +declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) -declare <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32>, i32) +declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) -declare <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64>, i32) +declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) -declare <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16>, i32) +declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) -declare <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32>, i32) +declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) -declare <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64>, i32) +declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) -declare <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16>, i32) +declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) -declare <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32>, i32) +declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) -declare <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64>, i32) +declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) -declare <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) +declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) -declare <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) +declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) -declare <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) +declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) -declare <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) +declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) -declare <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) +declare <4 
x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) -declare <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) +declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) -declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) +declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) -declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) +declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) -declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32) +declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32) -declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) +declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) -declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) +declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) -declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) +declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) { ; CHECK-LABEL: test_vcvt_n_s64_f64 ; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64 - %1 = tail call <1 x i64> @llvm.arm64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64) + %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64) ret <1 x i64> %1 } define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) { ; CHECK-LABEL: test_vcvt_n_u64_f64 ; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64 - %1 = tail call <1 x i64> @llvm.arm64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64) + %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64) ret <1 x i64> %1 } define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) { ; CHECK-LABEL: test_vcvt_n_f64_s64 ; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64 - %1 = tail call <1 x double> @llvm.arm64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64) + %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64) ret <1 x double> %1 } define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) { ; CHECK-LABEL: test_vcvt_n_f64_u64 ; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64 - %1 = tail call <1 x double> @llvm.arm64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64) + %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64) ret <1 x double> %1 } -declare <1 x i64> @llvm.arm64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32) -declare <1 x i64> @llvm.arm64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32) -declare <1 x double> @llvm.arm64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32) -declare <1 x double> @llvm.arm64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32) +declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32) +declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32) +declare <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32) +declare <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32) diff --git a/test/CodeGen/ARM64/aarch64-neon-simd-vget.ll b/test/CodeGen/AArch64/arm64-neon-simd-vget.ll similarity index 100% rename from test/CodeGen/ARM64/aarch64-neon-simd-vget.ll rename to test/CodeGen/AArch64/arm64-neon-simd-vget.ll diff --git a/test/CodeGen/ARM64/neon-v1i1-setcc.ll b/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll similarity index 100% rename from 
test/CodeGen/ARM64/neon-v1i1-setcc.ll rename to test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll diff --git a/test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll b/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll similarity index 77% rename from test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll rename to test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll index 9e69ac025f97..8262fe43a66c 100644 --- a/test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll +++ b/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll @@ -10,7 +10,7 @@ define i32 @spill.DPairReg(i32* %arg1, i32 %arg2) { ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] entry: - %vld = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32* %arg1) + %vld = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %arg1) %cmp = icmp eq i32 %arg2, 0 br i1 %cmp, label %if.then, label %if.end @@ -30,7 +30,7 @@ define i16 @spill.DTripleReg(i16* %arg1, i32 %arg2) { ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] entry: - %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16* %arg1) + %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %arg1) %cmp = icmp eq i32 %arg2, 0 br i1 %cmp, label %if.then, label %if.end @@ -50,7 +50,7 @@ define i16 @spill.DQuadReg(i16* %arg1, i32 %arg2) { ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] entry: - %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16* %arg1) + %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %arg1) %cmp = icmp eq i32 %arg2, 0 br i1 %cmp, label %if.then, label %if.end @@ -70,7 +70,7 @@ define i32 @spill.QPairReg(i32* %arg1, i32 %arg2) { ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] entry: - %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32* %arg1) + %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %arg1) %cmp = icmp eq i32 %arg2, 0 br i1 %cmp, label %if.then, label %if.end @@ -90,7 +90,7 @@ define float @spill.QTripleReg(float* %arg1, i32 %arg2) { ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] entry: - %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float* %arg1) + %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %arg1) %cmp = icmp eq i32 %arg2, 0 br i1 %cmp, label %if.then, label %if.end @@ -110,7 +110,7 @@ define i8 @spill.QQuadReg(i8* %arg1, i32 %arg2) { ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] entry: - %vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8* %arg1) + %vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %arg1) 
%cmp = icmp eq i32 %arg2, 0 br i1 %cmp, label %if.then, label %if.end @@ -124,12 +124,12 @@ if.end: ret i8 %res } -declare { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32*) -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16*) -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16*) -declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32*) -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float*) -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8*) +declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*) +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*) +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*) +declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*) +declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float*) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*) declare void @foo() @@ -139,7 +139,7 @@ declare void @foo() ; then we can delete it. ; check the spill for Register Class QPair_with_qsub_0_in_FPR128Lo define <8 x i16> @test_2xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) { - tail call void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr) + tail call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr) tail call void @foo() %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1> %1 = bitcast <2 x i64> %sv to <8 x i16> @@ -150,7 +150,7 @@ define <8 x i16> @test_2xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) { ; check the spill for Register Class QTriple_with_qsub_0_in_FPR128Lo define <8 x i16> @test_3xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) { - tail call void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr) + tail call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr) tail call void @foo() %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1> %1 = bitcast <2 x i64> %sv to <8 x i16> @@ -161,7 +161,7 @@ define <8 x i16> @test_3xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) { ; check the spill for Register Class QQuad_with_qsub_0_in_FPR128Lo define <8 x i16> @test_4xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) { - tail call void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr) + tail call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr) tail call void @foo() %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1> %1 = bitcast <2 x i64> %sv to <8 x i16> @@ -170,6 +170,6 @@ define <8 x i16> @test_4xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) { ret <8 x i16> %3 } -declare void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) -declare void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) -declare void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
+declare void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) diff --git a/test/CodeGen/ARM64/patchpoint.ll b/test/CodeGen/AArch64/arm64-patchpoint.ll similarity index 100% rename from test/CodeGen/ARM64/patchpoint.ll rename to test/CodeGen/AArch64/arm64-patchpoint.ll diff --git a/test/CodeGen/ARM64/pic-local-symbol.ll b/test/CodeGen/AArch64/arm64-pic-local-symbol.ll similarity index 100% rename from test/CodeGen/ARM64/pic-local-symbol.ll rename to test/CodeGen/AArch64/arm64-pic-local-symbol.ll diff --git a/test/CodeGen/ARM64/platform-reg.ll b/test/CodeGen/AArch64/arm64-platform-reg.ll similarity index 100% rename from test/CodeGen/ARM64/platform-reg.ll rename to test/CodeGen/AArch64/arm64-platform-reg.ll diff --git a/test/CodeGen/ARM64/popcnt.ll b/test/CodeGen/AArch64/arm64-popcnt.ll similarity index 93% rename from test/CodeGen/ARM64/popcnt.ll rename to test/CodeGen/AArch64/arm64-popcnt.ll index 9bbba09c250f..2afade2ee750 100644 --- a/test/CodeGen/ARM64/popcnt.ll +++ b/test/CodeGen/AArch64/arm64-popcnt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define i32 @cnt32_advsimd(i32 %x) nounwind readnone { %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) diff --git a/test/CodeGen/ARM64/prefetch.ll b/test/CodeGen/AArch64/arm64-prefetch.ll similarity index 100% rename from test/CodeGen/ARM64/prefetch.ll rename to test/CodeGen/AArch64/arm64-prefetch.ll diff --git a/test/CodeGen/ARM64/promote-const.ll b/test/CodeGen/AArch64/arm64-promote-const.ll similarity index 98% rename from test/CodeGen/ARM64/promote-const.ll rename to test/CodeGen/AArch64/arm64-promote-const.ll index 9e7a215f64c2..380ff55d6839 100644 --- a/test/CodeGen/ARM64/promote-const.ll +++ b/test/CodeGen/AArch64/arm64-promote-const.ll @@ -1,9 +1,9 @@ ; Disable machine cse to stress the different path of the algorithm. ; Otherwise, we always fall in the simple case, i.e., only one definition. -; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -arm64-stress-promote-const -mcpu=cyclone | FileCheck -check-prefix=PROMOTED %s +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -aarch64-stress-promote-const -mcpu=cyclone | FileCheck -check-prefix=PROMOTED %s ; The REGULAR run just checks that the inputs passed to promote const expose ; the appropriate patterns. 
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -arm64-promote-const=false -mcpu=cyclone | FileCheck -check-prefix=REGULAR %s +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -aarch64-promote-const=false -mcpu=cyclone | FileCheck -check-prefix=REGULAR %s %struct.uint8x16x4_t = type { [4 x <16 x i8>] } diff --git a/test/CodeGen/ARM64/redzone.ll b/test/CodeGen/AArch64/arm64-redzone.ll similarity index 88% rename from test/CodeGen/ARM64/redzone.ll rename to test/CodeGen/AArch64/arm64-redzone.ll index b89d7b1de3fe..9b0c384c4d9e 100644 --- a/test/CodeGen/ARM64/redzone.ll +++ b/test/CodeGen/AArch64/arm64-redzone.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-redzone | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-redzone | FileCheck %s define i32 @foo(i32 %a, i32 %b) nounwind ssp { ; CHECK-LABEL: foo: diff --git a/test/CodeGen/ARM64/reg-copy-noneon.ll b/test/CodeGen/AArch64/arm64-reg-copy-noneon.ll similarity index 100% rename from test/CodeGen/ARM64/reg-copy-noneon.ll rename to test/CodeGen/AArch64/arm64-reg-copy-noneon.ll diff --git a/test/CodeGen/ARM64/register-offset-addressing.ll b/test/CodeGen/AArch64/arm64-register-offset-addressing.ll similarity index 100% rename from test/CodeGen/ARM64/register-offset-addressing.ll rename to test/CodeGen/AArch64/arm64-register-offset-addressing.ll diff --git a/test/CodeGen/ARM64/register-pairing.ll b/test/CodeGen/AArch64/arm64-register-pairing.ll similarity index 100% rename from test/CodeGen/ARM64/register-pairing.ll rename to test/CodeGen/AArch64/arm64-register-pairing.ll diff --git a/test/CodeGen/ARM64/regress-f128csel-flags.ll b/test/CodeGen/AArch64/arm64-regress-f128csel-flags.ll similarity index 100% rename from test/CodeGen/ARM64/regress-f128csel-flags.ll rename to test/CodeGen/AArch64/arm64-regress-f128csel-flags.ll diff --git a/test/CodeGen/ARM64/regress-interphase-shift.ll b/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll similarity index 100% rename from test/CodeGen/ARM64/regress-interphase-shift.ll rename to test/CodeGen/AArch64/arm64-regress-interphase-shift.ll diff --git a/test/CodeGen/ARM64/return-vector.ll b/test/CodeGen/AArch64/arm64-return-vector.ll similarity index 100% rename from test/CodeGen/ARM64/return-vector.ll rename to test/CodeGen/AArch64/arm64-return-vector.ll diff --git a/test/CodeGen/ARM64/returnaddr.ll b/test/CodeGen/AArch64/arm64-returnaddr.ll similarity index 100% rename from test/CodeGen/ARM64/returnaddr.ll rename to test/CodeGen/AArch64/arm64-returnaddr.ll diff --git a/test/CodeGen/ARM64/rev.ll b/test/CodeGen/AArch64/arm64-rev.ll similarity index 99% rename from test/CodeGen/ARM64/rev.ll rename to test/CodeGen/AArch64/arm64-rev.ll index 1da59e42f6b9..30d9f4f3e670 100644 --- a/test/CodeGen/ARM64/rev.ll +++ b/test/CodeGen/AArch64/arm64-rev.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define i32 @test_rev_w(i32 %a) nounwind { entry: diff --git a/test/CodeGen/ARM64/rounding.ll b/test/CodeGen/AArch64/arm64-rounding.ll similarity index 100% rename from test/CodeGen/ARM64/rounding.ll rename to test/CodeGen/AArch64/arm64-rounding.ll diff --git a/test/CodeGen/ARM64/scaled_iv.ll b/test/CodeGen/AArch64/arm64-scaled_iv.ll similarity index 100% rename from test/CodeGen/ARM64/scaled_iv.ll rename to test/CodeGen/AArch64/arm64-scaled_iv.ll diff --git a/test/CodeGen/ARM64/scvt.ll b/test/CodeGen/AArch64/arm64-scvt.ll similarity index 99% rename from 
test/CodeGen/ARM64/scvt.ll rename to test/CodeGen/AArch64/arm64-scvt.ll index b4d4add1e8a9..2e006cff159a 100644 --- a/test/CodeGen/ARM64/scvt.ll +++ b/test/CodeGen/AArch64/arm64-scvt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ; rdar://13082402 define float @t1(i32* nocapture %src) nounwind ssp { diff --git a/test/CodeGen/ARM64/shifted-sext.ll b/test/CodeGen/AArch64/arm64-shifted-sext.ll similarity index 100% rename from test/CodeGen/ARM64/shifted-sext.ll rename to test/CodeGen/AArch64/arm64-shifted-sext.ll diff --git a/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll b/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll new file mode 100644 index 000000000000..aed39e7ed8cb --- /dev/null +++ b/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -O0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST + +define <16 x i8> @foo(<16 x i8> %a) nounwind optsize readnone ssp { +; CHECK: uaddlv.16b h0, v0 +; CHECK: rshrn.8b v0, v0, #4 +; CHECK: dup.16b v0, v0[0] +; CHECK: ret + +; CHECK-FAST: uaddlv.16b +; CHECK-FAST: rshrn.8b +; CHECK-FAST: dup.16b + %tmp = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) nounwind + %tmp1 = trunc i32 %tmp to i16 + %tmp2 = insertelement <8 x i16> undef, i16 %tmp1, i32 0 + %tmp3 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp2, i32 4) + %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %tmp4 +} + +declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>) nounwind readnone diff --git a/test/CodeGen/ARM64/simplest-elf.ll b/test/CodeGen/AArch64/arm64-simplest-elf.ll similarity index 100% rename from test/CodeGen/ARM64/simplest-elf.ll rename to test/CodeGen/AArch64/arm64-simplest-elf.ll diff --git a/test/CodeGen/ARM64/sincos.ll b/test/CodeGen/AArch64/arm64-sincos.ll similarity index 100% rename from test/CodeGen/ARM64/sincos.ll rename to test/CodeGen/AArch64/arm64-sincos.ll diff --git a/test/CodeGen/ARM64/sitofp-combine-chains.ll b/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll similarity index 100% rename from test/CodeGen/ARM64/sitofp-combine-chains.ll rename to test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll diff --git a/test/CodeGen/ARM64/sli-sri-opt.ll b/test/CodeGen/AArch64/arm64-sli-sri-opt.ll similarity index 95% rename from test/CodeGen/ARM64/sli-sri-opt.ll rename to test/CodeGen/AArch64/arm64-sli-sri-opt.ll index 725dcd51fd17..7fec53993bc1 100644 --- a/test/CodeGen/ARM64/sli-sri-opt.ll +++ b/test/CodeGen/AArch64/arm64-sli-sri-opt.ll @@ -1,4 +1,4 @@ -; RUN: llc -arm64-shift-insert-generation=true -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -aarch64-shift-insert-generation=true -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s define void @testLeftGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind { ; CHECK-LABEL: testLeftGood: diff --git a/test/CodeGen/ARM64/smaxv.ll b/test/CodeGen/AArch64/arm64-smaxv.ll similarity index 61% rename from test/CodeGen/ARM64/smaxv.ll rename to test/CodeGen/AArch64/arm64-smaxv.ll index 4f6e01b31ea0..183e667643cc 100644 --- a/test/CodeGen/ARM64/smaxv.ll +++ b/test/CodeGen/AArch64/arm64-smaxv.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 
-arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s define signext i8 @test_vmaxv_s8(<8 x i8> %a1) { ; CHECK: test_vmaxv_s8 @@ -6,7 +6,7 @@ define signext i8 @test_vmaxv_s8(<8 x i8> %a1) { ; CHECK-NEXT: smov.b w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8> %a1) + %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a1) %0 = trunc i32 %vmaxv.i to i8 ret i8 %0 } @@ -17,7 +17,7 @@ define signext i16 @test_vmaxv_s16(<4 x i16> %a1) { ; CHECK-NEXT: smov.h w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v4i16(<4 x i16> %a1) + %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a1) %0 = trunc i32 %vmaxv.i to i16 ret i16 %0 } @@ -29,7 +29,7 @@ define i32 @test_vmaxv_s32(<2 x i32> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v2i32(<2 x i32> %a1) + %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a1) ret i32 %vmaxv.i } @@ -39,7 +39,7 @@ define signext i8 @test_vmaxvq_s8(<16 x i8> %a1) { ; CHECK-NEXT: smov.b w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v16i8(<16 x i8> %a1) + %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a1) %0 = trunc i32 %vmaxv.i to i8 ret i8 %0 } @@ -50,7 +50,7 @@ define signext i16 @test_vmaxvq_s16(<8 x i16> %a1) { ; CHECK-NEXT: smov.h w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v8i16(<8 x i16> %a1) + %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a1) %0 = trunc i32 %vmaxv.i to i16 ret i16 %0 } @@ -61,14 +61,14 @@ define i32 @test_vmaxvq_s32(<4 x i32> %a1) { ; CHECK-NEXT: fmov w0, [[REGNUM]] ; CHECK-NEXT: ret entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v4i32(<4 x i32> %a1) + %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a1) ret i32 %vmaxv.i } -declare i32 @llvm.arm64.neon.smaxv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.smaxv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.smaxv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.smaxv.i32.v2i32(<2 x i32>) -declare i32 @llvm.arm64.neon.smaxv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>) diff --git a/test/CodeGen/ARM64/sminv.ll b/test/CodeGen/AArch64/arm64-sminv.ll similarity index 61% rename from test/CodeGen/ARM64/sminv.ll rename to test/CodeGen/AArch64/arm64-sminv.ll index a246868d2f20..195c4e59dc41 100644 --- a/test/CodeGen/ARM64/sminv.ll +++ b/test/CodeGen/AArch64/arm64-sminv.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s define signext i8 @test_vminv_s8(<8 x i8> %a1) { ; CHECK: test_vminv_s8 @@ -6,7 +6,7 @@ define signext i8 @test_vminv_s8(<8 x i8> %a1) { ; CHECK-NEXT: smov.b w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v8i8(<8 x i8> %a1) + %vminv.i = tail call i32 
@llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a1) %0 = trunc i32 %vminv.i to i8 ret i8 %0 } @@ -17,7 +17,7 @@ define signext i16 @test_vminv_s16(<4 x i16> %a1) { ; CHECK-NEXT: smov.h w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v4i16(<4 x i16> %a1) + %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a1) %0 = trunc i32 %vminv.i to i16 ret i16 %0 } @@ -29,7 +29,7 @@ define i32 @test_vminv_s32(<2 x i32> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v2i32(<2 x i32> %a1) + %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a1) ret i32 %vminv.i } @@ -39,7 +39,7 @@ define signext i8 @test_vminvq_s8(<16 x i8> %a1) { ; CHECK-NEXT: smov.b w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v16i8(<16 x i8> %a1) + %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a1) %0 = trunc i32 %vminv.i to i8 ret i8 %0 } @@ -50,7 +50,7 @@ define signext i16 @test_vminvq_s16(<8 x i16> %a1) { ; CHECK-NEXT: smov.h w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v8i16(<8 x i16> %a1) + %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a1) %0 = trunc i32 %vminv.i to i16 ret i16 %0 } @@ -61,14 +61,14 @@ define i32 @test_vminvq_s32(<4 x i32> %a1) { ; CHECK-NEXT: fmov w0, [[REGNUM]] ; CHECK-NEXT: ret entry: - %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v4i32(<4 x i32> %a1) + %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a1) ret i32 %vminv.i } -declare i32 @llvm.arm64.neon.sminv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.sminv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.sminv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.sminv.i32.v2i32(<2 x i32>) -declare i32 @llvm.arm64.neon.sminv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.sminv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32>) +declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>) diff --git a/test/CodeGen/ARM64/spill-lr.ll b/test/CodeGen/AArch64/arm64-spill-lr.ll similarity index 100% rename from test/CodeGen/ARM64/spill-lr.ll rename to test/CodeGen/AArch64/arm64-spill-lr.ll diff --git a/test/CodeGen/ARM64/spill.ll b/test/CodeGen/AArch64/arm64-spill.ll similarity index 88% rename from test/CodeGen/ARM64/spill.ll rename to test/CodeGen/AArch64/arm64-spill.ll index 5a5da9723cb6..47cdc2bd95e4 100644 --- a/test/CodeGen/ARM64/spill.ll +++ b/test/CodeGen/AArch64/arm64-spill.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -arm64-neon-syntax=apple -verify-machineinstrs +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -aarch64-neon-syntax=apple -verify-machineinstrs ; CHECK: fpr128 ; CHECK: ld1.2d diff --git a/test/CodeGen/ARM64/st1.ll b/test/CodeGen/AArch64/arm64-st1.ll similarity index 50% rename from test/CodeGen/ARM64/st1.ll rename to test/CodeGen/AArch64/arm64-st1.ll index b9aafc60e7ba..4370484478c0 100644 --- a/test/CodeGen/ARM64/st1.ll +++ b/test/CodeGen/AArch64/arm64-st1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple 
-verify-machineinstrs | FileCheck %s define void @st1lane_16b(<16 x i8> %A, i8* %D) { ; CHECK-LABEL: st1lane_16b @@ -83,594 +83,594 @@ define void @st1lane_2s_float(<2 x float> %A, float* %D) { define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) { ; CHECK-LABEL: st2lane_16b ; CHECK: st2.b - call void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D) + call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D) ret void } define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) { ; CHECK-LABEL: st2lane_8h ; CHECK: st2.h - call void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D) + call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D) ret void } define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) { ; CHECK-LABEL: st2lane_4s ; CHECK: st2.s - call void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D) + call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D) ret void } define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) { ; CHECK-LABEL: st2lane_2d ; CHECK: st2.d - call void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D) + call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D) ret void } -declare void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readnone -declare void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readnone -declare void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readnone -declare void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readnone +declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readnone +declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readnone +declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readnone +declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readnone define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) { ; CHECK-LABEL: st3lane_16b ; CHECK: st3.b - call void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D) + call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D) ret void } define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) { ; CHECK-LABEL: st3lane_8h ; CHECK: st3.h - call void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D) + call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D) ret void } define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) { ; CHECK-LABEL: st3lane_4s ; CHECK: st3.s - call void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D) + call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D) ret void } define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) { ; CHECK-LABEL: st3lane_2d ; CHECK: st3.d - call void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D) + call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> 
%B, <2 x i64> %C, i64 1, i64* %D) ret void } -declare void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone -declare void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone -declare void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone -declare void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone +declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone +declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone +declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone +declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) { ; CHECK-LABEL: st4lane_16b ; CHECK: st4.b - call void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E) + call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E) ret void } define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) { ; CHECK-LABEL: st4lane_8h ; CHECK: st4.h - call void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E) + call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E) ret void } define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) { ; CHECK-LABEL: st4lane_4s ; CHECK: st4.s - call void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E) + call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E) ret void } define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) { ; CHECK-LABEL: st4lane_2d ; CHECK: st4.d - call void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E) + call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E) ret void } -declare void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone -declare void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone -declare void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone -declare void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone +declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone +declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone +declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone +declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) 
nounwind { ; CHECK-LABEL: st2_8b ; CHECK st2.8b - call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P) + call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P) ret void } define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind { ; CHECK-LABEL: st3_8b ; CHECK st3.8b - call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) + call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) ret void } define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind { ; CHECK-LABEL: st4_8b ; CHECK st4.8b - call void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) + call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) ret void } -declare void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind { ; CHECK-LABEL: st2_16b ; CHECK st2.16b - call void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P) + call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P) ret void } define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind { ; CHECK-LABEL: st3_16b ; CHECK st3.16b - call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) + call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) ret void } define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind { ; CHECK-LABEL: st4_16b ; CHECK st4.16b - call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) + call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) ret void } -declare void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind { ; CHECK-LABEL: st2_4h ; CHECK st2.4h - call void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P) + call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P) ret void } define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind { ; CHECK-LABEL: st3_4h ; CHECK st3.4h - call void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) + call 
void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) ret void } define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind { ; CHECK-LABEL: st4_4h ; CHECK st4.4h - call void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) + call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) ret void } -declare void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind { ; CHECK-LABEL: st2_8h ; CHECK st2.8h - call void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P) + call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P) ret void } define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind { ; CHECK-LABEL: st3_8h ; CHECK st3.8h - call void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) + call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) ret void } define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind { ; CHECK-LABEL: st4_8h ; CHECK st4.8h - call void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) + call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) ret void } -declare void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind { ; CHECK-LABEL: st2_2s ; CHECK st2.2s - call void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P) + call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P) ret void } define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind { ; CHECK-LABEL: st3_2s ; CHECK st3.2s - call void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) + call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) ret void } define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind { ; CHECK-LABEL: st4_2s ; CHECK st4.2s - call void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) + 
call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) ret void } -declare void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind { ; CHECK-LABEL: st2_4s ; CHECK st2.4s - call void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P) + call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P) ret void } define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind { ; CHECK-LABEL: st3_4s ; CHECK st3.4s - call void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) + call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) ret void } define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind { ; CHECK-LABEL: st4_4s ; CHECK st4.4s - call void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) + call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) ret void } -declare void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind { ; CHECK-LABEL: st2_1d ; CHECK st1.2d - call void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P) + call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P) ret void } define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind { ; CHECK-LABEL: st3_1d ; CHECK st1.3d - call void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) + call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) ret void } define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind { ; CHECK-LABEL: st4_1d ; CHECK st1.4d - call void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) + call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) ret void } -declare void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly -declare void 
@llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind { ; CHECK-LABEL: st2_2d ; CHECK st2.2d - call void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P) + call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P) ret void } define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind { ; CHECK-LABEL: st3_2d ; CHECK st2.3d - call void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) + call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) ret void } define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind { ; CHECK-LABEL: st4_2d ; CHECK st2.4d - call void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) + call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) ret void } -declare void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind readonly define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) { ; CHECK-LABEL: st1_x2_v8i8: ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) + call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) ret void } define void @st1_x2_v4i16(<4 x i16> %A, 
<4 x i16> %B, i16* %addr) { ; CHECK-LABEL: st1_x2_v4i16: ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) + call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) ret void } define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) { ; CHECK-LABEL: st1_x2_v2i32: ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) + call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) ret void } define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, float* %addr) { ; CHECK-LABEL: st1_x2_v2f32: ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x float> %B, float* %addr) + call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x float> %B, float* %addr) ret void } define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) { ; CHECK-LABEL: st1_x2_v1i64: ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) + call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) ret void } define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, double* %addr) { ; CHECK-LABEL: st1_x2_v1f64: ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x double> %B, double* %addr) + call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x double> %B, double* %addr) ret void } -declare void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind readonly define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) { ; CHECK-LABEL: st1_x2_v16i8: ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) + call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) ret void } define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) { ; CHECK-LABEL: st1_x2_v8i16: ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) + call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) ret void } define void @st1_x2_v4i32(<4 x 
i32> %A, <4 x i32> %B, i32* %addr) { ; CHECK-LABEL: st1_x2_v4i32: ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) + call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) ret void } define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, float* %addr) { ; CHECK-LABEL: st1_x2_v4f32: ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x float> %B, float* %addr) + call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x float> %B, float* %addr) ret void } define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) { ; CHECK-LABEL: st1_x2_v2i64: ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) + call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) ret void } define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, double* %addr) { ; CHECK-LABEL: st1_x2_v2f64: ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x double> %B, double* %addr) + call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x double> %B, double* %addr) ret void } -declare void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) { ; CHECK-LABEL: st1_x3_v8i8: ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) + call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) ret void } define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) { ; CHECK-LABEL: st1_x3_v4i16: ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) + call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) ret void } define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) { ; CHECK-LABEL: st1_x3_v2i32: ; CHECK: st1.2s 
{ {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) + call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) ret void } define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) { ; CHECK-LABEL: st1_x3_v2f32: ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) + call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) ret void } define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) { ; CHECK-LABEL: st1_x3_v1i64: ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) + call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) ret void } define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) { ; CHECK-LABEL: st1_x3_v1f64: ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) + call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) ret void } -declare void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) { ; CHECK-LABEL: st1_x3_v16i8: ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) + call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) ret void } define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) { ; CHECK-LABEL: st1_x3_v8i16: ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) + call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 
x i16> %C, i16* %addr) ret void } define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) { ; CHECK-LABEL: st1_x3_v4i32: ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) + call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) ret void } define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) { ; CHECK-LABEL: st1_x3_v4f32: ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) + call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) ret void } define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) { ; CHECK-LABEL: st1_x3_v2i64: ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) + call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) ret void } define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) { ; CHECK-LABEL: st1_x3_v2f64: ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) + call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) ret void } -declare void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) { ; CHECK-LABEL: st1_x4_v8i8: ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) + call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) ret void } define void 
@st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) { ; CHECK-LABEL: st1_x4_v4i16: ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) + call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) ret void } define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) { ; CHECK-LABEL: st1_x4_v2i32: ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) + call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) ret void } define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) { ; CHECK-LABEL: st1_x4_v2f32: ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) + call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) ret void } define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) { ; CHECK-LABEL: st1_x4_v1i64: ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) + call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) ret void } define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) { ; CHECK-LABEL: st1_x4_v1f64: ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) + call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) ret void } -declare void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x 
i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) { ; CHECK-LABEL: st1_x4_v16i8: ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) + call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) ret void } define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) { ; CHECK-LABEL: st1_x4_v8i16: ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) + call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) ret void } define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) { ; CHECK-LABEL: st1_x4_v4i32: ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) + call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) ret void } define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) { ; CHECK-LABEL: st1_x4_v4f32: ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) + call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) ret void } define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) { ; CHECK-LABEL: st1_x4_v2i64: ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) + call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) ret void } define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) { ; CHECK-LABEL: st1_x4_v2f64: ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) + call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) ret void } diff --git a/test/CodeGen/ARM64/stack-no-frame.ll b/test/CodeGen/AArch64/arm64-stack-no-frame.ll similarity index 100% rename from test/CodeGen/ARM64/stack-no-frame.ll rename to test/CodeGen/AArch64/arm64-stack-no-frame.ll diff --git a/test/CodeGen/ARM64/stackmap.ll b/test/CodeGen/AArch64/arm64-stackmap.ll similarity index 100% rename from test/CodeGen/ARM64/stackmap.ll rename to test/CodeGen/AArch64/arm64-stackmap.ll diff --git a/test/CodeGen/ARM64/stackpointer.ll b/test/CodeGen/AArch64/arm64-stackpointer.ll similarity index 100% rename from test/CodeGen/ARM64/stackpointer.ll rename to 
test/CodeGen/AArch64/arm64-stackpointer.ll
diff --git a/test/CodeGen/ARM64/stacksave.ll b/test/CodeGen/AArch64/arm64-stacksave.ll
similarity index 100%
rename from test/CodeGen/ARM64/stacksave.ll
rename to test/CodeGen/AArch64/arm64-stacksave.ll
diff --git a/test/CodeGen/ARM64/stp.ll b/test/CodeGen/AArch64/arm64-stp.ll
similarity index 94%
rename from test/CodeGen/ARM64/stp.ll
rename to test/CodeGen/AArch64/arm64-stp.ll
index 3a58396e61b4..40bdf22c995c 100644
--- a/test/CodeGen/ARM64/stp.ll
+++ b/test/CodeGen/AArch64/arm64-stp.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm64 -arm64-stp-suppress=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
-; RUN: llc < %s -march=arm64 -arm64-unscaled-mem-op=true\
+; RUN: llc < %s -march=arm64 -aarch64-stp-suppress=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\
 ; RUN:   -verify-machineinstrs -mcpu=cyclone | FileCheck -check-prefix=STUR_CHK %s
 
 ; CHECK: stp_int
diff --git a/test/CodeGen/ARM64/strict-align.ll b/test/CodeGen/AArch64/arm64-strict-align.ll
similarity index 75%
rename from test/CodeGen/ARM64/strict-align.ll
rename to test/CodeGen/AArch64/arm64-strict-align.ll
index 48a1528b5cd1..5d137043a691 100644
--- a/test/CodeGen/ARM64/strict-align.ll
+++ b/test/CodeGen/AArch64/arm64-strict-align.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-apple-darwin -arm64-no-strict-align | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-apple-darwin -arm64-strict-align | FileCheck %s --check-prefix=CHECK-STRICT
+; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-no-strict-align | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align | FileCheck %s --check-prefix=CHECK-STRICT
 
 define i32 @f0(i32* nocapture %p) nounwind {
 ; CHECK-STRICT: ldrh [[HIGH:w[0-9]+]], [x0, #2]
diff --git a/test/CodeGen/ARM64/stur.ll b/test/CodeGen/AArch64/arm64-stur.ll
similarity index 96%
rename from test/CodeGen/ARM64/stur.ll
rename to test/CodeGen/AArch64/arm64-stur.ll
index dc67b60000d4..a2e684dc9528 100644
--- a/test/CodeGen/ARM64/stur.ll
+++ b/test/CodeGen/AArch64/arm64-stur.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
 %struct.X = type <{ i32, i64, i64 }>
 
 define void @foo1(i32* %p, i64 %val) nounwind {
diff --git a/test/CodeGen/ARM64/subsections.ll b/test/CodeGen/AArch64/arm64-subsections.ll
similarity index 100%
rename from test/CodeGen/ARM64/subsections.ll
rename to test/CodeGen/AArch64/arm64-subsections.ll
diff --git a/test/CodeGen/ARM64/subvector-extend.ll b/test/CodeGen/AArch64/arm64-subvector-extend.ll
similarity index 97%
rename from test/CodeGen/ARM64/subvector-extend.ll
rename to test/CodeGen/AArch64/arm64-subvector-extend.ll
index ad2f06ce7ba8..d5a178a9e656 100644
--- a/test/CodeGen/ARM64/subvector-extend.ll
+++ b/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
 
 ; Test efficient codegen of vector extends up from legal type to 128 bit
 ; and 256 bit vector types.
diff --git a/test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll b/test/CodeGen/AArch64/arm64-swizzle-tbl-i16-layout.ll similarity index 100% rename from test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll rename to test/CodeGen/AArch64/arm64-swizzle-tbl-i16-layout.ll diff --git a/test/CodeGen/AArch64/arm64-tbl.ll b/test/CodeGen/AArch64/arm64-tbl.ll new file mode 100644 index 000000000000..b1ce15a1e19a --- /dev/null +++ b/test/CodeGen/AArch64/arm64-tbl.ll @@ -0,0 +1,132 @@ +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s + +define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind { +; CHECK: tbl1_8b +; CHECK: tbl.8b + %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B) + ret <8 x i8> %tmp3 +} + +define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind { +; CHECK: tbl1_16b +; CHECK: tbl.16b + %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B) + ret <16 x i8> %tmp3 +} + +define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) { +; CHECK: tbl2_8b +; CHECK: tbl.8b + %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) + ret <8 x i8> %tmp3 +} + +define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK: tbl2_16b +; CHECK: tbl.16b + %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) + ret <16 x i8> %tmp3 +} + +define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { +; CHECK: tbl3_8b +; CHECK: tbl.8b + %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) + ret <8 x i8> %tmp3 +} + +define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { +; CHECK: tbl3_16b +; CHECK: tbl.16b + %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) + ret <16 x i8> %tmp3 +} + +define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { +; CHECK: tbl4_8b +; CHECK: tbl.8b + %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) + ret <8 x i8> %tmp3 +} + +define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { +; CHECK: tbl4_16b +; CHECK: tbl.16b + %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) + ret <16 x i8> %tmp3 +} + +declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone + +define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind { +; CHECK: tbx1_8b +; CHECK: tbx.8b + %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) + ret <8 x i8> 
%tmp3 +} + +define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind { +; CHECK: tbx1_16b +; CHECK: tbx.16b + %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) + ret <16 x i8> %tmp3 +} + +define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { +; CHECK: tbx2_8b +; CHECK: tbx.8b + %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) + ret <8 x i8> %tmp3 +} + +define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { +; CHECK: tbx2_16b +; CHECK: tbx.16b + %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) + ret <16 x i8> %tmp3 +} + +define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { +; CHECK: tbx3_8b +; CHECK: tbx.8b + %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) + ret <8 x i8> %tmp3 +} + +define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { +; CHECK: tbx3_16b +; CHECK: tbx.16b + %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) + ret <16 x i8> %tmp3 +} + +define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) { +; CHECK: tbx4_8b +; CHECK: tbx.8b + %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) + ret <8 x i8> %tmp3 +} + +define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) { +; CHECK: tbx4_16b +; CHECK: tbx.16b + %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) + ret <16 x i8> %tmp3 +} + +declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone + diff --git a/test/CodeGen/ARM64/this-return.ll b/test/CodeGen/AArch64/arm64-this-return.ll similarity index 100% rename from test/CodeGen/ARM64/this-return.ll rename to test/CodeGen/AArch64/arm64-this-return.ll diff --git a/test/CodeGen/ARM64/tls-darwin.ll b/test/CodeGen/AArch64/arm64-tls-darwin.ll similarity index 100% rename from test/CodeGen/ARM64/tls-darwin.ll rename to test/CodeGen/AArch64/arm64-tls-darwin.ll diff --git a/test/CodeGen/ARM64/tls-dynamic-together.ll b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll similarity index 100% rename from test/CodeGen/ARM64/tls-dynamic-together.ll rename to test/CodeGen/AArch64/arm64-tls-dynamic-together.ll diff --git 
a/test/CodeGen/ARM64/tls-dynamics.ll b/test/CodeGen/AArch64/arm64-tls-dynamics.ll similarity index 100% rename from test/CodeGen/ARM64/tls-dynamics.ll rename to test/CodeGen/AArch64/arm64-tls-dynamics.ll diff --git a/test/CodeGen/ARM64/tls-execs.ll b/test/CodeGen/AArch64/arm64-tls-execs.ll similarity index 100% rename from test/CodeGen/ARM64/tls-execs.ll rename to test/CodeGen/AArch64/arm64-tls-execs.ll diff --git a/test/CodeGen/ARM64/trap.ll b/test/CodeGen/AArch64/arm64-trap.ll similarity index 100% rename from test/CodeGen/ARM64/trap.ll rename to test/CodeGen/AArch64/arm64-trap.ll diff --git a/test/CodeGen/ARM64/trn.ll b/test/CodeGen/AArch64/arm64-trn.ll similarity index 98% rename from test/CodeGen/ARM64/trn.ll rename to test/CodeGen/AArch64/arm64-trn.ll index f46798490f69..2db7a14e7549 100644 --- a/test/CodeGen/ARM64/trn.ll +++ b/test/CodeGen/AArch64/arm64-trn.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vtrni8: diff --git a/test/CodeGen/ARM64/trunc-store.ll b/test/CodeGen/AArch64/arm64-trunc-store.ll similarity index 100% rename from test/CodeGen/ARM64/trunc-store.ll rename to test/CodeGen/AArch64/arm64-trunc-store.ll diff --git a/test/CodeGen/ARM64/umaxv.ll b/test/CodeGen/AArch64/arm64-umaxv.ll similarity index 75% rename from test/CodeGen/ARM64/umaxv.ll rename to test/CodeGen/AArch64/arm64-umaxv.ll index 15277d32f030..d523f317d087 100644 --- a/test/CodeGen/ARM64/umaxv.ll +++ b/test/CodeGen/AArch64/arm64-umaxv.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define i32 @vmax_u8x8(<8 x i8> %a) nounwind ssp { ; CHECK-LABEL: vmax_u8x8: @@ -7,7 +7,7 @@ define i32 @vmax_u8x8(<8 x i8> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %a) nounwind + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) nounwind %tmp = trunc i32 %vmaxv.i to i8 %tobool = icmp eq i8 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -30,7 +30,7 @@ define i32 @vmax_u4x16(<4 x i16> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v4i16(<4 x i16> %a) nounwind + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) nounwind %tmp = trunc i32 %vmaxv.i to i16 %tobool = icmp eq i16 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -51,7 +51,7 @@ define i32 @vmax_u8x16(<8 x i16> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i16(<8 x i16> %a) nounwind + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) nounwind %tmp = trunc i32 %vmaxv.i to i16 %tobool = icmp eq i16 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -72,7 +72,7 @@ define i32 @vmax_u16x8(<16 x i8> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8> %a) nounwind + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) nounwind %tmp = trunc i32 %vmaxv.i to i8 %tobool = icmp eq i8 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -86,7 +86,7 @@ return: ret i32 %retval.0 } -declare i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8>) nounwind readnone 
-declare i32 @llvm.arm64.neon.umaxv.i32.v8i16(<8 x i16>) nounwind readnone -declare i32 @llvm.arm64.neon.umaxv.i32.v4i16(<4 x i16>) nounwind readnone -declare i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8>) nounwind readnone +declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>) nounwind readnone +declare i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>) nounwind readnone +declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>) nounwind readnone +declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>) nounwind readnone diff --git a/test/CodeGen/ARM64/uminv.ll b/test/CodeGen/AArch64/arm64-uminv.ll similarity index 75% rename from test/CodeGen/ARM64/uminv.ll rename to test/CodeGen/AArch64/arm64-uminv.ll index 440522f1693e..3bade4b28b8f 100644 --- a/test/CodeGen/ARM64/uminv.ll +++ b/test/CodeGen/AArch64/arm64-uminv.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define i32 @vmin_u8x8(<8 x i8> %a) nounwind ssp { ; CHECK-LABEL: vmin_u8x8: @@ -7,7 +7,7 @@ define i32 @vmin_u8x8(<8 x i8> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %a) nounwind + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) nounwind %tmp = trunc i32 %vminv.i to i8 %tobool = icmp eq i8 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -30,7 +30,7 @@ define i32 @vmin_u4x16(<4 x i16> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v4i16(<4 x i16> %a) nounwind + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) nounwind %tmp = trunc i32 %vminv.i to i16 %tobool = icmp eq i16 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -51,7 +51,7 @@ define i32 @vmin_u8x16(<8 x i16> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i16(<8 x i16> %a) nounwind + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) nounwind %tmp = trunc i32 %vminv.i to i16 %tobool = icmp eq i16 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -72,7 +72,7 @@ define i32 @vmin_u16x8(<16 x i8> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8> %a) nounwind + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) nounwind %tmp = trunc i32 %vminv.i to i8 %tobool = icmp eq i8 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -86,7 +86,7 @@ return: ret i32 %retval.0 } -declare i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8>) nounwind readnone -declare i32 @llvm.arm64.neon.uminv.i32.v8i16(<8 x i16>) nounwind readnone -declare i32 @llvm.arm64.neon.uminv.i32.v4i16(<4 x i16>) nounwind readnone -declare i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8>) nounwind readnone +declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>) nounwind readnone +declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>) nounwind readnone +declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>) nounwind readnone +declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>) nounwind readnone diff --git a/test/CodeGen/ARM64/umov.ll b/test/CodeGen/AArch64/arm64-umov.ll similarity index 90% rename from test/CodeGen/ARM64/umov.ll rename to test/CodeGen/AArch64/arm64-umov.ll index 19fd91b6c3d2..a1ef9908646a 100644 --- a/test/CodeGen/ARM64/umov.ll +++ 
b/test/CodeGen/AArch64/arm64-umov.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define zeroext i8 @f1(<16 x i8> %a) { ; CHECK-LABEL: f1: diff --git a/test/CodeGen/ARM64/unaligned_ldst.ll b/test/CodeGen/AArch64/arm64-unaligned_ldst.ll similarity index 100% rename from test/CodeGen/ARM64/unaligned_ldst.ll rename to test/CodeGen/AArch64/arm64-unaligned_ldst.ll diff --git a/test/CodeGen/ARM64/uzp.ll b/test/CodeGen/AArch64/arm64-uzp.ll similarity index 98% rename from test/CodeGen/ARM64/uzp.ll rename to test/CodeGen/AArch64/arm64-uzp.ll index 60e16d0d686c..cdd8d31c9981 100644 --- a/test/CodeGen/ARM64/uzp.ll +++ b/test/CodeGen/AArch64/arm64-uzp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vuzpi8: diff --git a/test/CodeGen/ARM64/vaargs.ll b/test/CodeGen/AArch64/arm64-vaargs.ll similarity index 100% rename from test/CodeGen/ARM64/vaargs.ll rename to test/CodeGen/AArch64/arm64-vaargs.ll diff --git a/test/CodeGen/ARM64/vabs.ll b/test/CodeGen/AArch64/arm64-vabs.ll similarity index 68% rename from test/CodeGen/ARM64/vabs.ll rename to test/CodeGen/AArch64/arm64-vabs.ll index 0d8aa24e1b47..5afc8d9f3f49 100644 --- a/test/CodeGen/ARM64/vabs.ll +++ b/test/CodeGen/AArch64/arm64-vabs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -6,7 +6,7 @@ define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: sabdl.8h %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 } @@ -16,7 +16,7 @@ define <4 x i32> @sabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: sabdl.4s %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } @@ -26,7 +26,7 @@ define <2 x i64> @sabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sabdl.2d %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 } @@ -38,7 +38,7 @@ define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind { %load2 = load <16 x i8>* %B %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> - %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 } @@ -50,7 +50,7 @@ define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { %load2 = load <8 x i16>* %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> 
%tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } @@ -62,7 +62,7 @@ define <2 x i64> @sabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { %load2 = load <4 x i32>* %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 } @@ -72,7 +72,7 @@ define <8 x i16> @uabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: uabdl.8h %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 } @@ -82,7 +82,7 @@ define <4 x i32> @uabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uabdl.4s %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } @@ -92,7 +92,7 @@ define <2 x i64> @uabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uabdl.2d %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 } @@ -105,7 +105,7 @@ define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind { %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> - %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 } @@ -117,7 +117,7 @@ define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { %load2 = load <8 x i16>* %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } @@ -129,7 +129,7 @@ define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { %load2 = load <4 x i32>* %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 } @@ -139,7 +139,7 @@ define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK: fabd.2s %tmp1 = load <2 x float>* %A %tmp2 = 
load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.fabd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -148,7 +148,7 @@ define <4 x float> @fabd_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fabd.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fabd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -157,20 +157,20 @@ define <2 x double> @fabd_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fabd.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fabd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fabd.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fabd.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fabd.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>) nounwind readnone define <8 x i8> @sabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: sabd_8b: ;CHECK: sabd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -179,7 +179,7 @@ define <16 x i8> @sabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: sabd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -188,7 +188,7 @@ define <4 x i16> @sabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: sabd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -197,7 +197,7 @@ define <8 x i16> @sabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sabd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -206,7 +206,7 @@ define <2 x i32> @sabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sabd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -215,23 +215,23 @@ define <4 x i32> @sabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sabd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> 
@llvm.arm64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @uabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: uabd_8b: ;CHECK: uabd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -240,7 +240,7 @@ define <16 x i8> @uabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: uabd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -249,7 +249,7 @@ define <4 x i16> @uabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uabd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -258,7 +258,7 @@ define <8 x i16> @uabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: uabd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -267,7 +267,7 @@ define <2 x i32> @uabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uabd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -276,22 +276,22 @@ define <4 x i32> @uabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: uabd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> 
@llvm.arm64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @sqabs_8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: sqabs_8b: ;CHECK: sqabs.8b %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqabs.v8i8(<8 x i8> %tmp1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp3 } @@ -299,7 +299,7 @@ define <16 x i8> @sqabs_16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: sqabs_16b: ;CHECK: sqabs.16b %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqabs.v16i8(<16 x i8> %tmp1) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp3 } @@ -307,7 +307,7 @@ define <4 x i16> @sqabs_4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: sqabs_4h: ;CHECK: sqabs.4h %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqabs.v4i16(<4 x i16> %tmp1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> %tmp1) ret <4 x i16> %tmp3 } @@ -315,7 +315,7 @@ define <8 x i16> @sqabs_8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqabs_8h: ;CHECK: sqabs.8h %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqabs.v8i16(<8 x i16> %tmp1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> %tmp1) ret <8 x i16> %tmp3 } @@ -323,7 +323,7 @@ define <2 x i32> @sqabs_2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: sqabs_2s: ;CHECK: sqabs.2s %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqabs.v2i32(<2 x i32> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp3 } @@ -331,22 +331,22 @@ define <4 x i32> @sqabs_4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqabs_4s: ;CHECK: sqabs.4s %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqabs.v4i32(<4 x i32> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sqabs.v8i8(<8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sqabs.v16i8(<16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqabs.v4i16(<4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqabs.v8i16(<8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqabs.v2i32(<2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqabs.v4i32(<4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> 
@llvm.aarch64.neon.sqabs.v4i32(<4 x i32>) nounwind readnone define <8 x i8> @sqneg_8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: sqneg_8b: ;CHECK: sqneg.8b %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqneg.v8i8(<8 x i8> %tmp1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp3 } @@ -354,7 +354,7 @@ define <16 x i8> @sqneg_16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: sqneg_16b: ;CHECK: sqneg.16b %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqneg.v16i8(<16 x i8> %tmp1) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp3 } @@ -362,7 +362,7 @@ define <4 x i16> @sqneg_4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: sqneg_4h: ;CHECK: sqneg.4h %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqneg.v4i16(<4 x i16> %tmp1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> %tmp1) ret <4 x i16> %tmp3 } @@ -370,7 +370,7 @@ define <8 x i16> @sqneg_8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqneg_8h: ;CHECK: sqneg.8h %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqneg.v8i16(<8 x i16> %tmp1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %tmp1) ret <8 x i16> %tmp3 } @@ -378,7 +378,7 @@ define <2 x i32> @sqneg_2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: sqneg_2s: ;CHECK: sqneg.2s %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqneg.v2i32(<2 x i32> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp3 } @@ -386,22 +386,22 @@ define <4 x i32> @sqneg_4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqneg_4s: ;CHECK: sqneg.4s %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqneg.v4i32(<4 x i32> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sqneg.v8i8(<8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sqneg.v16i8(<16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqneg.v4i16(<4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqneg.v8i16(<8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqneg.v2i32(<2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqneg.v4i32(<4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32>) nounwind readnone define <8 x i8> @abs_8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: abs_8b: ;CHECK: abs.8b %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.abs.v8i8(<8 x i8> %tmp1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp3 } @@ -409,7 +409,7 @@ define <16 x i8> @abs_16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: abs_16b: ;CHECK: abs.16b %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.abs.v16i8(<16 x i8> %tmp1) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp3 } @@ -417,7 +417,7 @@ define <4 x i16> @abs_4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: abs_4h: ;CHECK: abs.4h %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> 
@llvm.arm64.neon.abs.v4i16(<4 x i16> %tmp1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> %tmp1) ret <4 x i16> %tmp3 } @@ -425,7 +425,7 @@ define <8 x i16> @abs_8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: abs_8h: ;CHECK: abs.8h %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.abs.v8i16(<8 x i16> %tmp1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> %tmp1) ret <8 x i16> %tmp3 } @@ -433,7 +433,7 @@ define <2 x i32> @abs_2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: abs_2s: ;CHECK: abs.2s %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.abs.v2i32(<2 x i32> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp3 } @@ -441,32 +441,32 @@ define <4 x i32> @abs_4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: abs_4s: ;CHECK: abs.4s %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.abs.v4i32(<4 x i32> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp3 } define <1 x i64> @abs_1d(<1 x i64> %A) nounwind { ; CHECK-LABEL: abs_1d: ; CHECK: abs d0, d0 - %abs = call <1 x i64> @llvm.arm64.neon.abs.v1i64(<1 x i64> %A) + %abs = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %A) ret <1 x i64> %abs } define i64 @abs_1d_honestly(i64 %A) nounwind { ; CHECK-LABEL: abs_1d_honestly: ; CHECK: abs d0, d0 - %abs = call i64 @llvm.arm64.neon.abs.i64(i64 %A) + %abs = call i64 @llvm.aarch64.neon.abs.i64(i64 %A) ret i64 %abs } -declare <8 x i8> @llvm.arm64.neon.abs.v8i8(<8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.abs.v16i8(<16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.abs.v4i16(<4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.abs.v8i16(<8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.abs.v2i32(<2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.abs.v4i32(<4 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.abs.v1i64(<1 x i64>) nounwind readnone -declare i64 @llvm.arm64.neon.abs.i64(i64) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.abs.i64(i64) nounwind readnone define <8 x i16> @sabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind { ;CHECK-LABEL: sabal8h: @@ -474,7 +474,7 @@ define <8 x i16> @sabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind { %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i16>* %C - %tmp4 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16> %tmp5 = add <8 x i16> %tmp3, %tmp4.1 ret <8 x i16> %tmp5 @@ -486,7 +486,7 @@ define <4 x i32> @sabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x 
i16> %tmp1, <4 x i16> %tmp2) %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32> %tmp5 = add <4 x i32> %tmp3, %tmp4.1 ret <4 x i32> %tmp5 @@ -498,7 +498,7 @@ define <2 x i64> @sabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64> %tmp4.1.1 = zext <2 x i32> %tmp4 to <2 x i64> %tmp5 = add <2 x i64> %tmp3, %tmp4.1 @@ -513,7 +513,7 @@ define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwin %tmp3 = load <8 x i16>* %C %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> - %tmp4 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16> %tmp5 = add <8 x i16> %tmp3, %tmp4.1 ret <8 x i16> %tmp5 @@ -527,7 +527,7 @@ define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwin %tmp3 = load <4 x i32>* %C %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp4 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32> %tmp5 = add <4 x i32> %tmp3, %tmp4.1 ret <4 x i32> %tmp5 @@ -541,7 +541,7 @@ define <2 x i64> @sabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwin %tmp3 = load <2 x i64>* %C %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp4 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64> %tmp5 = add <2 x i64> %tmp3, %tmp4.1 ret <2 x i64> %tmp5 @@ -553,7 +553,7 @@ define <8 x i16> @uabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind { %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i16>* %C - %tmp4 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16> %tmp5 = add <8 x i16> %tmp3, %tmp4.1 ret <8 x i16> %tmp5 @@ -565,7 +565,7 @@ define <4 x i32> @uabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32> %tmp5 = add <4 x i32> %tmp3, %tmp4.1 ret <4 x i32> %tmp5 @@ -577,7 +577,7 @@ define <2 x i64> @uabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64> %tmp5 = add <2 x i64> 
%tmp3, %tmp4.1 ret <2 x i64> %tmp5 @@ -591,7 +591,7 @@ define <8 x i16> @uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwin %tmp3 = load <8 x i16>* %C %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> - %tmp4 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16> %tmp5 = add <8 x i16> %tmp3, %tmp4.1 ret <8 x i16> %tmp5 @@ -605,7 +605,7 @@ define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwin %tmp3 = load <4 x i32>* %C %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp4 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32> %tmp5 = add <4 x i32> %tmp3, %tmp4.1 ret <4 x i32> %tmp5 @@ -619,7 +619,7 @@ define <2 x i64> @uabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwin %tmp3 = load <2 x i64>* %C %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp4 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64> %tmp5 = add <2 x i64> %tmp3, %tmp4.1 ret <2 x i64> %tmp5 @@ -630,7 +630,7 @@ define <8 x i8> @saba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK: saba.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4 = load <8 x i8>* %C %tmp5 = add <8 x i8> %tmp3, %tmp4 ret <8 x i8> %tmp5 @@ -641,7 +641,7 @@ define <16 x i8> @saba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind ;CHECK: saba.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) %tmp4 = load <16 x i8>* %C %tmp5 = add <16 x i8> %tmp3, %tmp4 ret <16 x i8> %tmp5 @@ -652,7 +652,7 @@ define <4 x i16> @saba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind ;CHECK: saba.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4 = load <4 x i16>* %C %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 @@ -663,7 +663,7 @@ define <8 x i16> @saba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind ;CHECK: saba.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) %tmp4 = load <8 x i16>* %C %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 @@ -674,7 +674,7 @@ define <2 x i32> @saba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind ;CHECK: saba.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> 
@llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4 = load <2 x i32>* %C %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 @@ -685,7 +685,7 @@ define <4 x i32> @saba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind ;CHECK: saba.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) %tmp4 = load <4 x i32>* %C %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 @@ -696,7 +696,7 @@ define <8 x i8> @uaba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK: uaba.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4 = load <8 x i8>* %C %tmp5 = add <8 x i8> %tmp3, %tmp4 ret <8 x i8> %tmp5 @@ -707,7 +707,7 @@ define <16 x i8> @uaba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind ;CHECK: uaba.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) %tmp4 = load <16 x i8>* %C %tmp5 = add <16 x i8> %tmp3, %tmp4 ret <16 x i8> %tmp5 @@ -718,7 +718,7 @@ define <4 x i16> @uaba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind ;CHECK: uaba.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4 = load <4 x i16>* %C %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 @@ -729,7 +729,7 @@ define <8 x i16> @uaba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind ;CHECK: uaba.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) %tmp4 = load <8 x i16>* %C %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 @@ -740,7 +740,7 @@ define <2 x i32> @uaba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind ;CHECK: uaba.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4 = load <2 x i32>* %C %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 @@ -751,7 +751,7 @@ define <4 x i32> @uaba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind ;CHECK: uaba.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) %tmp4 = load <4 x i32>* %C %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 @@ -761,19 +761,19 @@ define <4 x i32> @uaba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind define float @fabds(float %a, float %b) nounwind { ; CHECK-LABEL: fabds: ; CHECK: fabd s0, s0, s1 - %vabd.i = tail call float @llvm.arm64.sisd.fabd.f32(float %a, float %b) nounwind + %vabd.i = tail call float 
@llvm.aarch64.sisd.fabd.f32(float %a, float %b) nounwind ret float %vabd.i } define double @fabdd(double %a, double %b) nounwind { ; CHECK-LABEL: fabdd: ; CHECK: fabd d0, d0, d1 - %vabd.i = tail call double @llvm.arm64.sisd.fabd.f64(double %a, double %b) nounwind + %vabd.i = tail call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) nounwind ret double %vabd.i } -declare double @llvm.arm64.sisd.fabd.f64(double, double) nounwind readnone -declare float @llvm.arm64.sisd.fabd.f32(float, float) nounwind readnone +declare double @llvm.aarch64.sisd.fabd.f64(double, double) nounwind readnone +declare float @llvm.aarch64.sisd.fabd.f32(float, float) nounwind readnone define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { ; CHECK-LABEL: uabdl_from_extract_dup: @@ -784,7 +784,7 @@ define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind + %res = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind %res1 = zext <2 x i32> %res to <2 x i64> ret <2 x i64> %res1 } @@ -798,7 +798,7 @@ define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind + %res = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind %res1 = zext <2 x i32> %res to <2 x i64> ret <2 x i64> %res1 } diff --git a/test/CodeGen/ARM64/vadd.ll b/test/CodeGen/AArch64/arm64-vadd.ll similarity index 80% rename from test/CodeGen/ARM64/vadd.ll rename to test/CodeGen/AArch64/arm64-vadd.ll index f674c6de3390..9ed8aa6d7c5d 100644 --- a/test/CodeGen/ARM64/vadd.ll +++ b/test/CodeGen/AArch64/arm64-vadd.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s define <8 x i8> @addhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: addhn8b: ;CHECK: addhn.8b %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <4 x i16> @addhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: addhn.4h %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i16> %tmp3 } @@ -23,7 +23,7 @@ define <2 x i32> @addhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: addhn.2s %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i32> %tmp3 } @@ -31,8 +31,8 @@ define <16 x i8> @addhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind { ;CHECK-LABEL: addhn2_16b: ;CHECK: addhn.8b ;CHECK-NEXT: addhn2.16b - %vaddhn2.i = tail call <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind - %vaddhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16> %a, <8 x 
i16> %b) nounwind + %vaddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vaddhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind %res = shufflevector <8 x i8> %vaddhn2.i, <8 x i8> %vaddhn_high2.i, <16 x i32> ret <16 x i8> %res } @@ -41,8 +41,8 @@ define <8 x i16> @addhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind { ;CHECK-LABEL: addhn2_8h: ;CHECK: addhn.4h ;CHECK-NEXT: addhn2.8h - %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind - %vaddhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vaddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vaddhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind %res = shufflevector <4 x i16> %vaddhn2.i, <4 x i16> %vaddhn_high3.i, <8 x i32> ret <8 x i16> %res } @@ -51,15 +51,15 @@ define <4 x i32> @addhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind { ;CHECK-LABEL: addhn2_4s: ;CHECK: addhn.2s ;CHECK-NEXT: addhn2.4s - %vaddhn2.i = tail call <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind - %vaddhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vaddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vaddhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind %res = shufflevector <2 x i32> %vaddhn2.i, <2 x i32> %vaddhn_high3.i, <4 x i32> ret <4 x i32> %res } -declare <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone define <8 x i8> @raddhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind { @@ -67,7 +67,7 @@ define <8 x i8> @raddhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: raddhn.8b %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i8> %tmp3 } @@ -76,7 +76,7 @@ define <4 x i16> @raddhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: raddhn.4h %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i16> %tmp3 } @@ -85,7 +85,7 @@ define <2 x i32> @raddhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: raddhn.2s %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i32> %tmp3 } @@ -93,8 +93,8 @@ define <16 x i8> @raddhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind { ;CHECK-LABEL: raddhn2_16b: ;CHECK: raddhn.8b ;CHECK-NEXT: raddhn2.16b - %vraddhn2.i = tail call <8 x i8> 
@llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind - %vraddhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vraddhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind %res = shufflevector <8 x i8> %vraddhn2.i, <8 x i8> %vraddhn_high2.i, <16 x i32> ret <16 x i8> %res } @@ -103,8 +103,8 @@ define <8 x i16> @raddhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind { ;CHECK-LABEL: raddhn2_8h: ;CHECK: raddhn.4h ;CHECK-NEXT: raddhn2.8h - %vraddhn2.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind - %vraddhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vraddhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind %res = shufflevector <4 x i16> %vraddhn2.i, <4 x i16> %vraddhn_high3.i, <8 x i32> ret <8 x i16> %res } @@ -113,15 +113,15 @@ define <4 x i32> @raddhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind { ;CHECK-LABEL: raddhn2_4s: ;CHECK: raddhn.2s ;CHECK-NEXT: raddhn2.4s - %vraddhn2.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind - %vraddhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vraddhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind %res = shufflevector <2 x i32> %vraddhn2.i, <2 x i32> %vraddhn_high3.i, <4 x i32> ret <4 x i32> %res } -declare <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone define <8 x i16> @saddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: saddl8h: @@ -428,7 +428,7 @@ define <4 x i16> @saddlp4h(<8 x i8>* %A) nounwind { ;CHECK-LABEL: saddlp4h: ;CHECK: saddlp.4h %tmp1 = load <8 x i8>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1) ret <4 x i16> %tmp3 } @@ -436,7 +436,7 @@ define <2 x i32> @saddlp2s(<4 x i16>* %A) nounwind { ;CHECK-LABEL: saddlp2s: ;CHECK: saddlp.2s %tmp1 = load <4 x i16>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1) ret <2 x i32> %tmp3 } @@ -444,7 +444,7 @@ define <1 x i64> @saddlp1d(<2 x i32>* %A) nounwind { ;CHECK-LABEL: saddlp1d: ;CHECK: saddlp.1d %tmp1 = load <2 x i32>* %A - %tmp3 = call <1 x i64> @llvm.arm64.neon.saddlp.v1i64.v2i32(<2 x i32> %tmp1) + %tmp3 = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %tmp1) ret <1 x i64> %tmp3 } @@ -452,7 +452,7 @@ define <8 x i16> @saddlp8h(<16 x i8>* %A) nounwind { ;CHECK-LABEL: saddlp8h: ;CHECK: 
saddlp.8h %tmp1 = load <16 x i8>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1) ret <8 x i16> %tmp3 } @@ -460,7 +460,7 @@ define <4 x i32> @saddlp4s(<8 x i16>* %A) nounwind { ;CHECK-LABEL: saddlp4s: ;CHECK: saddlp.4s %tmp1 = load <8 x i16>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1) ret <4 x i32> %tmp3 } @@ -468,23 +468,23 @@ define <2 x i64> @saddlp2d(<4 x i32>* %A) nounwind { ;CHECK-LABEL: saddlp2d: ;CHECK: saddlp.2d %tmp1 = load <4 x i32>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1) ret <2 x i64> %tmp3 } -declare <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.saddlp.v1i64.v2i32(<2 x i32>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32>) nounwind readnone define <4 x i16> @uaddlp4h(<8 x i8>* %A) nounwind { ;CHECK-LABEL: uaddlp4h: ;CHECK: uaddlp.4h %tmp1 = load <8 x i8>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1) ret <4 x i16> %tmp3 } @@ -492,7 +492,7 @@ define <2 x i32> @uaddlp2s(<4 x i16>* %A) nounwind { ;CHECK-LABEL: uaddlp2s: ;CHECK: uaddlp.2s %tmp1 = load <4 x i16>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1) ret <2 x i32> %tmp3 } @@ -500,7 +500,7 @@ define <1 x i64> @uaddlp1d(<2 x i32>* %A) nounwind { ;CHECK-LABEL: uaddlp1d: ;CHECK: uaddlp.1d %tmp1 = load <2 x i32>* %A - %tmp3 = call <1 x i64> @llvm.arm64.neon.uaddlp.v1i64.v2i32(<2 x i32> %tmp1) + %tmp3 = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %tmp1) ret <1 x i64> %tmp3 } @@ -508,7 +508,7 @@ define <8 x i16> @uaddlp8h(<16 x i8>* %A) nounwind { ;CHECK-LABEL: uaddlp8h: ;CHECK: uaddlp.8h %tmp1 = load <16 x i8>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1) ret <8 x i16> %tmp3 } @@ -516,7 +516,7 @@ define <4 x i32> @uaddlp4s(<8 x i16>* %A) nounwind { ;CHECK-LABEL: uaddlp4s: ;CHECK: uaddlp.4s %tmp1 = load <8 x i16>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1) ret <4 x i32> %tmp3 } @@ -524,23 +524,23 @@ define <2 x i64> @uaddlp2d(<4 x i32>* %A) nounwind { ;CHECK-LABEL: 
uaddlp2d: ;CHECK: uaddlp.2d %tmp1 = load <4 x i32>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1) ret <2 x i64> %tmp3 } -declare <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.uaddlp.v1i64.v2i32(<2 x i32>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32>) nounwind readnone define <4 x i16> @sadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: sadalp4h: ;CHECK: sadalp.4h %tmp1 = load <8 x i8>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1) %tmp4 = load <4 x i16>* %B %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 @@ -550,7 +550,7 @@ define <2 x i32> @sadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: sadalp2s: ;CHECK: sadalp.2s %tmp1 = load <4 x i16>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1) %tmp4 = load <2 x i32>* %B %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 @@ -560,7 +560,7 @@ define <8 x i16> @sadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: sadalp8h: ;CHECK: sadalp.8h %tmp1 = load <16 x i8>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1) %tmp4 = load <8 x i16>* %B %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 @@ -570,7 +570,7 @@ define <4 x i32> @sadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: sadalp4s: ;CHECK: sadalp.4s %tmp1 = load <8 x i16>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1) %tmp4 = load <4 x i32>* %B %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 @@ -580,7 +580,7 @@ define <2 x i64> @sadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: sadalp2d: ;CHECK: sadalp.2d %tmp1 = load <4 x i32>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1) %tmp4 = load <2 x i64>* %B %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 @@ -590,7 +590,7 @@ define <4 x i16> @uadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: uadalp4h: ;CHECK: uadalp.4h %tmp1 = load <8 x i8>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1) %tmp4 = load <4 x i16>* %B %tmp5 = 
add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 @@ -600,7 +600,7 @@ define <2 x i32> @uadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: uadalp2s: ;CHECK: uadalp.2s %tmp1 = load <4 x i16>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1) %tmp4 = load <2 x i32>* %B %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 @@ -610,7 +610,7 @@ define <8 x i16> @uadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: uadalp8h: ;CHECK: uadalp.8h %tmp1 = load <16 x i8>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1) %tmp4 = load <8 x i16>* %B %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 @@ -620,7 +620,7 @@ define <4 x i32> @uadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: uadalp4s: ;CHECK: uadalp.4s %tmp1 = load <8 x i16>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1) %tmp4 = load <4 x i32>* %B %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 @@ -630,7 +630,7 @@ define <2 x i64> @uadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: uadalp2d: ;CHECK: uadalp.2d %tmp1 = load <4 x i32>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1) %tmp4 = load <2 x i64>* %B %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 @@ -641,7 +641,7 @@ define <8 x i8> @addp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: addp.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.addp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -650,7 +650,7 @@ define <16 x i8> @addp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: addp.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.addp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -659,7 +659,7 @@ define <4 x i16> @addp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: addp.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.addp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -668,7 +668,7 @@ define <8 x i16> @addp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: addp.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.addp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -677,7 +677,7 @@ define <2 x i32> @addp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: addp.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.addp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -686,7 +686,7 @@ define <4 x i32> @addp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: addp.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B 
- %tmp3 = call <4 x i32> @llvm.arm64.neon.addp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -695,24 +695,24 @@ define <2 x i64> @addp_2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: addp.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.addp.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.addp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.addp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.addp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.addp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.addp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.addp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.addp.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <2 x float> @faddp_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: faddp_2s: ;CHECK: faddp.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.addp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -721,7 +721,7 @@ define <4 x float> @faddp_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: faddp.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.addp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -730,13 +730,13 @@ define <2 x double> @faddp_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: faddp.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.addp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.addp.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.addp.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.addp.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x i64> @uaddl2_duprhs(<4 x i32> 
%lhs, i32 %rhs) { ; CHECK-LABEL: uaddl2_duprhs diff --git a/test/CodeGen/ARM64/vaddlv.ll b/test/CodeGen/AArch64/arm64-vaddlv.ll similarity index 55% rename from test/CodeGen/ARM64/vaddlv.ll rename to test/CodeGen/AArch64/arm64-vaddlv.ll index d4d4608ba076..2d6413812ec8 100644 --- a/test/CodeGen/ARM64/vaddlv.ll +++ b/test/CodeGen/AArch64/arm64-vaddlv.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s define i64 @test_vaddlv_s32(<2 x i32> %a1) nounwind readnone { ; CHECK: test_vaddlv_s32 @@ -6,7 +6,7 @@ define i64 @test_vaddlv_s32(<2 x i32> %a1) nounwind readnone { ; CHECK-NEXT: fmov x[[OUTREG:[0-9]+]], d[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddlv.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32> %a1) nounwind + %vaddlv.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a1) nounwind ret i64 %vaddlv.i } @@ -16,11 +16,11 @@ define i64 @test_vaddlv_u32(<2 x i32> %a1) nounwind readnone { ; CHECK-NEXT: fmov x[[OUTREG:[0-9]+]], d[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddlv.i = tail call i64 @llvm.arm64.neon.uaddlv.i64.v2i32(<2 x i32> %a1) nounwind + %vaddlv.i = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a1) nounwind ret i64 %vaddlv.i } -declare i64 @llvm.arm64.neon.uaddlv.i64.v2i32(<2 x i32>) nounwind readnone +declare i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32>) nounwind readnone -declare i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32>) nounwind readnone +declare i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM64/vaddv.ll b/test/CodeGen/AArch64/arm64-vaddv.ll similarity index 63% rename from test/CodeGen/ARM64/vaddv.ll rename to test/CodeGen/AArch64/arm64-vaddv.ll index 154f91779375..2d92ce6ea570 100644 --- a/test/CodeGen/ARM64/vaddv.ll +++ b/test/CodeGen/AArch64/arm64-vaddv.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s define signext i8 @test_vaddv_s8(<8 x i8> %a1) { ; CHECK-LABEL: test_vaddv_s8: @@ -6,7 +6,7 @@ define signext i8 @test_vaddv_s8(<8 x i8> %a1) { ; CHECK-NEXT: smov.b w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a1) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -17,7 +17,7 @@ define signext i16 @test_vaddv_s16(<4 x i16> %a1) { ; CHECK-NEXT: smov.h w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a1) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -29,7 +29,7 @@ define i32 @test_vaddv_s32(<2 x i32> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v2i32(<2 x i32> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a1) ret i32 %vaddv.i } @@ -39,7 +39,7 @@ define i64 @test_vaddv_s64(<2 x i64> %a1) { ; CHECK-NEXT: fmov x0, [[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i64 @llvm.arm64.neon.saddv.i64.v2i64(<2 x i64> %a1) + %vaddv.i = tail call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a1) ret i64 %vaddv.i } @@ -49,7 +49,7 @@ define zeroext i8 @test_vaddv_u8(<8 x i8> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = 
tail call i32 @llvm.arm64.neon.uaddv.i32.v8i8(<8 x i8> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a1) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -60,7 +60,7 @@ define i32 @test_vaddv_u8_masked(<8 x i8> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v8i8(<8 x i8> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a1) %0 = and i32 %vaddv.i, 511 ; 0x1ff ret i32 %0 } @@ -71,7 +71,7 @@ define zeroext i16 @test_vaddv_u16(<4 x i16> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v4i16(<4 x i16> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a1) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -82,7 +82,7 @@ define i32 @test_vaddv_u16_masked(<4 x i16> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v4i16(<4 x i16> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a1) %0 = and i32 %vaddv.i, 3276799 ; 0x31ffff ret i32 %0 } @@ -94,7 +94,7 @@ define i32 @test_vaddv_u32(<2 x i32> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v2i32(<2 x i32> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a1) ret i32 %vaddv.i } @@ -103,7 +103,7 @@ define float @test_vaddv_f32(<2 x float> %a1) { ; CHECK: faddp.2s s0, v0 ; CHECK-NEXT: ret entry: - %vaddv.i = tail call float @llvm.arm64.neon.faddv.f32.v2f32(<2 x float> %a1) + %vaddv.i = tail call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a1) ret float %vaddv.i } @@ -113,7 +113,7 @@ define float @test_vaddv_v4f32(<4 x float> %a1) { ; CHECK: faddp.2s s0, [[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call float @llvm.arm64.neon.faddv.f32.v4f32(<4 x float> %a1) + %vaddv.i = tail call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a1) ret float %vaddv.i } @@ -122,7 +122,7 @@ define double @test_vaddv_f64(<2 x double> %a1) { ; CHECK: faddp.2d d0, v0 ; CHECK-NEXT: ret entry: - %vaddv.i = tail call double @llvm.arm64.neon.faddv.f64.v2f64(<2 x double> %a1) + %vaddv.i = tail call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a1) ret double %vaddv.i } @@ -132,7 +132,7 @@ define i64 @test_vaddv_u64(<2 x i64> %a1) { ; CHECK-NEXT: fmov x0, [[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i64 @llvm.arm64.neon.uaddv.i64.v2i64(<2 x i64> %a1) + %vaddv.i = tail call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a1) ret i64 %vaddv.i } @@ -143,7 +143,7 @@ define <1 x i64> @test_vaddv_u64_to_vec(<2 x i64> %a1) { ; CHECK-NOT: ins ; CHECK: ret entry: - %vaddv.i = tail call i64 @llvm.arm64.neon.uaddv.i64.v2i64(<2 x i64> %a1) + %vaddv.i = tail call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a1) %vec = insertelement <1 x i64> undef, i64 %vaddv.i, i32 0 ret <1 x i64> %vec } @@ -154,7 +154,7 @@ define signext i8 @test_vaddvq_s8(<16 x i8> %a1) { ; CHECK-NEXT: smov.b w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a1) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -165,7 +165,7 @@ define signext i16 @test_vaddvq_s16(<8 x i16> %a1) { ; CHECK-NEXT: smov.h w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16> %a1) + 
%vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a1) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -176,7 +176,7 @@ define i32 @test_vaddvq_s32(<4 x i32> %a1) { ; CHECK-NEXT: fmov w0, [[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a1) ret i32 %vaddv.i } @@ -186,7 +186,7 @@ define zeroext i8 @test_vaddvq_u8(<16 x i8> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v16i8(<16 x i8> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a1) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -197,7 +197,7 @@ define zeroext i16 @test_vaddvq_u16(<8 x i16> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v8i16(<8 x i16> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a1) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -208,38 +208,38 @@ define i32 @test_vaddvq_u32(<4 x i32> %a1) { ; CHECK-NEXT: fmov [[FMOVRES:w[0-9]+]], [[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v4i32(<4 x i32> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a1) ret i32 %vaddv.i } -declare i32 @llvm.arm64.neon.uaddv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.uaddv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.uaddv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8>) -declare i64 @llvm.arm64.neon.uaddv.i64.v2i64(<2 x i64>) +declare i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64>) -declare i32 @llvm.arm64.neon.uaddv.i32.v2i32(<2 x i32>) +declare i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32>) -declare i32 @llvm.arm64.neon.uaddv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.uaddv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8>) -declare i32 @llvm.arm64.neon.saddv.i32.v2i32(<2 x i32>) +declare i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32>) -declare i64 @llvm.arm64.neon.saddv.i64.v2i64(<2 x i64>) +declare i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64>) -declare i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>) -declare float @llvm.arm64.neon.faddv.f32.v2f32(<2 x float> %a1) -declare float @llvm.arm64.neon.faddv.f32.v4f32(<4 x float> %a1) -declare double @llvm.arm64.neon.faddv.f64.v2f64(<2 x double> %a1) +declare float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a1) +declare float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a1) +declare double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a1) diff --git a/test/CodeGen/ARM64/variadic-aapcs.ll b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll similarity index 100% rename from 
test/CodeGen/ARM64/variadic-aapcs.ll
rename to test/CodeGen/AArch64/arm64-variadic-aapcs.ll
diff --git a/test/CodeGen/ARM64/vbitwise.ll b/test/CodeGen/AArch64/arm64-vbitwise.ll
similarity index 86%
rename from test/CodeGen/ARM64/vbitwise.ll
rename to test/CodeGen/AArch64/arm64-vbitwise.ll
index 7d8378de292d..93de95e52e53 100644
--- a/test/CodeGen/ARM64/vbitwise.ll
+++ b/test/CodeGen/AArch64/arm64-vbitwise.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s

 define <8 x i8> @rbit_8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: rbit_8b:
 ;CHECK: rbit.8b
   %tmp1 = load <8 x i8>* %A
-  %tmp3 = call <8 x i8> @llvm.arm64.neon.rbit.v8i8(<8 x i8> %tmp1)
+  %tmp3 = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %tmp1)
   ret <8 x i8> %tmp3
 }

@@ -12,12 +12,12 @@ define <16 x i8> @rbit_16b(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: rbit_16b:
 ;CHECK: rbit.16b
   %tmp1 = load <16 x i8>* %A
-  %tmp3 = call <16 x i8> @llvm.arm64.neon.rbit.v16i8(<16 x i8> %tmp1)
+  %tmp3 = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %tmp1)
   ret <16 x i8> %tmp3
 }

-declare <8 x i8> @llvm.arm64.neon.rbit.v8i8(<8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.rbit.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) nounwind readnone

 define <8 x i16> @sxtl8h(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: sxtl8h:
diff --git a/test/CodeGen/ARM64/vclz.ll b/test/CodeGen/AArch64/arm64-vclz.ll
similarity index 98%
rename from test/CodeGen/ARM64/vclz.ll
rename to test/CodeGen/AArch64/arm64-vclz.ll
index ddc09ed85faf..cf5670a0354f 100644
--- a/test/CodeGen/ARM64/vclz.ll
+++ b/test/CodeGen/AArch64/arm64-vclz.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s

 define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp {
 ; CHECK-LABEL: test_vclz_u8:
diff --git a/test/CodeGen/ARM64/vcmp.ll b/test/CodeGen/AArch64/arm64-vcmp.ll
similarity index 76%
rename from test/CodeGen/ARM64/vcmp.ll
rename to test/CodeGen/AArch64/arm64-vcmp.ll
index 56153f08f357..982ab09ee69e 100644
--- a/test/CodeGen/ARM64/vcmp.ll
+++ b/test/CodeGen/AArch64/arm64-vcmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s


 define void @fcmltz_4s(<4 x float> %a, <4 x i16>* %p) nounwind {
@@ -18,7 +18,7 @@ define <2 x i32> @facge_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK: facge.2s
   %tmp1 = load <2 x float>* %A
   %tmp2 = load <2 x float>* %B
-  %tmp3 = call <2 x i32> @llvm.arm64.neon.facge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+  %tmp3 = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
   ret <2 x i32> %tmp3
 }

@@ -27,7 +27,7 @@ define <4 x i32> @facge_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK: facge.4s
   %tmp1 = load <4 x float>* %A
   %tmp2 = load <4 x float>* %B
-  %tmp3 = call <4 x i32> @llvm.arm64.neon.facge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
   ret <4 x i32> %tmp3
 }

@@ -36,20 +36,20 @@ define <2 x i64> @facge_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK: facge.2d
   %tmp1 = load <2 x double>* %A
   %tmp2 = load <2 x double>* %B
-  %tmp3 = call <2 x i64> @llvm.arm64.neon.facge.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+  %tmp3 = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
   ret <2 x i64> %tmp3
 }

-declare <2 x i32> @llvm.arm64.neon.facge.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.facge.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.facge.v2i64.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double>, <2 x double>) nounwind readnone

 define <2 x i32> @facgt_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: facgt_2s:
 ;CHECK: facgt.2s
   %tmp1 = load <2 x float>* %A
   %tmp2 = load <2 x float>* %B
-  %tmp3 = call <2 x i32> @llvm.arm64.neon.facgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+  %tmp3 = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
   ret <2 x i32> %tmp3
 }

@@ -58,7 +58,7 @@ define <4 x i32> @facgt_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK: facgt.4s
   %tmp1 = load <4 x float>* %A
   %tmp2 = load <4 x float>* %B
-  %tmp3 = call <4 x i32> @llvm.arm64.neon.facgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
   ret <4 x i32> %tmp3
 }

@@ -67,47 +67,47 @@ define <2 x i64> @facgt_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK: facgt.2d
   %tmp1 = load <2 x double>* %A
   %tmp2 = load <2 x double>* %B
-  %tmp3 = call <2 x i64> @llvm.arm64.neon.facgt.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+  %tmp3 = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
   ret <2 x i64> %tmp3
 }

-declare <2 x i32> @llvm.arm64.neon.facgt.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.facgt.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.facgt.v2i64.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double>, <2 x double>) nounwind readnone

 define i32 @facge_s(float %A, float %B) nounwind {
 ; CHECK-LABEL: facge_s:
 ; CHECK: facge {{s[0-9]+}}, s0, s1
-  %mask = call i32 @llvm.arm64.neon.facge.i32.f32(float %A, float %B)
+  %mask = call i32 @llvm.aarch64.neon.facge.i32.f32(float %A, float %B)
   ret i32 %mask
 }

 define i64 @facge_d(double %A, double %B) nounwind {
 ; CHECK-LABEL: facge_d:
 ; CHECK: facge {{d[0-9]+}}, d0, d1
-  %mask = call i64 @llvm.arm64.neon.facge.i64.f64(double %A, double %B)
+  %mask = call i64 @llvm.aarch64.neon.facge.i64.f64(double %A, double %B)
   ret i64 %mask
 }

-declare i64 @llvm.arm64.neon.facge.i64.f64(double, double)
-declare i32 @llvm.arm64.neon.facge.i32.f32(float, float)
+declare i64 @llvm.aarch64.neon.facge.i64.f64(double, double)
+declare i32 @llvm.aarch64.neon.facge.i32.f32(float, float)

 define i32 @facgt_s(float %A, float %B) nounwind {
 ; CHECK-LABEL: facgt_s:
 ; CHECK: facgt {{s[0-9]+}}, s0, s1
-  %mask = call i32 @llvm.arm64.neon.facgt.i32.f32(float %A, float %B)
+  %mask = call i32
@llvm.aarch64.neon.facgt.i32.f32(float %A, float %B) ret i32 %mask } define i64 @facgt_d(double %A, double %B) nounwind { ; CHECK-LABEL: facgt_d: ; CHECK: facgt {{d[0-9]+}}, d0, d1 - %mask = call i64 @llvm.arm64.neon.facgt.i64.f64(double %A, double %B) + %mask = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %A, double %B) ret i64 %mask } -declare i64 @llvm.arm64.neon.facgt.i64.f64(double, double) -declare i32 @llvm.arm64.neon.facgt.i32.f32(float, float) +declare i64 @llvm.aarch64.neon.facgt.i64.f64(double, double) +declare i32 @llvm.aarch64.neon.facgt.i32.f32(float, float) define <8 x i8> @cmtst_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: cmtst_8b: diff --git a/test/CodeGen/AArch64/arm64-vcnt.ll b/test/CodeGen/AArch64/arm64-vcnt.ll new file mode 100644 index 000000000000..903501ec16a9 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-vcnt.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s + +define <8 x i8> @cls_8b(<8 x i8>* %A) nounwind { +;CHECK-LABEL: cls_8b: +;CHECK: cls.8b + %tmp1 = load <8 x i8>* %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> %tmp1) + ret <8 x i8> %tmp3 +} + +define <16 x i8> @cls_16b(<16 x i8>* %A) nounwind { +;CHECK-LABEL: cls_16b: +;CHECK: cls.16b + %tmp1 = load <16 x i8>* %A + %tmp3 = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> %tmp1) + ret <16 x i8> %tmp3 +} + +define <4 x i16> @cls_4h(<4 x i16>* %A) nounwind { +;CHECK-LABEL: cls_4h: +;CHECK: cls.4h + %tmp1 = load <4 x i16>* %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> %tmp1) + ret <4 x i16> %tmp3 +} + +define <8 x i16> @cls_8h(<8 x i16>* %A) nounwind { +;CHECK-LABEL: cls_8h: +;CHECK: cls.8h + %tmp1 = load <8 x i16>* %A + %tmp3 = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> %tmp1) + ret <8 x i16> %tmp3 +} + +define <2 x i32> @cls_2s(<2 x i32>* %A) nounwind { +;CHECK-LABEL: cls_2s: +;CHECK: cls.2s + %tmp1 = load <2 x i32>* %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> %tmp1) + ret <2 x i32> %tmp3 +} + +define <4 x i32> @cls_4s(<4 x i32>* %A) nounwind { +;CHECK-LABEL: cls_4s: +;CHECK: cls.4s + %tmp1 = load <4 x i32>* %A + %tmp3 = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> %tmp1) + ret <4 x i32> %tmp3 +} + +declare <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM64/vcombine.ll b/test/CodeGen/AArch64/arm64-vcombine.ll similarity index 90% rename from test/CodeGen/ARM64/vcombine.ll rename to test/CodeGen/AArch64/arm64-vcombine.ll index 16f591e378e1..fa1299603af3 100644 --- a/test/CodeGen/ARM64/vcombine.ll +++ b/test/CodeGen/AArch64/arm64-vcombine.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ; LowerCONCAT_VECTORS() was reversing the order of two parts. 
; rdar://11558157 diff --git a/test/CodeGen/ARM64/vcvt.ll b/test/CodeGen/AArch64/arm64-vcvt.ll similarity index 67% rename from test/CodeGen/ARM64/vcvt.ll rename to test/CodeGen/AArch64/arm64-vcvt.ll index 19bb8cb8dc5c..8c9e4e927106 100644 --- a/test/CodeGen/ARM64/vcvt.ll +++ b/test/CodeGen/AArch64/arm64-vcvt.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtas_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtas.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtas.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -14,7 +14,7 @@ define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtas.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtas.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -23,20 +23,20 @@ define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtas.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtas.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtas.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtas.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtas.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtau_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtau.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtau.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -45,7 +45,7 @@ define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtau.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtau.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -54,20 +54,20 @@ define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtau.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtau.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtau.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtau.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtau.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtms_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtms.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call 
<2 x i32> @llvm.arm64.neon.fcvtms.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -76,7 +76,7 @@ define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtms.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtms.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -85,20 +85,20 @@ define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtms.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtms.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtms.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtms.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtms.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtmu_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtmu.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtmu.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -107,7 +107,7 @@ define <4 x i32> @fcvtmu_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtmu.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtmu.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -116,20 +116,20 @@ define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtmu.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtmu.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtmu.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtmu.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtmu.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtps_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtps.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtps.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -138,7 +138,7 @@ define <4 x i32> @fcvtps_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtps.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtps.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -147,20 +147,20 @@ define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtps.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call 
<2 x i64> @llvm.arm64.neon.fcvtps.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtps.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtps.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtps.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtpu_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtpu.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtpu.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -169,7 +169,7 @@ define <4 x i32> @fcvtpu_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtpu.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtpu.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -178,20 +178,20 @@ define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtpu.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtpu.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtpu.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtpu.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtpu.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtns_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtns.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtns.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -200,7 +200,7 @@ define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtns.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtns.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -209,20 +209,20 @@ define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtns.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtns.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtns.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtns.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtns.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> 
@llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtnu_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtnu.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtnu.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -231,7 +231,7 @@ define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtnu.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtnu.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -240,13 +240,13 @@ define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtnu.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtnu.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtnu.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtnu.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtnu.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtzs_2s: @@ -401,7 +401,7 @@ define <2 x float> @frintn_2s(<2 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: frintn.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x float> @llvm.arm64.neon.frintn.v2f32(<2 x float> %A) + %tmp3 = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %A) ret <2 x float> %tmp3 } @@ -410,7 +410,7 @@ define <4 x float> @frintn_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: frintn.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x float> @llvm.arm64.neon.frintn.v4f32(<4 x float> %A) + %tmp3 = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %A) ret <4 x float> %tmp3 } @@ -419,13 +419,13 @@ define <2 x double> @frintn_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: frintn.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x double> @llvm.arm64.neon.frintn.v2f64(<2 x double> %A) + %tmp3 = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %A) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.frintn.v2f32(<2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.frintn.v4f32(<4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.frintn.v2f64(<2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) nounwind readnone define <2 x float> @frintp_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: frintp_2s: @@ -525,7 +525,7 @@ define <2 x float> @fcvtxn_2s(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtxn v0.2s, v0.2d ;CHECK-NEXT: ret - %tmp3 = call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %A) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %A) ret <2 x float> %tmp3 } @@ -534,19 +534,19 @@ define <4 x float> @fcvtxn_4s(<2 x float> %ret, <2 x double> %A) nounwind { 
;CHECK-NOT: ld1 ;CHECK: fcvtxn2 v0.4s, v1.2d ;CHECK-NEXT: ret - %tmp3 = call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %A) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %A) %res = shufflevector <2 x float> %ret, <2 x float> %tmp3, <4 x i32> ret <4 x float> %res } -declare <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtzsc_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtzsc_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtzs.2s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %A, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %A, i32 1) ret <2 x i32> %tmp3 } @@ -555,7 +555,7 @@ define <4 x i32> @fcvtzsc_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtzs.4s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %A, i32 1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %A, i32 1) ret <4 x i32> %tmp3 } @@ -564,20 +564,20 @@ define <2 x i64> @fcvtzsc_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtzs.2d v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %A, i32 1) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %A, i32 1) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32) nounwind readnone define <2 x i32> @fcvtzuc_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtzuc_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtzu.2s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %A, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %A, i32 1) ret <2 x i32> %tmp3 } @@ -586,7 +586,7 @@ define <4 x i32> @fcvtzuc_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtzu.4s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %A, i32 1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %A, i32 1) ret <4 x i32> %tmp3 } @@ -595,20 +595,20 @@ define <2 x i64> @fcvtzuc_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtzu.2d v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %A, i32 1) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %A, i32 1) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone +declare <4 x i32> 
@llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) nounwind readnone define <2 x float> @scvtf_2sc(<2 x i32> %A) nounwind { ;CHECK-LABEL: scvtf_2sc: ;CHECK-NOT: ld1 ;CHECK: scvtf.2s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %A, i32 1) + %tmp3 = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %A, i32 1) ret <2 x float> %tmp3 } @@ -617,7 +617,7 @@ define <4 x float> @scvtf_4sc(<4 x i32> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: scvtf.4s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %A, i32 1) + %tmp3 = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %A, i32 1) ret <4 x float> %tmp3 } @@ -626,20 +626,20 @@ define <2 x double> @scvtf_2dc(<2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: scvtf.2d v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %A, i32 1) + %tmp3 = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %A, i32 1) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone -declare <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone -declare <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone define <2 x float> @ucvtf_2sc(<2 x i32> %A) nounwind { ;CHECK-LABEL: ucvtf_2sc: ;CHECK-NOT: ld1 ;CHECK: ucvtf.2s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %A, i32 1) + %tmp3 = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %A, i32 1) ret <2 x float> %tmp3 } @@ -648,7 +648,7 @@ define <4 x float> @ucvtf_4sc(<4 x i32> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: ucvtf.4s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %A, i32 1) + %tmp3 = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %A, i32 1) ret <4 x float> %tmp3 } @@ -657,7 +657,7 @@ define <2 x double> @ucvtf_2dc(<2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: ucvtf.2d v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %A, i32 1) + %tmp3 = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %A, i32 1) ret <2 x double> %tmp3 } @@ -681,6 +681,6 @@ define void @autogen_SD19225() { ret void } -declare <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone -declare <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone -declare <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone diff --git a/test/CodeGen/ARM64/vcvt_f.ll b/test/CodeGen/AArch64/arm64-vcvt_f.ll similarity index 73% rename 
from test/CodeGen/ARM64/vcvt_f.ll rename to test/CodeGen/AArch64/arm64-vcvt_f.ll index d67aa3b9d471..d24495844b45 100644 --- a/test/CodeGen/ARM64/vcvt_f.ll +++ b/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s -; RUN: llc < %s -O0 -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -O0 -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp { ; CHECK-LABEL: test_vcvt_f64_f32: @@ -38,7 +38,7 @@ define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) noun define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp { ; CHECK-LABEL: test_vcvtx_f32_f64: - %vcvtx1.i = tail call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind + %vcvtx1.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind ; CHECK: fcvtxn ret <2 x float> %vcvtx1.i ; CHECK: ret @@ -46,7 +46,7 @@ define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp { define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp { ; CHECK-LABEL: test_vcvtx_high_f32_f64: - %vcvtx2.i = tail call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind + %vcvtx2.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind %res = shufflevector <2 x float> %x, <2 x float> %vcvtx2.i, <4 x i32> ; CHECK: fcvtxn2 ret <4 x float> %res @@ -54,13 +54,13 @@ define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nou } -declare <2 x double> @llvm.arm64.neon.vcvthighfp2df(<4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.vcvtfp2df(<2 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.vcvthighfp2df(<4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.vcvtfp2df(<2 x float>) nounwind readnone -declare <2 x float> @llvm.arm64.neon.vcvtdf2fp(<2 x double>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.vcvthighdf2fp(<2 x float>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.vcvtdf2fp(<2 x double>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.vcvthighdf2fp(<2 x float>, <2 x double>) nounwind readnone -declare <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone define i16 @to_half(float %in) { ; CHECK-LABEL: to_half: diff --git a/test/CodeGen/ARM64/vcvt_f32_su32.ll b/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll similarity index 75% rename from test/CodeGen/ARM64/vcvt_f32_su32.ll rename to test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll index 51e053d97459..1eb7b43d5755 100644 --- a/test/CodeGen/ARM64/vcvt_f32_su32.ll +++ b/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <2 x float> @ucvt(<2 x i32> %a) nounwind readnone ssp { ; CHECK-LABEL: ucvt: @@ -37,7 +37,7 @@ define <4 x float> @cvtf16(<4 x i16> %a) nounwind readnone ssp { ; CHECK-LABEL: cvtf16: ; CHECK: fcvtl v0.4s, v0.4h ; CHECK-NEXT: ret - %vcvt1.i = tail call <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16> %a) nounwind + %vcvt1.i = tail call <4 x float> 
@llvm.aarch64.neon.vcvthf2fp(<4 x i16> %a) nounwind ret <4 x float> %vcvt1.i } @@ -46,7 +46,7 @@ define <4 x float> @cvtf16_high(<8 x i16> %a) nounwind readnone ssp { ; CHECK: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: ret %in = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %vcvt1.i = tail call <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16> %in) nounwind + %vcvt1.i = tail call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> %in) nounwind ret <4 x float> %vcvt1.i } @@ -56,7 +56,7 @@ define <4 x i16> @cvtf16f32(<4 x float> %a) nounwind readnone ssp { ; CHECK-LABEL: cvtf16f32: ; CHECK: fcvtn v0.4h, v0.4s ; CHECK-NEXT: ret - %vcvt1.i = tail call <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float> %a) nounwind + %vcvt1.i = tail call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %a) nounwind ret <4 x i16> %vcvt1.i } @@ -64,10 +64,10 @@ define <8 x i16> @cvtf16f32_high(<4 x i16> %low, <4 x float> %high_big) { ; CHECK-LABEL: cvtf16f32_high: ; CHECK: fcvtn2 v0.8h, v1.4s ; CHECK-NEXT: ret - %high = call <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float> %high_big) + %high = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %high_big) %res = shufflevector <4 x i16> %low, <4 x i16> %high, <8 x i32> ret <8 x i16> %res } -declare <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float>) nounwind readnone diff --git a/test/CodeGen/AArch64/arm64-vcvt_n.ll b/test/CodeGen/AArch64/arm64-vcvt_n.ll new file mode 100644 index 000000000000..7ed5be6e8af9 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-vcvt_n.ll @@ -0,0 +1,49 @@ +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s + +define <2 x float> @cvtf32fxpu(<2 x i32> %a) nounwind readnone ssp { +; CHECK-LABEL: cvtf32fxpu: +; CHECK: ucvtf.2s v0, v0, #9 +; CHECK: ret + %vcvt_n1 = tail call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 9) + ret <2 x float> %vcvt_n1 +} + +define <2 x float> @cvtf32fxps(<2 x i32> %a) nounwind readnone ssp { +; CHECK-LABEL: cvtf32fxps: +; CHECK: scvtf.2s v0, v0, #12 +; CHECK: ret + %vcvt_n1 = tail call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 12) + ret <2 x float> %vcvt_n1 +} + +define <4 x float> @cvtqf32fxpu(<4 x i32> %a) nounwind readnone ssp { +; CHECK-LABEL: cvtqf32fxpu: +; CHECK: ucvtf.4s v0, v0, #18 +; CHECK: ret + %vcvt_n1 = tail call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 18) + ret <4 x float> %vcvt_n1 +} + +define <4 x float> @cvtqf32fxps(<4 x i32> %a) nounwind readnone ssp { +; CHECK-LABEL: cvtqf32fxps: +; CHECK: scvtf.4s v0, v0, #30 +; CHECK: ret + %vcvt_n1 = tail call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 30) + ret <4 x float> %vcvt_n1 +} +define <2 x double> @f1(<2 x i64> %a) nounwind readnone ssp { + %vcvt_n1 = tail call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 12) + ret <2 x double> %vcvt_n1 +} + +define <2 x double> @f2(<2 x i64> %a) nounwind readnone ssp { + %vcvt_n1 = tail call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 9) + ret <2 x double> %vcvt_n1 +} + +declare <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone +declare <2 x float> 
@llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone diff --git a/test/CodeGen/ARM64/vcvt_su32_f32.ll b/test/CodeGen/AArch64/arm64-vcvt_su32_f32.ll similarity index 91% rename from test/CodeGen/ARM64/vcvt_su32_f32.ll rename to test/CodeGen/AArch64/arm64-vcvt_su32_f32.ll index 8c82fa095c81..985a5f762439 100644 --- a/test/CodeGen/ARM64/vcvt_su32_f32.ll +++ b/test/CodeGen/AArch64/arm64-vcvt_su32_f32.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <2 x i32> @c1(<2 x float> %a) nounwind readnone ssp { ; CHECK: c1 diff --git a/test/CodeGen/ARM64/vcvtxd_f32_f64.ll b/test/CodeGen/AArch64/arm64-vcvtxd_f32_f64.ll similarity index 54% rename from test/CodeGen/ARM64/vcvtxd_f32_f64.ll rename to test/CodeGen/AArch64/arm64-vcvtxd_f32_f64.ll index bbe8f0b38641..b29c22cbfda5 100644 --- a/test/CodeGen/ARM64/vcvtxd_f32_f64.ll +++ b/test/CodeGen/AArch64/arm64-vcvtxd_f32_f64.ll @@ -4,8 +4,8 @@ define float @fcvtxn(double %a) { ; CHECK-LABEL: fcvtxn: ; CHECK: fcvtxn s0, d0 ; CHECK-NEXT: ret - %vcvtxd.i = tail call float @llvm.arm64.sisd.fcvtxn(double %a) nounwind + %vcvtxd.i = tail call float @llvm.aarch64.sisd.fcvtxn(double %a) nounwind ret float %vcvtxd.i } -declare float @llvm.arm64.sisd.fcvtxn(double) nounwind readnone +declare float @llvm.aarch64.sisd.fcvtxn(double) nounwind readnone diff --git a/test/CodeGen/ARM64/vecCmpBr.ll b/test/CodeGen/AArch64/arm64-vecCmpBr.ll similarity index 87% rename from test/CodeGen/ARM64/vecCmpBr.ll rename to test/CodeGen/AArch64/arm64-vecCmpBr.ll index 2af8775cea6e..c7321e4b7d07 100644 --- a/test/CodeGen/ARM64/vecCmpBr.ll +++ b/test/CodeGen/AArch64/arm64-vecCmpBr.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s ; ModuleID = 'arm64_vecCmpBr.c' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" target triple = "arm64-apple-ios3.0.0" @@ -13,7 +13,7 @@ define i32 @anyZero64(<4 x i16> %a) #0 { ; CHECK-NEXT: b _bar entry: %0 = bitcast <4 x i16> %a to <8 x i8> - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %0) #3 + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %0) #3 %1 = trunc i32 %vminv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %if.then, label %return @@ -39,7 +39,7 @@ define i32 @anyZero128(<8 x i16> %a) #0 { entry: %0 = bitcast <8 x i16> %a to <16 x i8> - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8> %0) #3 + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %0) #3 %1 = trunc i32 %vminv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %if.then, label %return @@ -63,7 +63,7 @@ define i32 @anyNonZero64(<4 x i16> %a) #0 { entry: %0 = bitcast <4 x i16> %a to <8 x i8> - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %0) #3 + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %0) #3 %1 = trunc i32 %vmaxv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %return, label %if.then @@ 
-86,7 +86,7 @@ define i32 @anyNonZero128(<8 x i16> %a) #0 { ; CHECK-NEXT: movz w0, #0 entry: %0 = bitcast <8 x i16> %a to <16 x i8> - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8> %0) #3 + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %0) #3 %1 = trunc i32 %vmaxv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %return, label %if.then @@ -109,7 +109,7 @@ define i32 @allZero64(<4 x i16> %a) #0 { ; CHECK-NEXT: b _bar entry: %0 = bitcast <4 x i16> %a to <8 x i8> - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %0) #3 + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %0) #3 %1 = trunc i32 %vmaxv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %if.then, label %return @@ -132,7 +132,7 @@ define i32 @allZero128(<8 x i16> %a) #0 { ; CHECK-NEXT: b _bar entry: %0 = bitcast <8 x i16> %a to <16 x i8> - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8> %0) #3 + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %0) #3 %1 = trunc i32 %vmaxv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %if.then, label %return @@ -155,7 +155,7 @@ define i32 @allNonZero64(<4 x i16> %a) #0 { ; CHECK-NEXT: movz w0, #0 entry: %0 = bitcast <4 x i16> %a to <8 x i8> - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %0) #3 + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %0) #3 %1 = trunc i32 %vminv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %return, label %if.then @@ -178,7 +178,7 @@ define i32 @allNonZero128(<8 x i16> %a) #0 { ; CHECK-NEXT: movz w0, #0 entry: %0 = bitcast <8 x i16> %a to <16 x i8> - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8> %0) #3 + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %0) #3 %1 = trunc i32 %vminv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %return, label %if.then @@ -192,13 +192,13 @@ return: ; preds = %entry, %if.then ret i32 %retval.0 } -declare i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8>) #2 +declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>) #2 -declare i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8>) #2 +declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>) #2 -declare i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8>) #2 +declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>) #2 -declare i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8>) #2 +declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>) #2 attributes #0 = { nounwind ssp "target-cpu"="cyclone" } attributes #1 = { "target-cpu"="cyclone" } diff --git a/test/CodeGen/ARM64/vecFold.ll b/test/CodeGen/AArch64/arm64-vecFold.ll similarity index 74% rename from test/CodeGen/ARM64/vecFold.ll rename to test/CodeGen/AArch64/arm64-vecFold.ll index 6888932f2ce1..aeacfccab3c4 100644 --- a/test/CodeGen/ARM64/vecFold.ll +++ b/test/CodeGen/AArch64/arm64-vecFold.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple -o - %s| FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple -o - %s| FileCheck %s define <16 x i8> @foov16i8(<8 x i16> %a0, <8 x i16> %b0) nounwind readnone ssp { ; CHECK-LABEL: foov16i8: @@ -50,8 +50,8 @@ define <4 x i32> @foov4i32(<2 x i64> %a0, <2 x i64> %b0) nounwind readnone ssp { define <8 x i16> @bar(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1) nounwind readnone ssp { ; CHECK-LABEL: bar: - %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind - %vaddhn2.i10 = tail call <4 x i16> 
@llvm.arm64.neon.addhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind + %vaddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind + %vaddhn2.i10 = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind ; CHECK: addhn.4h v0, v0, v1 ; CHECK-NEXT: addhn2.8h v0, v2, v3 ; CHECK-NEXT: ret @@ -64,7 +64,7 @@ define <8 x i16> @bar(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1 define <8 x i16> @baz(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1) nounwind readnone ssp { ; CHECK-LABEL: baz: - %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind + %vaddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind %vshrn_high_shift = ashr <4 x i32> %b0, %vshrn_high = trunc <4 x i32> %vshrn_high_shift to <4 x i16> ; CHECK: addhn.4h v0, v0, v1 @@ -83,8 +83,8 @@ entry: ; CHECK: raddhn.4h v0, v0, v1 ; CHECK-NEXT: raddhn2.8h v0, v2, v3 ; CHECK-NEXT: ret - %vraddhn2.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind - %vraddhn2.i10 = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind + %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind + %vraddhn2.i10 = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind %0 = bitcast <4 x i16> %vraddhn2.i to <1 x i64> %1 = bitcast <4 x i16> %vraddhn2.i10 to <1 x i64> %shuffle.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -97,8 +97,8 @@ define <8 x i16> @vrshrn(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %b0, <8 x i16> ; CHECK: rshrn.8b v0, v0, #5 ; CHECK-NEXT: rshrn2.16b v0, v2, #6 ; CHECK-NEXT: ret - %vrshrn_n1 = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %a0, i32 5) - %vrshrn_n4 = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %b0, i32 6) + %vrshrn_n1 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %a0, i32 5) + %vrshrn_n4 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b0, i32 6) %1 = bitcast <8 x i8> %vrshrn_n1 to <1 x i64> %2 = bitcast <8 x i8> %vrshrn_n4 to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -111,8 +111,8 @@ define <8 x i16> @vrsubhn(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %b0, <8 x i16> ; CHECK: rsubhn.8b v0, v0, v1 ; CHECK: rsubhn2.16b v0, v2, v3 ; CHECK-NEXT: ret - %vrsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a0, <8 x i16> %a1) nounwind - %vrsubhn2.i10 = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %b0, <8 x i16> %b1) nounwind + %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a0, <8 x i16> %a1) nounwind + %vrsubhn2.i10 = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %b0, <8 x i16> %b1) nounwind %1 = bitcast <8 x i8> %vrsubhn2.i to <1 x i64> %2 = bitcast <8 x i8> %vrsubhn2.i10 to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -122,8 +122,8 @@ define <8 x i16> @vrsubhn(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %b0, <8 x i16> define <8 x i16> @noOpt1(<2 x i32> %a0, <2 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1) nounwind readnone ssp { ; CHECK-LABEL: noOpt1: - %vqsub2.i = tail call <2 x i32> @llvm.arm64.neon.sqsub.v2i32(<2 x i32> %a0, <2 x i32> %a1) nounwind - %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind + %vqsub2.i = tail call <2 x i32> 
@llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a0, <2 x i32> %a1) nounwind + %vaddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind ; CHECK: sqsub.2s v0, v0, v1 ; CHECK-NEXT: addhn2.8h v0, v2, v3 %1 = bitcast <2 x i32> %vqsub2.i to <1 x i64> @@ -133,13 +133,13 @@ define <8 x i16> @noOpt1(<2 x i32> %a0, <2 x i32> %a1, <4 x i32> %b0, <4 x i32> ret <8 x i16> %3 } -declare <2 x i32> @llvm.arm64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone diff --git a/test/CodeGen/ARM64/vector-ext.ll b/test/CodeGen/AArch64/arm64-vector-ext.ll similarity index 81% rename from test/CodeGen/ARM64/vector-ext.ll rename to test/CodeGen/AArch64/arm64-vector-ext.ll index 9cc0555d8c48..650ff1e14f02 100644 --- a/test/CodeGen/ARM64/vector-ext.ll +++ b/test/CodeGen/AArch64/arm64-vector-ext.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ;CHECK: @func30 ;CHECK: ushll.4s v0, v0, #0 diff --git a/test/CodeGen/ARM64/vector-imm.ll b/test/CodeGen/AArch64/arm64-vector-imm.ll similarity index 98% rename from test/CodeGen/ARM64/vector-imm.ll rename to test/CodeGen/AArch64/arm64-vector-imm.ll index a84f804c8cd1..9fb088b9a497 100644 --- a/test/CodeGen/ARM64/vector-imm.ll +++ b/test/CodeGen/AArch64/arm64-vector-imm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind { ; CHECK-LABEL: v_orrimm: diff --git a/test/CodeGen/ARM64/vector-insertion.ll b/test/CodeGen/AArch64/arm64-vector-insertion.ll similarity index 91% rename from test/CodeGen/ARM64/vector-insertion.ll rename to test/CodeGen/AArch64/arm64-vector-insertion.ll index 0926bcfde9a3..8fbff71f9fc2 100644 --- a/test/CodeGen/ARM64/vector-insertion.ll +++ b/test/CodeGen/AArch64/arm64-vector-insertion.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -mcpu=generic -arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -march=arm64 -mcpu=generic -aarch64-neon-syntax=apple < %s | FileCheck %s define void @test0f(float* nocapture %x, float %a) #0 { entry: diff --git a/test/CodeGen/ARM64/vector-ldst.ll 
b/test/CodeGen/AArch64/arm64-vector-ldst.ll similarity index 99% rename from test/CodeGen/ARM64/vector-ldst.ll rename to test/CodeGen/AArch64/arm64-vector-ldst.ll index 154160ee502a..c00191577d17 100644 --- a/test/CodeGen/ARM64/vector-ldst.ll +++ b/test/CodeGen/AArch64/arm64-vector-ldst.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s ; rdar://9428579 diff --git a/test/CodeGen/ARM64/vext.ll b/test/CodeGen/AArch64/arm64-vext.ll similarity index 99% rename from test/CodeGen/ARM64/vext.ll rename to test/CodeGen/AArch64/arm64-vext.ll index c82043940c8f..2240dfd5a1ae 100644 --- a/test/CodeGen/ARM64/vext.ll +++ b/test/CodeGen/AArch64/arm64-vext.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s define void @test_vext_s8() nounwind ssp { ; CHECK-LABEL: test_vext_s8: diff --git a/test/CodeGen/ARM64/vext_reverse.ll b/test/CodeGen/AArch64/arm64-vext_reverse.ll similarity index 100% rename from test/CodeGen/ARM64/vext_reverse.ll rename to test/CodeGen/AArch64/arm64-vext_reverse.ll diff --git a/test/CodeGen/ARM64/vfloatintrinsics.ll b/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll similarity index 99% rename from test/CodeGen/ARM64/vfloatintrinsics.ll rename to test/CodeGen/AArch64/arm64-vfloatintrinsics.ll index a8c882bf6960..255a18216de5 100644 --- a/test/CodeGen/ARM64/vfloatintrinsics.ll +++ b/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s ;;; Float vectors diff --git a/test/CodeGen/ARM64/vhadd.ll b/test/CodeGen/AArch64/arm64-vhadd.ll similarity index 51% rename from test/CodeGen/ARM64/vhadd.ll rename to test/CodeGen/AArch64/arm64-vhadd.ll index aed76810e133..6178bf9809dd 100644 --- a/test/CodeGen/ARM64/vhadd.ll +++ b/test/CodeGen/AArch64/arm64-vhadd.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @shadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: shadd8b: ;CHECK: shadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.shadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <16 x i8> @shadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: shadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.shadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -23,7 +23,7 @@ define <4 x i16> @shadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: shadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.shadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -32,7 +32,7 @@ define <8 x i16> @shadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: shadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.shadd.v8i16(<8 x i16> %tmp1, <8 x i16> 
%tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -41,7 +41,7 @@ define <2 x i32> @shadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: shadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.shadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -50,7 +50,7 @@ define <4 x i32> @shadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: shadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.shadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -59,7 +59,7 @@ define <8 x i8> @uhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: uhadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -68,7 +68,7 @@ define <16 x i8> @uhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: uhadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -77,7 +77,7 @@ define <4 x i16> @uhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uhadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -86,7 +86,7 @@ define <8 x i16> @uhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: uhadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -95,7 +95,7 @@ define <2 x i32> @uhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uhadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -104,32 +104,32 @@ define <4 x i32> @uhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: uhadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.shadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.shadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.shadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>) 
nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.shadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.shadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.shadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.uhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @srhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: srhadd8b: ;CHECK: srhadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.srhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -138,7 +138,7 @@ define <16 x i8> @srhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: srhadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.srhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -147,7 +147,7 @@ define <4 x i16> @srhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: srhadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.srhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -156,7 +156,7 @@ define <8 x i16> @srhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: srhadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.srhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -165,7 +165,7 @@ define <2 x i32> @srhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: srhadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.srhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -174,7 +174,7 @@ define <4 x i32> @srhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: srhadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.srhadd.v4i32(<4 x i32> 
%tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -183,7 +183,7 @@ define <8 x i8> @urhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: urhadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.urhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -192,7 +192,7 @@ define <16 x i8> @urhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: urhadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.urhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -201,7 +201,7 @@ define <4 x i16> @urhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: urhadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.urhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -210,7 +210,7 @@ define <8 x i16> @urhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: urhadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.urhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -219,7 +219,7 @@ define <2 x i32> @urhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: urhadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.urhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -228,22 +228,22 @@ define <4 x i32> @urhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: urhadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.urhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.urhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.urhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.srhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x 
i32> @llvm.arm64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM64/vhsub.ll b/test/CodeGen/AArch64/arm64-vhsub.ll similarity index 50% rename from test/CodeGen/ARM64/vhsub.ll rename to test/CodeGen/AArch64/arm64-vhsub.ll index 85df4d4eb73a..13bfda3899e5 100644 --- a/test/CodeGen/ARM64/vhsub.ll +++ b/test/CodeGen/AArch64/arm64-vhsub.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @shsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: shsub8b: ;CHECK: shsub.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.shsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <16 x i8> @shsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: shsub.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.shsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -23,7 +23,7 @@ define <4 x i16> @shsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: shsub.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.shsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -32,7 +32,7 @@ define <8 x i16> @shsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: shsub.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.shsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -41,7 +41,7 @@ define <2 x i32> @shsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: shsub.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.shsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -50,7 +50,7 @@ define <4 x i32> @shsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: shsub.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.shsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -59,7 +59,7 @@ define <8 x i8> @uhsub8b(<8 x i8>* %A, <8 x i8>* %B) 
nounwind { ;CHECK: uhsub.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uhsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -68,7 +68,7 @@ define <16 x i8> @uhsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: uhsub.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uhsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -77,7 +77,7 @@ define <4 x i16> @uhsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uhsub.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uhsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -86,7 +86,7 @@ define <8 x i16> @uhsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: uhsub.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uhsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -95,7 +95,7 @@ define <2 x i32> @uhsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uhsub.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uhsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -104,22 +104,22 @@ define <4 x i32> @uhsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: uhsub.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uhsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.shsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.shsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.shsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.uhsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uhsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uhsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.shsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.shsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.shsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone 
+declare <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.uhsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uhsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uhsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM64/virtual_base.ll b/test/CodeGen/AArch64/arm64-virtual_base.ll similarity index 100% rename from test/CodeGen/ARM64/virtual_base.ll rename to test/CodeGen/AArch64/arm64-virtual_base.ll diff --git a/test/CodeGen/ARM64/vmax.ll b/test/CodeGen/AArch64/arm64-vmax.ll similarity index 52% rename from test/CodeGen/ARM64/vmax.ll rename to test/CodeGen/AArch64/arm64-vmax.ll index b2426f35057f..3f2c134dec6e 100644 --- a/test/CodeGen/ARM64/vmax.ll +++ b/test/CodeGen/AArch64/arm64-vmax.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: smax_8b: ;CHECK: smax.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <16 x i8> @smax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: smax.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -23,7 +23,7 @@ define <4 x i16> @smax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: smax.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -32,7 +32,7 @@ define <8 x i16> @smax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: smax.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -41,7 +41,7 @@ define <2 x i32> @smax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: smax.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -50,23 +50,23 @@ define <4 x i32> @smax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: smax.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.smax.v16i8(<16 x i8>, <16 x 
i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @umax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: umax_8b: ;CHECK: umax.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -75,7 +75,7 @@ define <16 x i8> @umax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: umax.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -84,7 +84,7 @@ define <4 x i16> @umax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: umax.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -93,7 +93,7 @@ define <8 x i16> @umax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: umax.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -102,7 +102,7 @@ define <2 x i32> @umax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: umax.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -111,23 +111,23 @@ define <4 x i32> @umax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: umax.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> 
@llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @smin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: smin_8b: ;CHECK: smin.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -136,7 +136,7 @@ define <16 x i8> @smin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: smin.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -145,7 +145,7 @@ define <4 x i16> @smin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: smin.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -154,7 +154,7 @@ define <8 x i16> @smin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: smin.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -163,7 +163,7 @@ define <2 x i32> @smin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: smin.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -172,23 +172,23 @@ define <4 x i32> @smin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: smin.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> 
@llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @umin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: umin_8b: ;CHECK: umin.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -197,7 +197,7 @@ define <16 x i8> @umin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: umin.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -206,7 +206,7 @@ define <4 x i16> @umin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: umin.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -215,7 +215,7 @@ define <8 x i16> @umin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: umin.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -224,7 +224,7 @@ define <2 x i32> @umin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: umin.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -233,25 +233,25 @@ define <4 x i32> @umin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: umin.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: 
smaxp_8b: ;CHECK: smaxp.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -260,7 +260,7 @@ define <16 x i8> @smaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: smaxp.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -269,7 +269,7 @@ define <4 x i16> @smaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: smaxp.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -278,7 +278,7 @@ define <8 x i16> @smaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: smaxp.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -287,7 +287,7 @@ define <2 x i32> @smaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: smaxp.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -296,23 +296,23 @@ define <4 x i32> @smaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: smaxp.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @umaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: umaxp_8b: ;CHECK: umaxp.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -321,7 +321,7 @@ define <16 x i8> @umaxp_16b(<16 x i8>* %A, <16 x i8>* 
%B) nounwind { ;CHECK: umaxp.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -330,7 +330,7 @@ define <4 x i16> @umaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: umaxp.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -339,7 +339,7 @@ define <8 x i16> @umaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: umaxp.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -348,7 +348,7 @@ define <2 x i32> @umaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: umaxp.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -357,25 +357,25 @@ define <4 x i32> @umaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: umaxp.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: sminp_8b: ;CHECK: sminp.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -384,7 +384,7 @@ define <16 x i8> @sminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: sminp.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> 
@llvm.aarch64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -393,7 +393,7 @@ define <4 x i16> @sminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: sminp.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -402,7 +402,7 @@ define <8 x i16> @sminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sminp.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -411,7 +411,7 @@ define <2 x i32> @sminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sminp.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -420,23 +420,23 @@ define <4 x i32> @sminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sminp.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @uminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: uminp_8b: ;CHECK: uminp.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -445,7 +445,7 @@ define <16 x i8> @uminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: uminp.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -454,7 +454,7 @@ define <4 x i16> @uminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uminp.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = 
call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -463,7 +463,7 @@ define <8 x i16> @uminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: uminp.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -472,7 +472,7 @@ define <2 x i32> @uminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uminp.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -481,23 +481,23 @@ define <4 x i32> @uminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: uminp.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <2 x float> @fmax_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: fmax_2s: ;CHECK: fmax.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -506,7 +506,7 @@ define <4 x float> @fmax_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fmax.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -515,20 +515,20 @@ define <2 x double> @fmax_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fmax.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fmax.v4f32(<4 x float>, <4 x 
float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x float> @fmaxp_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: fmaxp_2s: ;CHECK: fmaxp.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -537,7 +537,7 @@ define <4 x float> @fmaxp_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fmaxp.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -546,20 +546,20 @@ define <2 x double> @fmaxp_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fmaxp.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x float> @fmin_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: fmin_2s: ;CHECK: fmin.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -568,7 +568,7 @@ define <4 x float> @fmin_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fmin.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -577,20 +577,20 @@ define <2 x double> @fmin_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fmin.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fmin.v2f64(<2 x 
double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x float> @fminp_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: fminp_2s: ;CHECK: fminp.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -599,7 +599,7 @@ define <4 x float> @fminp_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fminp.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -608,20 +608,20 @@ define <2 x double> @fminp_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fminp.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x float> @fminnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: fminnmp_2s: ;CHECK: fminnmp.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -630,7 +630,7 @@ define <4 x float> @fminnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fminnmp.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -639,20 +639,20 @@ define <2 x double> @fminnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fminnmp.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind 
readnone +declare <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x float> @fmaxnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: fmaxnmp_2s: ;CHECK: fmaxnmp.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -661,7 +661,7 @@ define <4 x float> @fmaxnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fmaxnmp.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -670,10 +670,10 @@ define <2 x double> @fmaxnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fmaxnmp.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone diff --git a/test/CodeGen/AArch64/arm64-vminmaxnm.ll b/test/CodeGen/AArch64/arm64-vminmaxnm.ll new file mode 100644 index 000000000000..b5aca45cd479 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-vminmaxnm.ll @@ -0,0 +1,68 @@ +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s + +define <2 x float> @f1(<2 x float> %a, <2 x float> %b) nounwind readnone ssp { +; CHECK: fmaxnm.2s v0, v0, v1 +; CHECK: ret + %vmaxnm2.i = tail call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b) nounwind + ret <2 x float> %vmaxnm2.i +} + +define <4 x float> @f2(<4 x float> %a, <4 x float> %b) nounwind readnone ssp { +; CHECK: fmaxnm.4s v0, v0, v1 +; CHECK: ret + %vmaxnm2.i = tail call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b) nounwind + ret <4 x float> %vmaxnm2.i +} + +define <2 x double> @f3(<2 x double> %a, <2 x double> %b) nounwind readnone ssp { +; CHECK: fmaxnm.2d v0, v0, v1 +; CHECK: ret + %vmaxnm2.i = tail call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b) nounwind + ret <2 x double> %vmaxnm2.i +} + +define <2 x float> @f4(<2 x float> %a, <2 x float> %b) nounwind readnone ssp { +; CHECK: fminnm.2s v0, v0, v1 +; CHECK: ret + %vminnm2.i = tail call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b) nounwind + ret <2 x float> %vminnm2.i +} + +define <4 x float> @f5(<4 x float> %a, <4 x float> %b) nounwind readnone ssp { +; 
CHECK: fminnm.4s v0, v0, v1 +; CHECK: ret + %vminnm2.i = tail call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b) nounwind + ret <4 x float> %vminnm2.i +} + +define <2 x double> @f6(<2 x double> %a, <2 x double> %b) nounwind readnone ssp { +; CHECK: fminnm.2d v0, v0, v1 +; CHECK: ret + %vminnm2.i = tail call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b) nounwind + ret <2 x double> %vminnm2.i +} + +declare <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float>, <2 x float>) nounwind readnone + + +define double @test_fmaxnmv(<2 x double> %in) { +; CHECK-LABEL: test_fmaxnmv: +; CHECK: fmaxnmp.2d d0, v0 + %max = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %in) + ret double %max +} + +define double @test_fminnmv(<2 x double> %in) { +; CHECK-LABEL: test_fminnmv: +; CHECK: fminnmp.2d d0, v0 + %min = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %in) + ret double %min +} + +declare double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double>) +declare double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double>) diff --git a/test/CodeGen/ARM64/vmovn.ll b/test/CodeGen/AArch64/arm64-vmovn.ll similarity index 74% rename from test/CodeGen/ARM64/vmovn.ll rename to test/CodeGen/AArch64/arm64-vmovn.ll index 675633b6cfad..67e2816a7f5f 100644 --- a/test/CodeGen/ARM64/vmovn.ll +++ b/test/CodeGen/AArch64/arm64-vmovn.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @xtn8b(<8 x i16> %A) nounwind { ;CHECK-LABEL: xtn8b: @@ -62,7 +62,7 @@ define <8 x i8> @sqxtn8b(<8 x i16> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtn.8b v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16> %A) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %A) ret <8 x i8> %tmp3 } @@ -71,7 +71,7 @@ define <4 x i16> @sqxtn4h(<4 x i32> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtn.4h v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32> %A) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %A) ret <4 x i16> %tmp3 } @@ -80,7 +80,7 @@ define <2 x i32> @sqxtn2s(<2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtn.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %A) ret <2 x i32> %tmp3 } @@ -89,7 +89,7 @@ define <16 x i8> @sqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtn2.16b v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16> %A) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %A) %res = shufflevector <8 x i8> %ret, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %res } @@ -99,7 +99,7 @@ define <8 x i16> @sqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtn2.8h v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32> 
%A) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %A) %res = shufflevector <4 x i16> %ret, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %res } @@ -109,21 +109,21 @@ define <4 x i32> @sqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtn2.4s v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %A) %res = shufflevector <2 x i32> %ret, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %res } -declare <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64>) nounwind readnone define <8 x i8> @uqxtn8b(<8 x i16> %A) nounwind { ;CHECK-LABEL: uqxtn8b: ;CHECK-NOT: ld1 ;CHECK: uqxtn.8b v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16> %A) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %A) ret <8 x i8> %tmp3 } @@ -132,7 +132,7 @@ define <4 x i16> @uqxtn4h(<4 x i32> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: uqxtn.4h v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqxtn.v4i16(<4 x i32> %A) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %A) ret <4 x i16> %tmp3 } @@ -141,7 +141,7 @@ define <2 x i32> @uqxtn2s(<2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: uqxtn.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %A) ret <2 x i32> %tmp3 } @@ -150,7 +150,7 @@ define <16 x i8> @uqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: uqxtn2.16b v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16> %A) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %A) %res = shufflevector <8 x i8> %ret, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %res } @@ -160,7 +160,7 @@ define <8 x i16> @uqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: uqxtn2.8h v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqxtn.v4i16(<4 x i32> %A) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %A) %res = shufflevector <4 x i16> %ret, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %res } @@ -170,21 +170,21 @@ define <4 x i32> @uqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: uqxtn2.4s v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %A) %res = shufflevector <2 x i32> %ret, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %res } -declare <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uqxtn.v4i16(<4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64>) nounwind readnone define <8 x i8> @sqxtun8b(<8 x i16> %A) nounwind { ;CHECK-LABEL: sqxtun8b: ;CHECK-NOT: ld1 ;CHECK: sqxtun.8b v0, v0 
;CHECK-NEXT: ret - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16> %A) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %A) ret <8 x i8> %tmp3 } @@ -193,7 +193,7 @@ define <4 x i16> @sqxtun4h(<4 x i32> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtun.4h v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32> %A) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %A) ret <4 x i16> %tmp3 } @@ -202,7 +202,7 @@ define <2 x i32> @sqxtun2s(<2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtun.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %A) ret <2 x i32> %tmp3 } @@ -211,7 +211,7 @@ define <16 x i8> @sqxtun2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtun2.16b v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16> %A) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %A) %res = shufflevector <8 x i8> %ret, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %res } @@ -221,7 +221,7 @@ define <8 x i16> @sqxtun2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtun2.8h v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32> %A) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %A) %res = shufflevector <4 x i16> %ret, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %res } @@ -231,12 +231,12 @@ define <4 x i32> @sqxtun2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtun2.4s v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %A) %res = shufflevector <2 x i32> %ret, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %res } -declare <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64>) nounwind readnone diff --git a/test/CodeGen/ARM64/vmul.ll b/test/CodeGen/AArch64/arm64-vmul.ll similarity index 80% rename from test/CodeGen/ARM64/vmul.ll rename to test/CodeGen/AArch64/arm64-vmul.ll index b6bd16ac0b4c..6fa60fe346af 100644 --- a/test/CodeGen/ARM64/vmul.ll +++ b/test/CodeGen/AArch64/arm64-vmul.ll @@ -1,4 +1,4 @@ -; RUN: llc -asm-verbose=false < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc -asm-verbose=false < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i16> @smull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -6,7 +6,7 @@ define <8 x i16> @smull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: smull.8h %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i16> %tmp3 } @@ -15,7 +15,7 @@ define <4 x i32> @smull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: smull.4s %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 
x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i32> %tmp3 } @@ -24,20 +24,20 @@ define <2 x i64> @smull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: smull.2d %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone define <8 x i16> @umull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: umull8h: ;CHECK: umull.8h %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i16> %tmp3 } @@ -46,7 +46,7 @@ define <4 x i32> @umull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: umull.4s %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i32> %tmp3 } @@ -55,20 +55,20 @@ define <2 x i64> @umull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: umull.2d %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone define <4 x i32> @sqdmull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: sqdmull4s: ;CHECK: sqdmull.4s %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i32> %tmp3 } @@ -77,7 +77,7 @@ define <2 x i64> @sqdmull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sqdmull.2d %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i64> %tmp3 } @@ -88,7 +88,7 @@ define <4 x i32> @sqdmull2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { %load2 = load <8 x i16>* %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp3 = call <4 x i32> 
@llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i32> %tmp3 } @@ -99,31 +99,31 @@ define <2 x i64> @sqdmull2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { %load2 = load <4 x i32>* %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i64> %tmp3 } -declare <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone define <8 x i16> @pmull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: pmull8h: ;CHECK: pmull.8h %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i16> %tmp3 } -declare <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone define <4 x i16> @sqdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: sqdmulh_4h: ;CHECK: sqdmulh.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -132,7 +132,7 @@ define <8 x i16> @sqdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sqdmulh.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -141,7 +141,7 @@ define <2 x i32> @sqdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sqdmulh.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -150,7 +150,7 @@ define <4 x i32> @sqdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sqdmulh.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -159,22 +159,22 @@ define i32 @sqdmulh_1s(i32* %A, i32* %B) nounwind { ;CHECK: sqdmulh s0, {{s[0-9]+}}, {{s[0-9]+}} %tmp1 = load i32* %A %tmp2 = load i32* %B - %tmp3 = call i32 @llvm.arm64.neon.sqdmulh.i32(i32 %tmp1, i32 %tmp2) + %tmp3 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %tmp1, i32 %tmp2) ret i32 %tmp3 } -declare <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x 
i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare i32 @llvm.arm64.neon.sqdmulh.i32(i32, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare i32 @llvm.aarch64.neon.sqdmulh.i32(i32, i32) nounwind readnone define <4 x i16> @sqrdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: sqrdmulh_4h: ;CHECK: sqrdmulh.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -183,7 +183,7 @@ define <8 x i16> @sqrdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sqrdmulh.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -192,7 +192,7 @@ define <2 x i32> @sqrdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sqrdmulh.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -201,7 +201,7 @@ define <4 x i32> @sqrdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sqrdmulh.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -210,22 +210,22 @@ define i32 @sqrdmulh_1s(i32* %A, i32* %B) nounwind { ;CHECK: sqrdmulh s0, {{s[0-9]+}}, {{s[0-9]+}} %tmp1 = load i32* %A %tmp2 = load i32* %B - %tmp3 = call i32 @llvm.arm64.neon.sqrdmulh.i32(i32 %tmp1, i32 %tmp2) + %tmp3 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %tmp1, i32 %tmp2) ret i32 %tmp3 } -declare <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare i32 @llvm.arm64.neon.sqrdmulh.i32(i32, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare i32 @llvm.aarch64.neon.sqrdmulh.i32(i32, i32) nounwind readnone define <2 x float> @fmulx_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: fmulx_2s: ;CHECK: fmulx.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> 
@llvm.arm64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -234,7 +234,7 @@ define <4 x float> @fmulx_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fmulx.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -243,13 +243,13 @@ define <2 x double> @fmulx_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fmulx.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) nounwind readnone define <4 x i32> @smlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: smlal4s: @@ -257,7 +257,7 @@ define <4 x i32> @smlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 } @@ -268,7 +268,7 @@ define <2 x i64> @smlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 } @@ -279,7 +279,7 @@ define <4 x i32> @smlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp5 = sub <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 } @@ -290,15 +290,15 @@ define <2 x i64> @smlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp5 = sub <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 } -declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) -declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i64> 
@llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) define <4 x i32> @sqdmlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: sqdmlal4s: @@ -306,8 +306,8 @@ define <4 x i32> @sqdmlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwin %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) ret <4 x i32> %tmp5 } @@ -317,8 +317,8 @@ define <2 x i64> @sqdmlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwin %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) ret <2 x i64> %tmp5 } @@ -330,8 +330,8 @@ define <4 x i32> @sqdmlal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounw %tmp3 = load <4 x i32>* %C %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) ret <4 x i32> %tmp5 } @@ -343,8 +343,8 @@ define <2 x i64> @sqdmlal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounw %tmp3 = load <2 x i64>* %C %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) ret <2 x i64> %tmp5 } @@ -354,8 +354,8 @@ define <4 x i32> @sqdmlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwin %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) ret <4 x i32> %tmp5 } @@ -365,8 +365,8 @@ define <2 x i64> @sqdmlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwin %tmp1 = load <2 x i32>* 
%A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) ret <2 x i64> %tmp5 } @@ -378,8 +378,8 @@ define <4 x i32> @sqdmlsl2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounw %tmp3 = load <4 x i32>* %C %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) ret <4 x i32> %tmp5 } @@ -391,8 +391,8 @@ define <2 x i64> @sqdmlsl2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounw %tmp3 = load <2 x i64>* %C %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) ret <2 x i64> %tmp5 } @@ -402,7 +402,7 @@ define <4 x i32> @umlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 } @@ -413,7 +413,7 @@ define <2 x i64> @umlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 } @@ -424,7 +424,7 @@ define <4 x i32> @umlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp5 = sub <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 } @@ -435,7 +435,7 @@ define <2 x i64> @umlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp5 = sub <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 } @@ -717,7 +717,7 @@ define <2 x float> @fmulx_lane_2s(<2 x float>* %A, <2 x float>* %B) nounwind { %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B %tmp3 
= shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> - %tmp4 = call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp3) + %tmp4 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp3) ret <2 x float> %tmp4 } @@ -728,7 +728,7 @@ define <4 x float> @fmulx_lane_4s(<4 x float>* %A, <4 x float>* %B) nounwind { %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B %tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> - %tmp4 = call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp3) + %tmp4 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp3) ret <4 x float> %tmp4 } @@ -739,7 +739,7 @@ define <2 x double> @fmulx_lane_2d(<2 x double>* %A, <2 x double>* %B) nounwind %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B %tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> - %tmp4 = call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp3) + %tmp4 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp3) ret <2 x double> %tmp4 } @@ -750,7 +750,7 @@ define <4 x i16> @sqdmulh_lane_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp4 = call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3) + %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3) ret <4 x i16> %tmp4 } @@ -761,7 +761,7 @@ define <8 x i16> @sqdmulh_lane_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> - %tmp4 = call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3) + %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3) ret <8 x i16> %tmp4 } @@ -772,7 +772,7 @@ define <2 x i32> @sqdmulh_lane_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp4 = call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3) + %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3) ret <2 x i32> %tmp4 } @@ -783,7 +783,7 @@ define <4 x i32> @sqdmulh_lane_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3) ret <4 x i32> %tmp4 } @@ -792,7 +792,7 @@ define i32 @sqdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind { ;CHECK-NOT: dup ;CHECK: sqdmulh.s s0, {{s[0-9]+}}, {{v[0-9]+}}[1] %tmp1 = extractelement <4 x i32> %B, i32 1 - %tmp2 = call i32 @llvm.arm64.neon.sqdmulh.i32(i32 %A, i32 %tmp1) + %tmp2 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %A, i32 %tmp1) ret i32 %tmp2 } @@ -803,7 +803,7 @@ define <4 x i16> @sqrdmulh_lane_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp4 = call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3) + %tmp4 = call <4 x i16> 
@llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3) ret <4 x i16> %tmp4 } @@ -814,7 +814,7 @@ define <8 x i16> @sqrdmulh_lane_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> - %tmp4 = call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3) + %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3) ret <8 x i16> %tmp4 } @@ -825,7 +825,7 @@ define <2 x i32> @sqrdmulh_lane_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp4 = call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3) + %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3) ret <2 x i32> %tmp4 } @@ -836,7 +836,7 @@ define <4 x i32> @sqrdmulh_lane_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3) ret <4 x i32> %tmp4 } @@ -845,7 +845,7 @@ define i32 @sqrdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind { ;CHECK-NOT: dup ;CHECK: sqrdmulh.s s0, {{s[0-9]+}}, {{v[0-9]+}}[1] %tmp1 = extractelement <4 x i32> %B, i32 1 - %tmp2 = call i32 @llvm.arm64.neon.sqrdmulh.i32(i32 %A, i32 %tmp1) + %tmp2 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %A, i32 %tmp1) ret i32 %tmp2 } @@ -856,7 +856,7 @@ define <4 x i32> @sqdmull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) ret <4 x i32> %tmp4 } @@ -867,7 +867,7 @@ define <2 x i64> @sqdmull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) ret <2 x i64> %tmp4 } @@ -879,7 +879,7 @@ define <4 x i32> @sqdmull2_lane_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { %load2 = load <8 x i16>* %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i32> %tmp4 } @@ -891,7 +891,7 @@ define <2 x i64> @sqdmull2_lane_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { %load2 = load <4 x i32>* %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i64> %tmp4 } @@ -902,7 +902,7 @@ define <4 x i32> @umull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind 
{ %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) ret <4 x i32> %tmp4 } @@ -913,7 +913,7 @@ define <2 x i64> @umull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp4 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) ret <2 x i64> %tmp4 } @@ -924,7 +924,7 @@ define <4 x i32> @smull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) ret <4 x i32> %tmp4 } @@ -935,7 +935,7 @@ define <2 x i64> @smull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp4 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) ret <2 x i64> %tmp4 } @@ -947,7 +947,7 @@ define <4 x i32> @smlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nou %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) %tmp6 = add <4 x i32> %tmp3, %tmp5 ret <4 x i32> %tmp6 } @@ -960,7 +960,7 @@ define <2 x i64> @smlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) %tmp6 = add <2 x i64> %tmp3, %tmp5 ret <2 x i64> %tmp6 } @@ -973,8 +973,8 @@ define <4 x i32> @sqdmlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) n %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) - %tmp6 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) + %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) ret <4 x i32> %tmp6 } @@ -986,8 +986,8 @@ define <2 x i64> @sqdmlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) n %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) - %tmp6 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x 
i32> %tmp4) + %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) ret <2 x i64> %tmp6 } @@ -1000,8 +1000,8 @@ define <4 x i32> @sqdmlal2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) %tmp3 = load <4 x i32>* %C %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - %tmp6 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) ret <4 x i32> %tmp6 } @@ -1014,8 +1014,8 @@ define <2 x i64> @sqdmlal2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) %tmp3 = load <2 x i64>* %C %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - %tmp6 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) ret <2 x i64> %tmp6 } @@ -1024,45 +1024,45 @@ define i32 @sqdmlal_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind { ;CHECK: sqdmlal.4s %lhs = insertelement <4 x i16> undef, i16 %B, i32 0 %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> - %prod.vec = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs) + %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs) %prod = extractelement <4 x i32> %prod.vec, i32 0 - %res = call i32 @llvm.arm64.neon.sqadd.i32(i32 %A, i32 %prod) + %res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %A, i32 %prod) ret i32 %res } -declare i32 @llvm.arm64.neon.sqadd.i32(i32, i32) +declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind { ;CHECK-LABEL: sqdmlsl_lane_1s: ;CHECK: sqdmlsl.4s %lhs = insertelement <4 x i16> undef, i16 %B, i32 0 %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> - %prod.vec = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs) + %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs) %prod = extractelement <4 x i32> %prod.vec, i32 0 - %res = call i32 @llvm.arm64.neon.sqsub.i32(i32 %A, i32 %prod) + %res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %A, i32 %prod) ret i32 %res } -declare i32 @llvm.arm64.neon.sqsub.i32(i32, i32) +declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) define i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind { ;CHECK-LABEL: sqdmlal_lane_1d: ;CHECK: sqdmlal.s %rhs = extractelement <2 x i32> %C, i32 1 - %prod = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) - %res = call i64 @llvm.arm64.neon.sqadd.i64(i64 %A, i64 %prod) + %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) + %res = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %A, i64 %prod) ret i64 %res } -declare i64 @llvm.arm64.neon.sqdmulls.scalar(i32, i32) -declare i64 @llvm.arm64.neon.sqadd.i64(i64, i64) +declare i64 @llvm.aarch64.neon.sqdmulls.scalar(i32, i32) +declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64) define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x 
i32> %C) nounwind { ;CHECK-LABEL: sqdmlsl_lane_1d: ;CHECK: sqdmlsl.s %rhs = extractelement <2 x i32> %C, i32 1 - %prod = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) - %res = call i64 @llvm.arm64.neon.sqsub.i64(i64 %A, i64 %prod) + %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) + %res = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %A, i64 %prod) ret i64 %res } -declare i64 @llvm.arm64.neon.sqsub.i64(i64, i64) +declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64) define <4 x i32> @umlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind { @@ -1073,7 +1073,7 @@ define <4 x i32> @umlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nou %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) %tmp6 = add <4 x i32> %tmp3, %tmp5 ret <4 x i32> %tmp6 } @@ -1086,7 +1086,7 @@ define <2 x i64> @umlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) %tmp6 = add <2 x i64> %tmp3, %tmp5 ret <2 x i64> %tmp6 } @@ -1100,7 +1100,7 @@ define <4 x i32> @smlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nou %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) %tmp6 = sub <4 x i32> %tmp3, %tmp5 ret <4 x i32> %tmp6 } @@ -1113,7 +1113,7 @@ define <2 x i64> @smlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) %tmp6 = sub <2 x i64> %tmp3, %tmp5 ret <2 x i64> %tmp6 } @@ -1126,8 +1126,8 @@ define <4 x i32> @sqdmlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) n %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) - %tmp6 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) + %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) ret <4 x i32> %tmp6 } @@ -1139,8 +1139,8 @@ define <2 x i64> @sqdmlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) n %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) - %tmp6 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) + %tmp6 = call <2 x i64> 
@llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) ret <2 x i64> %tmp6 } @@ -1153,8 +1153,8 @@ define <4 x i32> @sqdmlsl2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) %tmp3 = load <4 x i32>* %C %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - %tmp6 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) ret <4 x i32> %tmp6 } @@ -1167,8 +1167,8 @@ define <2 x i64> @sqdmlsl2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) %tmp3 = load <2 x i64>* %C %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - %tmp6 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) ret <2 x i64> %tmp6 } @@ -1180,7 +1180,7 @@ define <4 x i32> @umlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nou %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) %tmp6 = sub <4 x i32> %tmp3, %tmp5 ret <4 x i32> %tmp6 } @@ -1193,7 +1193,7 @@ define <2 x i64> @umlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) %tmp6 = sub <2 x i64> %tmp3, %tmp5 ret <2 x i64> %tmp6 } @@ -1202,7 +1202,7 @@ define <2 x i64> @umlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou define float @fmulxs(float %a, float %b) nounwind { ; CHECK-LABEL: fmulxs: ; CHECKNEXT: fmulx s0, s0, s1 - %fmulx.i = tail call float @llvm.arm64.neon.fmulx.f32(float %a, float %b) nounwind + %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind ; CHECKNEXT: ret ret float %fmulx.i } @@ -1210,7 +1210,7 @@ define float @fmulxs(float %a, float %b) nounwind { define double @fmulxd(double %a, double %b) nounwind { ; CHECK-LABEL: fmulxd: ; CHECKNEXT: fmulx d0, d0, d1 - %fmulx.i = tail call double @llvm.arm64.neon.fmulx.f64(double %a, double %b) nounwind + %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind ; CHECKNEXT: ret ret double %fmulx.i } @@ -1219,7 +1219,7 @@ define float @fmulxs_lane(float %a, <4 x float> %vec) nounwind { ; CHECK-LABEL: fmulxs_lane: ; CHECKNEXT: fmulx.s s0, s0, v1[3] %b = extractelement <4 x float> %vec, i32 3 - %fmulx.i = tail call float @llvm.arm64.neon.fmulx.f32(float %a, float %b) nounwind + %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind ; CHECKNEXT: ret ret float %fmulx.i } @@ -1228,13 +1228,13 @@ define double 
@fmulxd_lane(double %a, <2 x double> %vec) nounwind { ; CHECK-LABEL: fmulxd_lane: ; CHECKNEXT: fmulx d0, d0, v1[1] %b = extractelement <2 x double> %vec, i32 1 - %fmulx.i = tail call double @llvm.arm64.neon.fmulx.f64(double %a, double %b) nounwind + %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind ; CHECKNEXT: ret ret double %fmulx.i } -declare double @llvm.arm64.neon.fmulx.f64(double, double) nounwind readnone -declare float @llvm.arm64.neon.fmulx.f32(float, float) nounwind readnone +declare double @llvm.aarch64.neon.fmulx.f64(double, double) nounwind readnone +declare float @llvm.aarch64.neon.fmulx.f32(float, float) nounwind readnone define <8 x i16> @smull2_8h_simple(<16 x i8> %a, <16 x i8> %b) nounwind { @@ -1243,7 +1243,7 @@ define <8 x i16> @smull2_8h_simple(<16 x i8> %a, <16 x i8> %b) nounwind { ; CHECK-NEXT: ret %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> %2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %3 = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %1, <8 x i8> %2) #2 + %3 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %1, <8 x i8> %2) #2 ret <8 x i16> %3 } @@ -1256,7 +1256,7 @@ define <8 x i16> @foo0(<16 x i8> %a, <16 x i8> %b) nounwind { %tmp2 = bitcast <16 x i8> %b to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8> - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind ret <8 x i16> %vmull.i.i } @@ -1269,7 +1269,7 @@ define <4 x i32> @foo1(<8 x i16> %a, <8 x i16> %b) nounwind { %tmp2 = bitcast <8 x i16> %b to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16> - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind ret <4 x i32> %vmull2.i.i } @@ -1282,7 +1282,7 @@ define <2 x i64> @foo2(<4 x i32> %a, <4 x i32> %b) nounwind { %tmp2 = bitcast <4 x i32> %b to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind ret <2 x i64> %vmull2.i.i } @@ -1295,7 +1295,7 @@ define <8 x i16> @foo3(<16 x i8> %a, <16 x i8> %b) nounwind { %tmp2 = bitcast <16 x i8> %b to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8> - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind ret <8 x i16> %vmull.i.i } @@ -1308,7 +1308,7 @@ define <4 x i32> @foo4(<8 x i16> %a, <8 x i16> %b) nounwind { %tmp2 = bitcast <8 x i16> %b to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16> - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind + %vmull2.i.i = tail call <4 x 
i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind ret <4 x i32> %vmull2.i.i } @@ -1321,7 +1321,7 @@ define <2 x i64> @foo5(<4 x i32> %a, <4 x i32> %b) nounwind { %tmp2 = bitcast <4 x i32> %b to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind ret <2 x i64> %vmull2.i.i } @@ -1334,7 +1334,7 @@ entry: %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> %1 = bitcast <1 x i64> %shuffle.i to <4 x i16> %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind ret <4 x i32> %vmull2.i } @@ -1347,7 +1347,7 @@ entry: %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> %1 = bitcast <1 x i64> %shuffle.i to <2 x i32> %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind ret <2 x i64> %vmull2.i } @@ -1360,7 +1360,7 @@ entry: %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> %1 = bitcast <1 x i64> %shuffle.i to <4 x i16> %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind ret <4 x i32> %vmull2.i } @@ -1373,7 +1373,7 @@ entry: %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> %1 = bitcast <1 x i64> %shuffle.i to <2 x i32> %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind ret <2 x i64> %vmull2.i } @@ -1388,7 +1388,7 @@ define <8 x i16> @bar0(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind { %tmp2 = bitcast <16 x i8> %c to <2 x i64> %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind + %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind %add.i = add <8 x i16> %vmull.i.i.i, %a ret <8 x i16> %add.i } @@ -1404,7 +1404,7 @@ define <4 x i32> @bar1(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind { %tmp2 = bitcast <8 x i16> %c to <2 x i64> %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind %add.i = add <4 x i32> %vmull2.i.i.i, %a ret <4 x i32> %add.i } @@ -1420,7 +1420,7 @@ define <2 x i64> 
@bar2(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind { %tmp2 = bitcast <4 x i32> %c to <2 x i64> %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind %add.i = add <2 x i64> %vmull2.i.i.i, %a ret <2 x i64> %add.i } @@ -1436,7 +1436,7 @@ define <8 x i16> @bar3(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind { %tmp2 = bitcast <16 x i8> %c to <2 x i64> %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind + %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind %add.i = add <8 x i16> %vmull.i.i.i, %a ret <8 x i16> %add.i } @@ -1452,7 +1452,7 @@ define <4 x i32> @bar4(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind { %tmp2 = bitcast <8 x i16> %c to <2 x i64> %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind %add.i = add <4 x i32> %vmull2.i.i.i, %a ret <4 x i32> %add.i } @@ -1468,7 +1468,7 @@ define <2 x i64> @bar5(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind { %tmp2 = bitcast <4 x i32> %c to <2 x i64> %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind %add.i = add <2 x i64> %vmull2.i.i.i, %a ret <2 x i64> %add.i } @@ -1484,7 +1484,7 @@ define <4 x i32> @mlal2_1(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind { %tmp2 = bitcast <8 x i16> %shuffle to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16> - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind %add = add <4 x i32> %vmull2.i.i, %a ret <4 x i32> %add } @@ -1500,7 +1500,7 @@ define <2 x i64> @mlal2_2(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind { %tmp2 = bitcast <4 x i32> %shuffle to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind %add = add <2 x i64> %vmull2.i.i, %a ret <2 x i64> %add } @@ -1517,7 +1517,7 @@ define <4 x i32> @mlal2_4(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind { %tmp2 = bitcast <8 x i16> %shuffle to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 
x i16> - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind %add = add <4 x i32> %vmull2.i.i, %a ret <4 x i32> %add } @@ -1533,7 +1533,7 @@ define <2 x i64> @mlal2_5(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind { %tmp2 = bitcast <4 x i32> %shuffle to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind %add = add <2 x i64> %vmull2.i.i, %a ret <2 x i64> %add } @@ -1631,7 +1631,7 @@ entry: ; CHECK: smull.4s v0, v0, v1[6] ; CHECK-NEXT: ret %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2 + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2 ret <4 x i32> %vmull2.i } @@ -1642,7 +1642,7 @@ entry: ; CHECK: smull.2d v0, v0, v1[2] ; CHECK-NEXT: ret %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2 + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2 ret <2 x i64> %vmull2.i } define <4 x i32> @vmull_laneq_u16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp { @@ -1652,7 +1652,7 @@ entry: ; CHECK: umull.4s v0, v0, v1[6] ; CHECK-NEXT: ret %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2 + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2 ret <4 x i32> %vmull2.i } @@ -1663,7 +1663,7 @@ entry: ; CHECK: umull.2d v0, v0, v1[2] ; CHECK-NEXT: ret %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2 + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2 ret <2 x i64> %vmull2.i } @@ -1681,7 +1681,7 @@ entry: %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3 - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind ret <4 x i32> %vmull2.i.i } @@ -1696,7 +1696,7 @@ entry: %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1 - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind ret <2 x i64> %vmull2.i.i } @@ -1714,7 +1714,7 @@ entry: %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2 %vecinit3.i = insertelement <4 x 
i16> %vecinit2.i, i16 %conv, i32 3 - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind ret <4 x i32> %vmull2.i.i } @@ -1729,7 +1729,7 @@ entry: %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1 - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind ret <2 x i64> %vmull2.i.i } @@ -1787,7 +1787,7 @@ define <2 x i64> @mull_from_two_extracts(<4 x i32> %lhs, <4 x i32> %rhs) { %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind + %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind ret <2 x i64> %res } @@ -1799,8 +1799,8 @@ define <2 x i64> @mlal_from_two_extracts(<2 x i64> %accum, <4 x i32> %lhs, <4 x %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind - %sum = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res) + %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind + %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res) ret <2 x i64> %sum } @@ -1813,7 +1813,7 @@ define <2 x i64> @mull_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind + %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind ret <2 x i64> %res } @@ -1826,7 +1826,7 @@ define <8 x i16> @pmull_from_extract_dup(<16 x i8> %lhs, i8 %rhs) { %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> - %res = tail call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhsvec) nounwind + %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhsvec) nounwind ret <8 x i16> %res } @@ -1838,7 +1838,7 @@ define <8 x i16> @pmull_from_extract_duplane(<16 x i8> %lhs, <8 x i8> %rhs) { %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> - %res = tail call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhs.high) nounwind + %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhs.high) nounwind ret <8 x i16> %res } @@ -1850,7 +1850,7 @@ define <2 x i64> @sqdmull_from_extract_duplane(<4 x i32> %lhs, <4 x i32> %rhs) { %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind + %res = tail call <2 x i64> 
@llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind ret <2 x i64> %res } @@ -1862,8 +1862,8 @@ define <2 x i64> @sqdmlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs, %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind - %sum = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res) + %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind + %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res) ret <2 x i64> %sum } @@ -1875,7 +1875,7 @@ define <2 x i64> @umlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs, < %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind + %res = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind %sum = add <2 x i64> %accum, %res ret <2 x i64> %sum } @@ -1997,23 +1997,23 @@ define <1 x double> @test_fdiv_v1f64(<1 x double> %L, <1 x double> %R) nounwind define i64 @sqdmlal_d(i32 %A, i32 %B, i64 %C) nounwind { ;CHECK-LABEL: sqdmlal_d: ;CHECK: sqdmlal - %tmp4 = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %A, i32 %B) - %tmp5 = call i64 @llvm.arm64.neon.sqadd.i64(i64 %C, i64 %tmp4) + %tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B) + %tmp5 = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %C, i64 %tmp4) ret i64 %tmp5 } define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind { ;CHECK-LABEL: sqdmlsl_d: ;CHECK: sqdmlsl - %tmp4 = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %A, i32 %B) - %tmp5 = call i64 @llvm.arm64.neon.sqsub.i64(i64 %C, i64 %tmp4) + %tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B) + %tmp5 = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %C, i64 %tmp4) ret i64 %tmp5 } define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind { ; CHECK-LABEL: test_pmull_64: ; CHECK: pmull.1q - %val = call <16 x i8> @llvm.arm64.neon.pmull64(i64 %l, i64 %r) + %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r) ret <16 x i8> %val } @@ -2022,11 +2022,11 @@ define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind { ; CHECK: pmull2.1q %l_hi = extractelement <2 x i64> %l, i32 1 %r_hi = extractelement <2 x i64> %r, i32 1 - %val = call <16 x i8> @llvm.arm64.neon.pmull64(i64 %l_hi, i64 %r_hi) + %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l_hi, i64 %r_hi) ret <16 x i8> %val } -declare <16 x i8> @llvm.arm64.neon.pmull64(i64, i64) +declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind { ; CHECK-LABEL: test_mul_v1i64: diff --git a/test/CodeGen/ARM64/volatile.ll b/test/CodeGen/AArch64/arm64-volatile.ll similarity index 100% rename from test/CodeGen/ARM64/volatile.ll rename to test/CodeGen/AArch64/arm64-volatile.ll diff --git a/test/CodeGen/ARM64/vpopcnt.ll b/test/CodeGen/AArch64/arm64-vpopcnt.ll similarity index 100% rename from test/CodeGen/ARM64/vpopcnt.ll rename to test/CodeGen/AArch64/arm64-vpopcnt.ll diff --git a/test/CodeGen/ARM64/vqadd.ll b/test/CodeGen/AArch64/arm64-vqadd.ll similarity index 50% rename from test/CodeGen/ARM64/vqadd.ll rename to 
test/CodeGen/AArch64/arm64-vqadd.ll index 0b7f7e53105e..20f7e2c7a893 100644 --- a/test/CodeGen/ARM64/vqadd.ll +++ b/test/CodeGen/AArch64/arm64-vqadd.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @sqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: sqadd8b: ;CHECK: sqadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <4 x i16> @sqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: sqadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -23,7 +23,7 @@ define <2 x i32> @sqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sqadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -32,7 +32,7 @@ define <8 x i8> @uqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: uqadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -41,7 +41,7 @@ define <4 x i16> @uqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uqadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -50,7 +50,7 @@ define <2 x i32> @uqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uqadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -59,7 +59,7 @@ define <16 x i8> @sqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: sqadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -68,7 +68,7 @@ define <8 x i16> @sqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sqadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -77,7 +77,7 @@ define <4 x i32> @sqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sqadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -86,7 +86,7 @@ define <2 x i64> @sqadd2d(<2 x i64>* %A, 
<2 x i64>* %B) nounwind { ;CHECK: sqadd.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -95,7 +95,7 @@ define <16 x i8> @uqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: uqadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -104,7 +104,7 @@ define <8 x i16> @uqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: uqadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -113,7 +113,7 @@ define <4 x i32> @uqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: uqadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -122,36 +122,36 @@ define <2 x i64> @uqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: uqadd.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.uqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.sqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.uqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.uqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8>, 
<16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.uqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.uqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @usqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: usqadd8b: ;CHECK: usqadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.usqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -160,7 +160,7 @@ define <4 x i16> @usqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: usqadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.usqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -169,7 +169,7 @@ define <2 x i32> @usqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: usqadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.usqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -178,7 +178,7 @@ define <16 x i8> @usqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: usqadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.usqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -187,7 +187,7 @@ define <8 x i16> @usqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: usqadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.usqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -196,7 +196,7 @@ define <4 x i32> @usqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: usqadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.usqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -205,42 +205,42 @@ define <2 x i64> @usqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: usqadd.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.usqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } define i64 
@usqadd_d(i64 %l, i64 %r) nounwind { ; CHECK-LABEL: usqadd_d: ; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}} - %sum = call i64 @llvm.arm64.neon.usqadd.i64(i64 %l, i64 %r) + %sum = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %l, i64 %r) ret i64 %sum } define i32 @usqadd_s(i32 %l, i32 %r) nounwind { ; CHECK-LABEL: usqadd_s: ; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}} - %sum = call i32 @llvm.arm64.neon.usqadd.i32(i32 %l, i32 %r) + %sum = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %l, i32 %r) ret i32 %sum } -declare <8 x i8> @llvm.arm64.neon.usqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.usqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.usqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare i64 @llvm.arm64.neon.usqadd.i64(i64, i64) nounwind readnone -declare i32 @llvm.arm64.neon.usqadd.i32(i32, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.usqadd.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.neon.usqadd.i32(i32, i32) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.usqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.usqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.usqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.usqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @suqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: suqadd8b: ;CHECK: suqadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.suqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -249,7 +249,7 @@ define <4 x i16> @suqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: suqadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.suqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -258,7 +258,7 @@ define <2 x i32> @suqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: suqadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.suqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -267,7 +267,7 @@ define <16 x i8> @suqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: suqadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.suqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call 
<16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -276,7 +276,7 @@ define <8 x i16> @suqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: suqadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.suqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -285,7 +285,7 @@ define <4 x i32> @suqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: suqadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.suqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -294,39 +294,39 @@ define <2 x i64> @suqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: suqadd.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.suqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } define <1 x i64> @suqadd_1d(<1 x i64> %l, <1 x i64> %r) nounwind { ; CHECK-LABEL: suqadd_1d: ; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}} - %sum = call <1 x i64> @llvm.arm64.neon.suqadd.v1i64(<1 x i64> %l, <1 x i64> %r) + %sum = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %l, <1 x i64> %r) ret <1 x i64> %sum } define i64 @suqadd_d(i64 %l, i64 %r) nounwind { ; CHECK-LABEL: suqadd_d: ; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}} - %sum = call i64 @llvm.arm64.neon.suqadd.i64(i64 %l, i64 %r) + %sum = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %l, i64 %r) ret i64 %sum } define i32 @suqadd_s(i32 %l, i32 %r) nounwind { ; CHECK-LABEL: suqadd_s: ; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}} - %sum = call i32 @llvm.arm64.neon.suqadd.i32(i32 %l, i32 %r) + %sum = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %l, i32 %r) ret i32 %sum } -declare <8 x i8> @llvm.arm64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare i64 @llvm.arm64.neon.suqadd.i64(i64, i64) nounwind readnone -declare i32 @llvm.arm64.neon.suqadd.i32(i32, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.suqadd.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.neon.suqadd.i32(i32, i32) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone 
+declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/test/CodeGen/AArch64/arm64-vqsub.ll b/test/CodeGen/AArch64/arm64-vqsub.ll new file mode 100644 index 000000000000..dde3ac3478e4 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-vqsub.ll @@ -0,0 +1,147 @@ +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s + +define <8 x i8> @sqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: sqsub8b: +;CHECK: sqsub.8b + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @sqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK-LABEL: sqsub4h: +;CHECK: sqsub.4h + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @sqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK-LABEL: sqsub2s: +;CHECK: sqsub.2s + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i8> @uqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: uqsub8b: +;CHECK: uqsub.8b + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @uqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK-LABEL: uqsub4h: +;CHECK: uqsub.4h + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @uqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK-LABEL: uqsub2s: +;CHECK: uqsub.2s + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @sqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: sqsub16b: +;CHECK: sqsub.16b + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @sqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: sqsub8h: +;CHECK: sqsub.8h + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @sqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK-LABEL: sqsub4s: +;CHECK: sqsub.4s + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @sqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK-LABEL: sqsub2d: +;CHECK: sqsub.2d + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @uqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: uqsub16b: +;CHECK: uqsub.16b + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %tmp1, <16 x 
i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @uqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: uqsub8h: +;CHECK: uqsub.8h + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @uqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK-LABEL: uqsub4s: +;CHECK: uqsub.4s + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @uqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK-LABEL: uqsub2d: +;CHECK: uqsub.2d + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +declare <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/test/CodeGen/ARM64/vselect.ll b/test/CodeGen/AArch64/arm64-vselect.ll similarity index 89% rename from test/CodeGen/ARM64/vselect.ll rename to test/CodeGen/AArch64/arm64-vselect.ll index aa8e81eb709e..9988512f530e 100644 --- a/test/CodeGen/ARM64/vselect.ll +++ b/test/CodeGen/AArch64/arm64-vselect.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ;CHECK: @func63 ;CHECK: cmeq.4h v0, v0, v1 diff --git a/test/CodeGen/ARM64/vsetcc_fp.ll b/test/CodeGen/AArch64/arm64-vsetcc_fp.ll similarity index 80% rename from test/CodeGen/ARM64/vsetcc_fp.ll rename to test/CodeGen/AArch64/arm64-vsetcc_fp.ll index c93aad5c4ee0..f4f4714dde4d 100644 --- a/test/CodeGen/ARM64/vsetcc_fp.ll +++ b/test/CodeGen/AArch64/arm64-vsetcc_fp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s define <2 x i32> @fcmp_one(<2 x float> %x, <2 x float> %y) nounwind optsize readnone { ; CHECK-LABEL: fcmp_one: ; CHECK-NEXT: fcmgt.2s [[REG:v[0-9]+]], v0, v1 diff --git 
a/test/CodeGen/ARM64/vshift.ll b/test/CodeGen/AArch64/arm64-vshift.ll similarity index 68% rename from test/CodeGen/ARM64/vshift.ll rename to test/CodeGen/AArch64/arm64-vshift.ll index 486c6cc390b1..82ae486f8c4a 100644 --- a/test/CodeGen/ARM64/vshift.ll +++ b/test/CodeGen/AArch64/arm64-vshift.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -enable-misched=false | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -enable-misched=false | FileCheck %s define <8 x i8> @sqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: sqshl8b: ;CHECK: sqshl.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <4 x i16> @sqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: sqshl.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -23,7 +23,7 @@ define <2 x i32> @sqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sqshl.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -32,7 +32,7 @@ define <8 x i8> @uqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: uqshl.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -41,7 +41,7 @@ define <4 x i16> @uqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uqshl.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -50,7 +50,7 @@ define <2 x i32> @uqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uqshl.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -59,7 +59,7 @@ define <16 x i8> @sqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: sqshl.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -68,7 +68,7 @@ define <8 x i16> @sqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sqshl.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -77,7 +77,7 @@ define <4 x i32> @sqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sqshl.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqshl.v4i32(<4 x i32> %tmp1, 
<4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -86,7 +86,7 @@ define <2 x i64> @sqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: sqshl.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -95,7 +95,7 @@ define <16 x i8> @uqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: uqshl.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -104,7 +104,7 @@ define <8 x i16> @uqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: uqshl.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -113,7 +113,7 @@ define <4 x i32> @uqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: uqshl.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -122,36 +122,36 @@ define <2 x i64> @uqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: uqshl.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqshl.v4i32(<4 
x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @srshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: srshl8b: ;CHECK: srshl.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -160,7 +160,7 @@ define <4 x i16> @srshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: srshl.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -169,7 +169,7 @@ define <2 x i32> @srshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: srshl.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -178,7 +178,7 @@ define <8 x i8> @urshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: urshl.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -187,7 +187,7 @@ define <4 x i16> @urshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: urshl.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -196,7 +196,7 @@ define <2 x i32> @urshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: urshl.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -205,7 +205,7 @@ define <16 x i8> @srshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: srshl.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 
x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -214,7 +214,7 @@ define <8 x i16> @srshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: srshl.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -223,7 +223,7 @@ define <4 x i32> @srshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: srshl.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -232,7 +232,7 @@ define <2 x i64> @srshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: srshl.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -241,7 +241,7 @@ define <16 x i8> @urshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: urshl.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -250,7 +250,7 @@ define <8 x i16> @urshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: urshl.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -259,7 +259,7 @@ define <4 x i32> @urshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: urshl.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -268,36 +268,36 @@ define <2 x i64> @urshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: urshl.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> 
@llvm.arm64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @sqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: sqrshl8b: ;CHECK: sqrshl.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -306,7 +306,7 @@ define <4 x i16> @sqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: sqrshl.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -315,7 +315,7 @@ define <2 x i32> @sqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sqrshl.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -324,7 +324,7 @@ define <8 x i8> @uqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: uqrshl.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -333,7 +333,7 @@ define <4 x i16> @uqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uqrshl.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> 
@llvm.arm64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -342,7 +342,7 @@ define <2 x i32> @uqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uqrshl.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -351,7 +351,7 @@ define <16 x i8> @sqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: sqrshl.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -360,7 +360,7 @@ define <8 x i16> @sqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sqrshl.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -369,7 +369,7 @@ define <4 x i32> @sqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sqrshl.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -378,7 +378,7 @@ define <2 x i64> @sqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: sqrshl.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -387,7 +387,7 @@ define <16 x i8> @uqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: uqrshl.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -396,7 +396,7 @@ define <8 x i16> @uqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: uqrshl.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -405,7 +405,7 @@ define <4 x i32> @uqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: uqrshl.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -414,35 +414,35 @@ define <2 x i64> @uqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: uqrshl.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> 
@llvm.arm64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @urshr8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: urshr8b: ;CHECK: urshr.8b %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } @@ -450,7 +450,7 @@ define <4 x i16> @urshr4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: urshr4h: ;CHECK: urshr.4h %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } @@ -458,7 +458,7 @@ define <2 x i32> @urshr2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: urshr2s: ;CHECK: urshr.2s %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> 
@llvm.arm64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } @@ -466,7 +466,7 @@ define <16 x i8> @urshr16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: urshr16b: ;CHECK: urshr.16b %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } @@ -474,7 +474,7 @@ define <8 x i16> @urshr8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: urshr8h: ;CHECK: urshr.8h %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } @@ -482,7 +482,7 @@ define <4 x i32> @urshr4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: urshr4s: ;CHECK: urshr.4s %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } @@ -490,7 +490,7 @@ define <2 x i64> @urshr2d(<2 x i64>* %A) nounwind { ;CHECK-LABEL: urshr2d: ;CHECK: urshr.2d %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } @@ -498,7 +498,7 @@ define <8 x i8> @srshr8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: srshr8b: ;CHECK: srshr.8b %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } @@ -506,7 +506,7 @@ define <4 x i16> @srshr4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: srshr4h: ;CHECK: srshr.4h %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } @@ -514,7 +514,7 @@ define <2 x i32> @srshr2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: srshr2s: ;CHECK: srshr.2s %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } @@ -522,7 +522,7 @@ define <16 x i8> @srshr16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: srshr16b: ;CHECK: srshr.16b %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } @@ -530,7 +530,7 @@ define <8 x i16> @srshr8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: srshr8h: ;CHECK: srshr.8h %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } @@ -538,7 +538,7 @@ define <4 x i32> @srshr4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: srshr4s: ;CHECK: srshr.4s %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } @@ -546,7 +546,7 @@ define <2 x i64> @srshr2d(<2 x i64>* %A) nounwind { ;CHECK-LABEL: srshr2d: ;CHECK: 
srshr.2d %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } @@ -554,7 +554,7 @@ define <8 x i8> @sqshlu8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: sqshlu8b: ;CHECK: sqshlu.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } @@ -562,7 +562,7 @@ define <4 x i16> @sqshlu4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: sqshlu4h: ;CHECK: sqshlu.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } @@ -570,7 +570,7 @@ define <2 x i32> @sqshlu2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: sqshlu2s: ;CHECK: sqshlu.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } @@ -578,7 +578,7 @@ define <16 x i8> @sqshlu16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: sqshlu16b: ;CHECK: sqshlu.16b v0, {{v[0-9]+}}, #1 %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } @@ -586,7 +586,7 @@ define <8 x i16> @sqshlu8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqshlu8h: ;CHECK: sqshlu.8h v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } @@ -594,7 +594,7 @@ define <4 x i32> @sqshlu4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqshlu4s: ;CHECK: sqshlu.4s v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } @@ -602,25 +602,25 @@ define <2 x i64> @sqshlu2d(<2 x i64>* %A) nounwind { ;CHECK-LABEL: sqshlu2d: ;CHECK: sqshlu.2d v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqshlu.v8i16(<8 x 
i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @rshrn8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: rshrn8b: ;CHECK: rshrn.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } @@ -628,7 +628,7 @@ define <4 x i16> @rshrn4h(<4 x i32>* %A) nounwind { ;CHECK-LABEL: rshrn4h: ;CHECK: rshrn.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } @@ -636,7 +636,7 @@ define <2 x i32> @rshrn2s(<2 x i64>* %A) nounwind { ;CHECK-LABEL: rshrn2s: ;CHECK: rshrn.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } @@ -645,7 +645,7 @@ define <16 x i8> @rshrn16b(<8 x i8> *%ret, <8 x i16>* %A) nounwind { ;CHECK: rshrn2.16b v0, {{v[0-9]+}}, #1 %out = load <8 x i8>* %ret %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } @@ -655,7 +655,7 @@ define <8 x i16> @rshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ;CHECK: rshrn2.8h v0, {{v[0-9]+}}, #1 %out = load <4 x i16>* %ret %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } @@ -665,14 +665,14 @@ define <4 x i32> @rshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ;CHECK: rshrn2.4s v0, {{v[0-9]+}}, #1 %out = load <2 x i32>* %ret %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } -declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone define <8 x i8> @shrn8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: shrn8b: @@ -734,14 +734,14 @@ define <4 x i32> @shrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ret <4 x i32> %tmp4 } -declare <8 x i8> 
@llvm.arm64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone define i32 @sqshrn1s(i64 %A) nounwind { ; CHECK-LABEL: sqshrn1s: ; CHECK: sqshrn {{s[0-9]+}}, d0, #1 - %tmp = call i32 @llvm.arm64.neon.sqshrn.i32(i64 %A, i32 1) + %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1) ret i32 %tmp } @@ -749,7 +749,7 @@ define <8 x i8> @sqshrn8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqshrn8b: ;CHECK: sqshrn.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } @@ -757,7 +757,7 @@ define <4 x i16> @sqshrn4h(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqshrn4h: ;CHECK: sqshrn.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } @@ -765,7 +765,7 @@ define <2 x i32> @sqshrn2s(<2 x i64>* %A) nounwind { ;CHECK-LABEL: sqshrn2s: ;CHECK: sqshrn.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } @@ -775,7 +775,7 @@ define <16 x i8> @sqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ;CHECK: sqshrn2.16b v0, {{v[0-9]+}}, #1 %out = load <8 x i8>* %ret %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } @@ -785,7 +785,7 @@ define <8 x i16> @sqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ;CHECK: sqshrn2.8h v0, {{v[0-9]+}}, #1 %out = load <4 x i16>* %ret %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } @@ -795,20 +795,20 @@ define <4 x i32> @sqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ;CHECK: sqshrn2.4s v0, {{v[0-9]+}}, #1 %out = load <2 x i32>* %ret %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } -declare i32 @llvm.arm64.neon.sqshrn.i32(i64, i32) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone +declare i32 @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x 
i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone define i32 @sqshrun1s(i64 %A) nounwind { ; CHECK-LABEL: sqshrun1s: ; CHECK: sqshrun {{s[0-9]+}}, d0, #1 - %tmp = call i32 @llvm.arm64.neon.sqshrun.i32(i64 %A, i32 1) + %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1) ret i32 %tmp } @@ -816,7 +816,7 @@ define <8 x i8> @sqshrun8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqshrun8b: ;CHECK: sqshrun.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } @@ -824,7 +824,7 @@ define <4 x i16> @sqshrun4h(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqshrun4h: ;CHECK: sqshrun.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } @@ -832,7 +832,7 @@ define <2 x i32> @sqshrun2s(<2 x i64>* %A) nounwind { ;CHECK-LABEL: sqshrun2s: ;CHECK: sqshrun.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } @@ -841,7 +841,7 @@ define <16 x i8> @sqshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ;CHECK: sqshrun2.16b v0, {{v[0-9]+}}, #1 %out = load <8 x i8>* %ret %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } @@ -851,7 +851,7 @@ define <8 x i16> @sqshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ;CHECK: sqshrun2.8h v0, {{v[0-9]+}}, #1 %out = load <4 x i16>* %ret %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } @@ -861,20 +861,20 @@ define <4 x i32> @sqshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ;CHECK: sqshrun2.4s v0, {{v[0-9]+}}, #1 %out = load <2 x i32>* %ret %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } -declare i32 @llvm.arm64.neon.sqshrun.i32(i64, i32) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone +declare i32 @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone define i32 @sqrshrn1s(i64 %A) nounwind { ; CHECK-LABEL: sqrshrn1s: ; CHECK: sqrshrn {{s[0-9]+}}, d0, #1 - %tmp = call i32 @llvm.arm64.neon.sqrshrn.i32(i64 %A, i32 1) + %tmp = call i32 
@llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1) ret i32 %tmp } @@ -882,7 +882,7 @@ define <8 x i8> @sqrshrn8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqrshrn8b: ;CHECK: sqrshrn.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } @@ -890,7 +890,7 @@ define <4 x i16> @sqrshrn4h(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqrshrn4h: ;CHECK: sqrshrn.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } @@ -898,7 +898,7 @@ define <2 x i32> @sqrshrn2s(<2 x i64>* %A) nounwind { ;CHECK-LABEL: sqrshrn2s: ;CHECK: sqrshrn.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } @@ -907,7 +907,7 @@ define <16 x i8> @sqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ;CHECK: sqrshrn2.16b v0, {{v[0-9]+}}, #1 %out = load <8 x i8>* %ret %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } @@ -917,7 +917,7 @@ define <8 x i16> @sqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ;CHECK: sqrshrn2.8h v0, {{v[0-9]+}}, #1 %out = load <4 x i16>* %ret %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } @@ -927,20 +927,20 @@ define <4 x i32> @sqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ;CHECK: sqrshrn2.4s v0, {{v[0-9]+}}, #1 %out = load <2 x i32>* %ret %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } -declare i32 @llvm.arm64.neon.sqrshrn.i32(i64, i32) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone +declare i32 @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone define i32 @sqrshrun1s(i64 %A) nounwind { ; CHECK-LABEL: sqrshrun1s: ; CHECK: sqrshrun {{s[0-9]+}}, d0, #1 - %tmp = call i32 @llvm.arm64.neon.sqrshrun.i32(i64 %A, i32 1) + %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1) ret i32 %tmp } @@ -948,7 +948,7 @@ define <8 x i8> @sqrshrun8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqrshrun8b: ;CHECK: sqrshrun.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) + 
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } @@ -956,7 +956,7 @@ define <4 x i16> @sqrshrun4h(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqrshrun4h: ;CHECK: sqrshrun.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } @@ -964,7 +964,7 @@ define <2 x i32> @sqrshrun2s(<2 x i64>* %A) nounwind { ;CHECK-LABEL: sqrshrun2s: ;CHECK: sqrshrun.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } @@ -973,7 +973,7 @@ define <16 x i8> @sqrshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ;CHECK: sqrshrun2.16b v0, {{v[0-9]+}}, #1 %out = load <8 x i8>* %ret %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } @@ -983,7 +983,7 @@ define <8 x i16> @sqrshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ;CHECK: sqrshrun2.8h v0, {{v[0-9]+}}, #1 %out = load <4 x i16>* %ret %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } @@ -993,20 +993,20 @@ define <4 x i32> @sqrshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ;CHECK: sqrshrun2.4s v0, {{v[0-9]+}}, #1 %out = load <2 x i32>* %ret %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } -declare i32 @llvm.arm64.neon.sqrshrun.i32(i64, i32) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone +declare i32 @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone define i32 @uqrshrn1s(i64 %A) nounwind { ; CHECK-LABEL: uqrshrn1s: ; CHECK: uqrshrn {{s[0-9]+}}, d0, #1 - %tmp = call i32 @llvm.arm64.neon.uqrshrn.i32(i64 %A, i32 1) + %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1) ret i32 %tmp } @@ -1014,7 +1014,7 @@ define <8 x i8> @uqrshrn8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: uqrshrn8b: ;CHECK: uqrshrn.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } @@ -1022,7 +1022,7 @@ define <4 x i16> @uqrshrn4h(<4 x i32>* %A) nounwind { ;CHECK-LABEL: uqrshrn4h: ;CHECK: uqrshrn.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x 
i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } @@ -1030,7 +1030,7 @@ define <2 x i32> @uqrshrn2s(<2 x i64>* %A) nounwind { ;CHECK-LABEL: uqrshrn2s: ;CHECK: uqrshrn.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } @@ -1039,7 +1039,7 @@ define <16 x i8> @uqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ;CHECK: uqrshrn2.16b v0, {{v[0-9]+}}, #1 %out = load <8 x i8>* %ret %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } @@ -1049,7 +1049,7 @@ define <8 x i16> @uqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ;CHECK: uqrshrn2.8h v0, {{v[0-9]+}}, #1 %out = load <4 x i16>* %ret %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } @@ -1059,20 +1059,20 @@ define <4 x i32> @uqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ;CHECK: uqrshrn2.4s v0, {{v[0-9]+}}, #1 %out = load <2 x i32>* %ret %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } -declare i32 @llvm.arm64.neon.uqrshrn.i32(i64, i32) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone +declare i32 @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone define i32 @uqshrn1s(i64 %A) nounwind { ; CHECK-LABEL: uqshrn1s: ; CHECK: uqshrn {{s[0-9]+}}, d0, #1 - %tmp = call i32 @llvm.arm64.neon.uqshrn.i32(i64 %A, i32 1) + %tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1) ret i32 %tmp } @@ -1080,7 +1080,7 @@ define <8 x i8> @uqshrn8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: uqshrn8b: ;CHECK: uqshrn.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } @@ -1088,7 +1088,7 @@ define <4 x i16> @uqshrn4h(<4 x i32>* %A) nounwind { ;CHECK-LABEL: uqshrn4h: ;CHECK: uqshrn.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } @@ -1096,7 +1096,7 @@ define <2 x i32> @uqshrn2s(<2 x i64>* %A) nounwind { ;CHECK-LABEL: uqshrn2s: ;CHECK: uqshrn.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x 
i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } @@ -1105,7 +1105,7 @@ define <16 x i8> @uqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ;CHECK: uqshrn2.16b v0, {{v[0-9]+}}, #1 %out = load <8 x i8>* %ret %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } @@ -1115,7 +1115,7 @@ define <8 x i16> @uqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ;CHECK: uqshrn2.8h v0, {{v[0-9]+}}, #1 %out = load <4 x i16>* %ret %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } @@ -1125,15 +1125,15 @@ define <4 x i32> @uqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ;CHECK: uqshrn2.4s v0, {{v[0-9]+}}, #1 %out = load <2 x i32>* %ret %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } -declare i32 @llvm.arm64.neon.uqshrn.i32(i64, i32) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone +declare i32 @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone define <8 x i16> @ushll8h(<8 x i8>* %A) nounwind { ;CHECK-LABEL: ushll8h: @@ -1253,7 +1253,7 @@ define <8 x i8> @sqshli8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: sqshli8b: ;CHECK: sqshl.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } @@ -1261,7 +1261,7 @@ define <4 x i16> @sqshli4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: sqshli4h: ;CHECK: sqshl.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } @@ -1269,7 +1269,7 @@ define <2 x i32> @sqshli2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: sqshli2s: ;CHECK: sqshl.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } @@ -1277,7 +1277,7 @@ define <16 x i8> @sqshli16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: sqshli16b: ;CHECK: sqshl.16b v0, {{v[0-9]+}}, #1 %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp3 = call <16 x i8> 
@llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } @@ -1285,7 +1285,7 @@ define <8 x i16> @sqshli8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqshli8h: ;CHECK: sqshl.8h v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } @@ -1293,7 +1293,7 @@ define <4 x i32> @sqshli4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqshli4s: ;CHECK: sqshl.4s v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } @@ -1301,7 +1301,7 @@ define <2 x i64> @sqshli2d(<2 x i64>* %A) nounwind { ;CHECK-LABEL: sqshli2d: ;CHECK: sqshl.2d v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } @@ -1309,7 +1309,7 @@ define <8 x i8> @uqshli8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: uqshli8b: ;CHECK: uqshl.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } @@ -1317,7 +1317,7 @@ define <4 x i16> @uqshli4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: uqshli4h: ;CHECK: uqshl.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } @@ -1325,7 +1325,7 @@ define <2 x i32> @uqshli2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: uqshli2s: ;CHECK: uqshl.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } @@ -1333,7 +1333,7 @@ define <16 x i8> @uqshli16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: uqshli16b: ;CHECK: uqshl.16b %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } @@ -1341,7 +1341,7 @@ define <8 x i16> @uqshli8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: uqshli8h: ;CHECK: uqshl.8h v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } @@ -1349,7 +1349,7 @@ define <4 x i32> @uqshli4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: uqshli4s: ;CHECK: uqshl.4s v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } @@ -1357,7 +1357,7 @@ define <2 x i64> @uqshli2d(<2 x i64>* %A) nounwind { ;CHECK-LABEL: uqshli2d: ;CHECK: uqshl.2d v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x 
i64> ) ret <2 x i64> %tmp3 } @@ -1365,7 +1365,7 @@ define <8 x i8> @ursra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: ursra8b: ;CHECK: ursra.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) %tmp4 = load <8 x i8>* %B %tmp5 = add <8 x i8> %tmp3, %tmp4 ret <8 x i8> %tmp5 @@ -1375,7 +1375,7 @@ define <4 x i16> @ursra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: ursra4h: ;CHECK: ursra.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) %tmp4 = load <4 x i16>* %B %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 @@ -1385,7 +1385,7 @@ define <2 x i32> @ursra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: ursra2s: ;CHECK: ursra.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) %tmp4 = load <2 x i32>* %B %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 @@ -1395,7 +1395,7 @@ define <16 x i8> @ursra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: ursra16b: ;CHECK: ursra.16b v0, {{v[0-9]+}}, #1 %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) %tmp4 = load <16 x i8>* %B %tmp5 = add <16 x i8> %tmp3, %tmp4 ret <16 x i8> %tmp5 @@ -1405,7 +1405,7 @@ define <8 x i16> @ursra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: ursra8h: ;CHECK: ursra.8h v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) %tmp4 = load <8 x i16>* %B %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 @@ -1415,7 +1415,7 @@ define <4 x i32> @ursra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: ursra4s: ;CHECK: ursra.4s v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) %tmp4 = load <4 x i32>* %B %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 @@ -1425,7 +1425,7 @@ define <2 x i64> @ursra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: ursra2d: ;CHECK: ursra.2d v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) %tmp4 = load <2 x i64>* %B %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 @@ -1435,7 +1435,7 @@ define <8 x i8> @srsra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: srsra8b: ;CHECK: srsra.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) %tmp4 = load <8 x i8>* %B %tmp5 = add <8 x i8> %tmp3, %tmp4 ret <8 x i8> %tmp5 @@ -1445,7 +1445,7 @@ define <4 x i16> @srsra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: srsra4h: ;CHECK: srsra.4h v0, 
{{v[0-9]+}}, #1 %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) %tmp4 = load <4 x i16>* %B %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 @@ -1455,7 +1455,7 @@ define <2 x i32> @srsra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: srsra2s: ;CHECK: srsra.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) %tmp4 = load <2 x i32>* %B %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 @@ -1465,7 +1465,7 @@ define <16 x i8> @srsra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: srsra16b: ;CHECK: srsra.16b v0, {{v[0-9]+}}, #1 %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) %tmp4 = load <16 x i8>* %B %tmp5 = add <16 x i8> %tmp3, %tmp4 ret <16 x i8> %tmp5 @@ -1475,7 +1475,7 @@ define <8 x i16> @srsra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: srsra8h: ;CHECK: srsra.8h v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) %tmp4 = load <8 x i16>* %B %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 @@ -1485,7 +1485,7 @@ define <4 x i32> @srsra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: srsra4s: ;CHECK: srsra.4s v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) %tmp4 = load <4 x i32>* %B %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 @@ -1495,7 +1495,7 @@ define <2 x i64> @srsra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: srsra2d: ;CHECK: srsra.2d v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) %tmp4 = load <2 x i64>* %B %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 @@ -1831,7 +1831,7 @@ define <8 x i8> @sli8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: sli.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1) ret <8 x i8> %tmp3 } @@ -1840,7 +1840,7 @@ define <4 x i16> @sli4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: sli.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1) ret <4 x i16> %tmp3 } @@ -1849,7 +1849,7 @@ define <2 x i32> @sli2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sli.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1) 
ret <2 x i32> %tmp3 } @@ -1858,7 +1858,7 @@ define <1 x i64> @sli1d(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK: sli d0, {{d[0-9]+}}, #1 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B - %tmp3 = call <1 x i64> @llvm.arm64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1) + %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1) ret <1 x i64> %tmp3 } @@ -1867,7 +1867,7 @@ define <16 x i8> @sli16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: sli.16b v0, {{v[0-9]+}}, #1 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1) ret <16 x i8> %tmp3 } @@ -1876,7 +1876,7 @@ define <8 x i16> @sli8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sli.8h v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1) ret <8 x i16> %tmp3 } @@ -1885,7 +1885,7 @@ define <4 x i32> @sli4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sli.4s v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1) ret <4 x i32> %tmp3 } @@ -1894,19 +1894,19 @@ define <2 x i64> @sli2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: sli.2d v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: ashr_v1i64: diff --git 
a/test/CodeGen/ARM64/vshr.ll b/test/CodeGen/AArch64/arm64-vshr.ll similarity index 95% rename from test/CodeGen/ARM64/vshr.ll rename to test/CodeGen/AArch64/arm64-vshr.ll index 1da8f60acb68..21eb579f2522 100644 --- a/test/CodeGen/ARM64/vshr.ll +++ b/test/CodeGen/AArch64/arm64-vshr.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s define <8 x i16> @testShiftRightArith_v8i16(<8 x i16> %a, <8 x i16> %b) #0 { ; CHECK-LABEL: testShiftRightArith_v8i16: diff --git a/test/CodeGen/ARM64/vshuffle.ll b/test/CodeGen/AArch64/arm64-vshuffle.ll similarity index 100% rename from test/CodeGen/ARM64/vshuffle.ll rename to test/CodeGen/AArch64/arm64-vshuffle.ll diff --git a/test/CodeGen/ARM64/vsqrt.ll b/test/CodeGen/AArch64/arm64-vsqrt.ll similarity index 51% rename from test/CodeGen/ARM64/vsqrt.ll rename to test/CodeGen/AArch64/arm64-vsqrt.ll index 094d7042a4de..02b7c7ec5d80 100644 --- a/test/CodeGen/ARM64/vsqrt.ll +++ b/test/CodeGen/AArch64/arm64-vsqrt.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <2 x float> @frecps_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: frecps_2s: ;CHECK: frecps.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.frecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -14,7 +14,7 @@ define <4 x float> @frecps_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: frecps.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.frecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -23,13 +23,13 @@ define <2 x double> @frecps_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: frecps.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.frecps.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.frecps.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.frecps.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.frecps.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x float> @frsqrts_2s(<2 x float>* %A, <2 x float>* %B) nounwind { @@ -37,7 +37,7 @@ define <2 x float> @frsqrts_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK: frsqrts.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.frsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -46,7 +46,7 @@ define <4 x float> @frsqrts_4s(<4 x float>* %A, <4 x float>* 
%B) nounwind { ;CHECK: frsqrts.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.frsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -55,19 +55,19 @@ define <2 x double> @frsqrts_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: frsqrts.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.frsqrts.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.frsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.frsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.frsqrts.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x float> @frecpe_2s(<2 x float>* %A) nounwind { ;CHECK-LABEL: frecpe_2s: ;CHECK: frecpe.2s %tmp1 = load <2 x float>* %A - %tmp3 = call <2 x float> @llvm.arm64.neon.frecpe.v2f32(<2 x float> %tmp1) + %tmp3 = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %tmp1) ret <2 x float> %tmp3 } @@ -75,7 +75,7 @@ define <4 x float> @frecpe_4s(<4 x float>* %A) nounwind { ;CHECK-LABEL: frecpe_4s: ;CHECK: frecpe.4s %tmp1 = load <4 x float>* %A - %tmp3 = call <4 x float> @llvm.arm64.neon.frecpe.v4f32(<4 x float> %tmp1) + %tmp3 = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %tmp1) ret <4 x float> %tmp3 } @@ -83,7 +83,7 @@ define <2 x double> @frecpe_2d(<2 x double>* %A) nounwind { ;CHECK-LABEL: frecpe_2d: ;CHECK: frecpe.2d %tmp1 = load <2 x double>* %A - %tmp3 = call <2 x double> @llvm.arm64.neon.frecpe.v2f64(<2 x double> %tmp1) + %tmp3 = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %tmp1) ret <2 x double> %tmp3 } @@ -91,7 +91,7 @@ define float @frecpe_s(float* %A) nounwind { ;CHECK-LABEL: frecpe_s: ;CHECK: frecpe s0, {{s[0-9]+}} %tmp1 = load float* %A - %tmp3 = call float @llvm.arm64.neon.frecpe.f32(float %tmp1) + %tmp3 = call float @llvm.aarch64.neon.frecpe.f32(float %tmp1) ret float %tmp3 } @@ -99,21 +99,21 @@ define double @frecpe_d(double* %A) nounwind { ;CHECK-LABEL: frecpe_d: ;CHECK: frecpe d0, {{d[0-9]+}} %tmp1 = load double* %A - %tmp3 = call double @llvm.arm64.neon.frecpe.f64(double %tmp1) + %tmp3 = call double @llvm.aarch64.neon.frecpe.f64(double %tmp1) ret double %tmp3 } -declare <2 x float> @llvm.arm64.neon.frecpe.v2f32(<2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.frecpe.v4f32(<4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.frecpe.v2f64(<2 x double>) nounwind readnone -declare float @llvm.arm64.neon.frecpe.f32(float) nounwind readnone -declare double @llvm.arm64.neon.frecpe.f64(double) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double>) nounwind readnone +declare float @llvm.aarch64.neon.frecpe.f32(float) nounwind 
readnone +declare double @llvm.aarch64.neon.frecpe.f64(double) nounwind readnone define float @frecpx_s(float* %A) nounwind { ;CHECK-LABEL: frecpx_s: ;CHECK: frecpx s0, {{s[0-9]+}} %tmp1 = load float* %A - %tmp3 = call float @llvm.arm64.neon.frecpx.f32(float %tmp1) + %tmp3 = call float @llvm.aarch64.neon.frecpx.f32(float %tmp1) ret float %tmp3 } @@ -121,18 +121,18 @@ define double @frecpx_d(double* %A) nounwind { ;CHECK-LABEL: frecpx_d: ;CHECK: frecpx d0, {{d[0-9]+}} %tmp1 = load double* %A - %tmp3 = call double @llvm.arm64.neon.frecpx.f64(double %tmp1) + %tmp3 = call double @llvm.aarch64.neon.frecpx.f64(double %tmp1) ret double %tmp3 } -declare float @llvm.arm64.neon.frecpx.f32(float) nounwind readnone -declare double @llvm.arm64.neon.frecpx.f64(double) nounwind readnone +declare float @llvm.aarch64.neon.frecpx.f32(float) nounwind readnone +declare double @llvm.aarch64.neon.frecpx.f64(double) nounwind readnone define <2 x float> @frsqrte_2s(<2 x float>* %A) nounwind { ;CHECK-LABEL: frsqrte_2s: ;CHECK: frsqrte.2s %tmp1 = load <2 x float>* %A - %tmp3 = call <2 x float> @llvm.arm64.neon.frsqrte.v2f32(<2 x float> %tmp1) + %tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %tmp1) ret <2 x float> %tmp3 } @@ -140,7 +140,7 @@ define <4 x float> @frsqrte_4s(<4 x float>* %A) nounwind { ;CHECK-LABEL: frsqrte_4s: ;CHECK: frsqrte.4s %tmp1 = load <4 x float>* %A - %tmp3 = call <4 x float> @llvm.arm64.neon.frsqrte.v4f32(<4 x float> %tmp1) + %tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %tmp1) ret <4 x float> %tmp3 } @@ -148,7 +148,7 @@ define <2 x double> @frsqrte_2d(<2 x double>* %A) nounwind { ;CHECK-LABEL: frsqrte_2d: ;CHECK: frsqrte.2d %tmp1 = load <2 x double>* %A - %tmp3 = call <2 x double> @llvm.arm64.neon.frsqrte.v2f64(<2 x double> %tmp1) + %tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %tmp1) ret <2 x double> %tmp3 } @@ -156,7 +156,7 @@ define float @frsqrte_s(float* %A) nounwind { ;CHECK-LABEL: frsqrte_s: ;CHECK: frsqrte s0, {{s[0-9]+}} %tmp1 = load float* %A - %tmp3 = call float @llvm.arm64.neon.frsqrte.f32(float %tmp1) + %tmp3 = call float @llvm.aarch64.neon.frsqrte.f32(float %tmp1) ret float %tmp3 } @@ -164,21 +164,21 @@ define double @frsqrte_d(double* %A) nounwind { ;CHECK-LABEL: frsqrte_d: ;CHECK: frsqrte d0, {{d[0-9]+}} %tmp1 = load double* %A - %tmp3 = call double @llvm.arm64.neon.frsqrte.f64(double %tmp1) + %tmp3 = call double @llvm.aarch64.neon.frsqrte.f64(double %tmp1) ret double %tmp3 } -declare <2 x float> @llvm.arm64.neon.frsqrte.v2f32(<2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.frsqrte.v4f32(<4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.frsqrte.v2f64(<2 x double>) nounwind readnone -declare float @llvm.arm64.neon.frsqrte.f32(float) nounwind readnone -declare double @llvm.arm64.neon.frsqrte.f64(double) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double>) nounwind readnone +declare float @llvm.aarch64.neon.frsqrte.f32(float) nounwind readnone +declare double @llvm.aarch64.neon.frsqrte.f64(double) nounwind readnone define <2 x i32> @urecpe_2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: urecpe_2s: ;CHECK: urecpe.2s %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.urecpe.v2i32(<2 x i32> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 
x i32> %tmp1) ret <2 x i32> %tmp3 } @@ -186,18 +186,18 @@ define <4 x i32> @urecpe_4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: urecpe_4s: ;CHECK: urecpe.4s %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.urecpe.v4i32(<4 x i32> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.urecpe.v2i32(<2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.urecpe.v4i32(<4 x i32>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32>) nounwind readnone define <2 x i32> @ursqrte_2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: ursqrte_2s: ;CHECK: ursqrte.2s %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.ursqrte.v2i32(<2 x i32> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp3 } @@ -205,18 +205,18 @@ define <4 x i32> @ursqrte_4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: ursqrte_4s: ;CHECK: ursqrte.4s %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.ursqrte.v4i32(<4 x i32> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.ursqrte.v2i32(<2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.ursqrte.v4i32(<4 x i32>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32>) nounwind readnone define float @f1(float %a, float %b) nounwind readnone optsize ssp { ; CHECK-LABEL: f1: ; CHECK: frsqrts s0, s0, s1 ; CHECK-NEXT: ret - %vrsqrtss.i = tail call float @llvm.arm64.neon.frsqrts.f32(float %a, float %b) nounwind + %vrsqrtss.i = tail call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) nounwind ret float %vrsqrtss.i } @@ -224,9 +224,9 @@ define double @f2(double %a, double %b) nounwind readnone optsize ssp { ; CHECK-LABEL: f2: ; CHECK: frsqrts d0, d0, d1 ; CHECK-NEXT: ret - %vrsqrtsd.i = tail call double @llvm.arm64.neon.frsqrts.f64(double %a, double %b) nounwind + %vrsqrtsd.i = tail call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) nounwind ret double %vrsqrtsd.i } -declare double @llvm.arm64.neon.frsqrts.f64(double, double) nounwind readnone -declare float @llvm.arm64.neon.frsqrts.f32(float, float) nounwind readnone +declare double @llvm.aarch64.neon.frsqrts.f64(double, double) nounwind readnone +declare float @llvm.aarch64.neon.frsqrts.f32(float, float) nounwind readnone diff --git a/test/CodeGen/ARM64/vsra.ll b/test/CodeGen/AArch64/arm64-vsra.ll similarity index 98% rename from test/CodeGen/ARM64/vsra.ll rename to test/CodeGen/AArch64/arm64-vsra.ll index 3611eb3cba6b..5e9cef3e7e28 100644 --- a/test/CodeGen/ARM64/vsra.ll +++ b/test/CodeGen/AArch64/arm64-vsra.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vsras8: diff --git a/test/CodeGen/ARM64/vsub.ll b/test/CodeGen/AArch64/arm64-vsub.ll similarity index 85% rename from test/CodeGen/ARM64/vsub.ll rename to test/CodeGen/AArch64/arm64-vsub.ll index 5c7e84f46efc..c2c8755c0669 100644 --- a/test/CodeGen/ARM64/vsub.ll +++ b/test/CodeGen/AArch64/arm64-vsub.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; 
RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @subhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: subhn8b: ;CHECK: subhn.8b %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <4 x i16> @subhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: subhn.4h %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i16> %tmp3 } @@ -23,7 +23,7 @@ define <2 x i32> @subhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: subhn.2s %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i32> %tmp3 } @@ -31,8 +31,8 @@ define <16 x i8> @subhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind { ;CHECK-LABEL: subhn2_16b: ;CHECK: subhn.8b ;CHECK-NEXT: subhn2.16b - %vsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind - %vsubhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vsubhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind %res = shufflevector <8 x i8> %vsubhn2.i, <8 x i8> %vsubhn_high2.i, <16 x i32> ret <16 x i8> %res } @@ -41,8 +41,8 @@ define <8 x i16> @subhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind { ;CHECK-LABEL: subhn2_8h: ;CHECK: subhn.4h ;CHECK-NEXT: subhn2.8h - %vsubhn2.i = tail call <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind - %vsubhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vsubhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind %res = shufflevector <4 x i16> %vsubhn2.i, <4 x i16> %vsubhn_high3.i, <8 x i32> ret <8 x i16> %res } @@ -51,22 +51,22 @@ define <4 x i32> @subhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind { ;CHECK-LABEL: subhn2_4s: ;CHECK: subhn.2s ;CHECK-NEXT: subhn2.4s - %vsubhn2.i = tail call <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind - %vsubhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vsubhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind %res = shufflevector <2 x i32> %vsubhn2.i, <2 x i32> %vsubhn_high3.i, <4 x i32> ret <4 x i32> %res } -declare <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32>, <4 
x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone define <8 x i8> @rsubhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: rsubhn8b: ;CHECK: rsubhn.8b %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i8> %tmp3 } @@ -75,7 +75,7 @@ define <4 x i16> @rsubhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: rsubhn.4h %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i16> %tmp3 } @@ -84,7 +84,7 @@ define <2 x i32> @rsubhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: rsubhn.2s %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i32> %tmp3 } @@ -92,8 +92,8 @@ define <16 x i8> @rsubhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind { ;CHECK-LABEL: rsubhn2_16b: ;CHECK: rsubhn.8b ;CHECK-NEXT: rsubhn2.16b - %vrsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind - %vrsubhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vrsubhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind %res = shufflevector <8 x i8> %vrsubhn2.i, <8 x i8> %vrsubhn_high2.i, <16 x i32> ret <16 x i8> %res } @@ -102,8 +102,8 @@ define <8 x i16> @rsubhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind { ;CHECK-LABEL: rsubhn2_8h: ;CHECK: rsubhn.4h ;CHECK-NEXT: rsubhn2.8h - %vrsubhn2.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind - %vrsubhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vrsubhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind %res = shufflevector <4 x i16> %vrsubhn2.i, <4 x i16> %vrsubhn_high3.i, <8 x i32> ret <8 x i16> %res } @@ -112,15 +112,15 @@ define <4 x i32> @rsubhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind { ;CHECK-LABEL: rsubhn2_4s: ;CHECK: rsubhn.2s ;CHECK-NEXT: rsubhn2.4s - %vrsubhn2.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind - %vrsubhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vrsubhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind %res = shufflevector <2 x i32> %vrsubhn2.i, <2 x i32> %vrsubhn_high3.i, <4 x i32> ret <4 x i32> %res } -declare <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> 
@llvm.aarch64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone define <8 x i16> @ssubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: ssubl8h: diff --git a/test/CodeGen/ARM64/weak-reference.ll b/test/CodeGen/AArch64/arm64-weak-reference.ll similarity index 100% rename from test/CodeGen/ARM64/weak-reference.ll rename to test/CodeGen/AArch64/arm64-weak-reference.ll diff --git a/test/CodeGen/ARM64/xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll similarity index 100% rename from test/CodeGen/ARM64/xaluo.ll rename to test/CodeGen/AArch64/arm64-xaluo.ll diff --git a/test/CodeGen/ARM64/zero-cycle-regmov.ll b/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll similarity index 100% rename from test/CodeGen/ARM64/zero-cycle-regmov.ll rename to test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll diff --git a/test/CodeGen/ARM64/zero-cycle-zeroing.ll b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll similarity index 100% rename from test/CodeGen/ARM64/zero-cycle-zeroing.ll rename to test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll diff --git a/test/CodeGen/ARM64/zext.ll b/test/CodeGen/AArch64/arm64-zext.ll similarity index 100% rename from test/CodeGen/ARM64/zext.ll rename to test/CodeGen/AArch64/arm64-zext.ll diff --git a/test/CodeGen/ARM64/zextload-unscaled.ll b/test/CodeGen/AArch64/arm64-zextload-unscaled.ll similarity index 100% rename from test/CodeGen/ARM64/zextload-unscaled.ll rename to test/CodeGen/AArch64/arm64-zextload-unscaled.ll diff --git a/test/CodeGen/ARM64/zip.ll b/test/CodeGen/AArch64/arm64-zip.ll similarity index 98% rename from test/CodeGen/ARM64/zip.ll rename to test/CodeGen/AArch64/arm64-zip.ll index d06a9f899ddf..304b28099432 100644 --- a/test/CodeGen/ARM64/zip.ll +++ b/test/CodeGen/AArch64/arm64-zip.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vzipi8: diff --git a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll index 162430b9b76b..da095a0a42c5 100644 --- a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll +++ b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s define i32 @foo(i32* %var, i1 %cond) { ; CHECK-LABEL: foo: diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll index 58ea735c8093..58b5d1d078cb 100644 --- a/test/CodeGen/AArch64/atomic-ops.ll +++ b/test/CodeGen/AArch64/atomic-ops.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG ; Point of CHECK-REG is to make sure UNPREDICTABLE instructions aren't created @@ -501,9 +501,9 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but 
could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxtb -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le +; CHECK-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] +; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 @@ -525,9 +525,9 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxth -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le +; CHECK-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] +; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -550,8 +550,8 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le +; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -574,8 +574,8 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 -; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, le +; CHECK-NEXT: cmp x[[OLD]], x0 +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, le ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -598,9 +598,9 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxtb -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] +; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -623,9 +623,9 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxth -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] +; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -648,8 +648,8 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -672,8 +672,8 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 -; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt +; CHECK-NEXT: cmp x[[OLD]], x0 +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -696,8 +696,8 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxtb -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls +; CHECK-NEXT: cmp w[[OLD]], w0, uxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -720,8 +720,8 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxth -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls +; CHECK-NEXT: cmp w[[OLD]], w0, uxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -744,8 +744,8 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls +; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -768,8 +768,8 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 -; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, ls +; CHECK-NEXT: cmp x[[OLD]], x0 +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, ls ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -792,8 +792,8 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxtb -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-NEXT: cmp w[[OLD]], w0, uxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -816,8 +816,8 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxth -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-NEXT: cmp w[[OLD]], w0, uxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -840,8 +840,8 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -864,8 +864,8 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 -; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi +; CHECK-NEXT: cmp x[[OLD]], x0 +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] diff --git a/test/CodeGen/AArch64/basic-pic.ll b/test/CodeGen/AArch64/basic-pic.ll index 2c69bee0d1b3..62d41bcead6b 100644 --- a/test/CodeGen/AArch64/basic-pic.ll +++ b/test/CodeGen/AArch64/basic-pic.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s @var = global i32 0 diff --git a/test/CodeGen/AArch64/bitfield-insert-0.ll b/test/CodeGen/AArch64/bitfield-insert-0.ll index 8959e1b6959e..da0ed8af3126 100644 --- a/test/CodeGen/AArch64/bitfield-insert-0.ll +++ b/test/CodeGen/AArch64/bitfield-insert-0.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -filetype=obj -o - %s | llvm-objdump -disassemble - | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -filetype=obj -o - %s | llvm-objdump -disassemble - | FileCheck %s ; The encoding of lsb -> immr in the CGed bitfield instructions was wrong at one ; point, in the edge case where lsb = 0. Just make sure. diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll index 8b0b4dafe6c0..2369a55aa92d 100644 --- a/test/CodeGen/AArch64/bitfield-insert.ll +++ b/test/CodeGen/AArch64/bitfield-insert.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefix=CHECK ; First, a simple example from Clang. The registers could plausibly be ; different, but probably won't be. 
@@ -64,7 +64,7 @@ define void @test_whole32_from64(i64* %existing, i64* %new) { ; CHECK-LABEL: test_whole32_from64: -; CHECK-ARM64: bfxil {{x[0-9]+}}, {{x[0-9]+}}, #0, #16 +; CHECK: bfxil {{x[0-9]+}}, {{x[0-9]+}}, #0, #16 ; CHECK: ret @@ -83,7 +83,7 @@ define void @test_whole32_from64(i64* %existing, i64* %new) { define void @test_32bit_masked(i32 *%existing, i32 *%new) { ; CHECK-LABEL: test_32bit_masked: -; CHECK-ARM64: and +; CHECK: and ; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4 %oldval = load volatile i32* %existing @@ -101,7 +101,7 @@ define void @test_32bit_masked(i32 *%existing, i32 *%new) { define void @test_64bit_masked(i64 *%existing, i64 *%new) { ; CHECK-LABEL: test_64bit_masked: -; CHECK-ARM64: and +; CHECK: and ; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8 %oldval = load volatile i64* %existing @@ -121,7 +121,7 @@ define void @test_64bit_masked(i64 *%existing, i64 *%new) { define void @test_32bit_complexmask(i32 *%existing, i32 *%new) { ; CHECK-LABEL: test_32bit_complexmask: -; CHECK-ARM64: and +; CHECK: and ; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4 %oldval = load volatile i32* %existing diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll index 71ffe30c9281..0e1265372bd8 100644 --- a/test/CodeGen/AArch64/bitfield.ll +++ b/test/CodeGen/AArch64/bitfield.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK @var32 = global i32 0 @var64 = global i64 0 @@ -23,7 +23,7 @@ define void @test_extendb(i8 %var) { %uxt64 = zext i8 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK-ARM64: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff ret void } @@ -47,7 +47,7 @@ define void @test_extendh(i16 %var) { %uxt64 = zext i16 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK-ARM64: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff ret void } @@ -60,7 +60,7 @@ define void @test_extendw(i32 %var) { %uxt64 = zext i32 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK-ARM64: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32 +; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32 ret void } diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll index 0cbdd3988b72..1eec4cc7f4e7 100644 --- a/test/CodeGen/AArch64/blockaddress.ll +++ b/test/CodeGen/AArch64/blockaddress.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -code-model=large -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s @addr = global i8* null diff --git a/test/CodeGen/AArch64/bool-loads.ll b/test/CodeGen/AArch64/bool-loads.ll index 5d92ef67d0eb..881aeaa15dd5 100644 --- a/test/CodeGen/AArch64/bool-loads.ll +++ b/test/CodeGen/AArch64/bool-loads.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s @var = global i1 0 diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll index 137173bc4f33..591f48303e27 100644 --- a/test/CodeGen/AArch64/breg.ll +++ 
b/test/CodeGen/AArch64/breg.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s @stored_label = global i8* null diff --git a/test/CodeGen/AArch64/callee-save.ll b/test/CodeGen/AArch64/callee-save.ll index 9b04a8f979b1..046e6ceac077 100644 --- a/test/CodeGen/AArch64/callee-save.ll +++ b/test/CodeGen/AArch64/callee-save.ll @@ -1,19 +1,14 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK-ARM64 +; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s @var = global float 0.0 define void @foo() { ; CHECK-LABEL: foo: -; CHECK: stp d14, d15, [sp -; CHECK: stp d12, d13, [sp -; CHECK: stp d10, d11, [sp -; CHECK: stp d8, d9, [sp - -; CHECK-ARM64: stp d15, d14, [sp -; CHECK-ARM64: stp d13, d12, [sp -; CHECK-ARM64: stp d11, d10, [sp -; CHECK-ARM64: stp d9, d8, [sp +; CHECK: stp d15, d14, [sp +; CHECK: stp d13, d12, [sp +; CHECK: stp d11, d10, [sp +; CHECK: stp d9, d8, [sp ; Create lots of live variables to exhaust the supply of ; caller-saved registers @@ -83,14 +78,9 @@ define void @foo() { store volatile float %val31, float* @var store volatile float %val32, float* @var -; CHECK: ldp d8, d9, [sp -; CHECK: ldp d10, d11, [sp -; CHECK: ldp d12, d13, [sp -; CHECK: ldp d14, d15, [sp - -; CHECK-ARM64: ldp d9, d8, [sp -; CHECK-ARM64: ldp d11, d10, [sp -; CHECK-ARM64: ldp d13, d12, [sp -; CHECK-ARM64: ldp d15, d14, [sp +; CHECK: ldp d9, d8, [sp +; CHECK: ldp d11, d10, [sp +; CHECK: ldp d13, d12, [sp +; CHECK: ldp d15, d14, [sp ret void } diff --git a/test/CodeGen/AArch64/code-model-large-abs.ll b/test/CodeGen/AArch64/code-model-large-abs.ll index 0408e6f4898a..ca92500855b4 100644 --- a/test/CodeGen/AArch64/code-model-large-abs.ll +++ b/test/CodeGen/AArch64/code-model-large-abs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -code-model=large -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -o - %s | FileCheck %s @var8 = global i8 0 @var16 = global i16 0 diff --git a/test/CodeGen/AArch64/compare-branch.ll b/test/CodeGen/AArch64/compare-branch.ll index accbadd4d4eb..a1a87cf51a1a 100644 --- a/test/CodeGen/AArch64/compare-branch.ll +++ b/test/CodeGen/AArch64/compare-branch.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s @var32 = global i32 0 @var64 = global i64 0 diff --git a/test/CodeGen/AArch64/complex-copy-noneon.ll b/test/CodeGen/AArch64/complex-copy-noneon.ll index f65b11612828..4ae547856ecd 100644 --- a/test/CodeGen/AArch64/complex-copy-noneon.ll +++ b/test/CodeGen/AArch64/complex-copy-noneon.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-neon < %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-neon < %s ; The DAG combiner decided to use a vector load/store for this struct copy ; previously. 
This probably shouldn't happen without NEON, but the most diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll index 96e11b12a171..5f81cba66cbc 100644 --- a/test/CodeGen/AArch64/cond-sel.ll +++ b/test/CodeGen/AArch64/cond-sel.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @var32 = global i32 0 @var64 = global i64 0 @@ -45,7 +45,7 @@ define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %r ; CHECK-NOFP-NOT: fcmp %val2 = select i1 %tst2, i64 9, i64 15 store i64 %val2, i64* @var64 -; CHECK-ARM64: orr w[[CONST15:[0-9]+]], wzr, #0xf +; CHECK: orr w[[CONST15:[0-9]+]], wzr, #0xf ; CHECK: movz {{[wx]}}[[CONST9:[0-9]+]], #{{9|0x9}} ; CHECK: csel [[MAYBETRUE:x[0-9]+]], x[[CONST9]], x[[CONST15]], eq ; CHECK: csel {{x[0-9]+}}, x[[CONST9]], [[MAYBETRUE]], vs diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll index 832a01046b0f..1b5192848465 100644 --- a/test/CodeGen/AArch64/directcond.ll +++ b/test/CodeGen/AArch64/directcond.ll @@ -1,10 +1,10 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) { ; CHECK-LABEL: test_select_i32: %val = select i1 %bit, i32 %a, i32 %b -; CHECK-ARM64: tst w0, #0x1 +; CHECK: tst w0, #0x1 ; CHECK-NEXT: csel w0, w1, w2, ne ret i32 %val @@ -13,7 +13,7 @@ define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) { define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) { ; CHECK-LABEL: test_select_i64: %val = select i1 %bit, i64 %a, i64 %b -; CHECK-ARM64: tst w0, #0x1 +; CHECK: tst w0, #0x1 ; CHECK-NEXT: csel x0, x1, x2, ne ret i64 %val @@ -22,7 +22,7 @@ define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) { define float @test_select_float(i1 %bit, float %a, float %b) { ; CHECK-LABEL: test_select_float: %val = select i1 %bit, float %a, float %b -; CHECK-ARM64: tst w0, #0x1 +; CHECK: tst w0, #0x1 ; CHECK-NEXT: fcsel s0, s0, s1, ne ; CHECK-NOFP-NOT: fcsel ret float %val @@ -31,7 +31,7 @@ define float @test_select_float(i1 %bit, float %a, float %b) { define double @test_select_double(i1 %bit, double %a, double %b) { ; CHECK-LABEL: test_select_double: %val = select i1 %bit, double %a, double %b -; CHECK-ARM64: tst w0, #0x1 +; CHECK: tst w0, #0x1 ; CHECK-NEXT: fcsel d0, d0, d1, ne ; CHECK-NOFP-NOT: fcsel diff --git a/test/CodeGen/AArch64/dp1.ll b/test/CodeGen/AArch64/dp1.ll index b09ce3668dc9..662b41588541 100644 --- a/test/CodeGen/AArch64/dp1.ll +++ b/test/CodeGen/AArch64/dp1.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s 
-mtriple=aarch64-linux-gnu | FileCheck %s @var32 = global i32 0 @var64 = global i64 0 diff --git a/test/CodeGen/AArch64/eliminate-trunc.ll b/test/CodeGen/AArch64/eliminate-trunc.ll index 02a085acf03e..ea86a084cb42 100644 --- a/test/CodeGen/AArch64/eliminate-trunc.ll +++ b/test/CodeGen/AArch64/eliminate-trunc.ll @@ -1,11 +1,11 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-apple-ios7.0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-ARM64 +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-apple-ios7.0 -mcpu=cyclone | FileCheck %s ; Check trunc i64 operation is translated as a subregister access ; eliminating an i32 induction varible. -; CHECK-ARM64-NOT: add {{x[0-9]+}}, {{x[0-9]+}}, #1 -; CHECK-ARM64: add {{w[0-9]+}}, {{w[0-9]+}}, #1 -; CHECK-ARM64-NEXT: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK-NOT: add {{x[0-9]+}}, {{x[0-9]+}}, #1 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1 +; CHECK-NEXT: cmp {{w[0-9]+}}, {{w[0-9]+}} define void @test1_signed([8 x i8]* nocapture %a, i8* nocapture readonly %box, i8 %limit) minsize { entry: %conv = zext i8 %limit to i32 diff --git a/test/CodeGen/AArch64/extern-weak.ll b/test/CodeGen/AArch64/extern-weak.ll index 8f418455ffa0..ce5c0f686615 100644 --- a/test/CodeGen/AArch64/extern-weak.ll +++ b/test/CodeGen/AArch64/extern-weak.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK-ARM64 -; RUN: llc -mtriple=arm64-none-linux-gnu -code-model=large -o - %s | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large -o - %s | FileCheck --check-prefix=CHECK-LARGE %s declare extern_weak i32 @var() @@ -9,8 +9,8 @@ define i32()* @foo() { ret i32()* @var -; CHECK-ARM64: adrp x[[ADDRHI:[0-9]+]], :got:var -; CHECK-ARM64: ldr x0, [x[[ADDRHI]], :got_lo12:var] +; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:var +; CHECK: ldr x0, [x[[ADDRHI]], :got_lo12:var] ; In the large model, the usual relocations are absolute and can ; materialise 0. 
@@ -27,9 +27,9 @@ define i32* @bar() { %addr = getelementptr [10 x i32]* @arr_var, i32 0, i32 5 -; CHECK-ARM64: adrp x[[ADDRHI:[0-9]+]], :got:arr_var -; CHECK-ARM64: ldr [[BASE:x[0-9]+]], [x[[ADDRHI]], :got_lo12:arr_var] -; CHECK-ARM64: add x0, [[BASE]], #20 +; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:arr_var +; CHECK: ldr [[BASE:x[0-9]+]], [x[[ADDRHI]], :got_lo12:arr_var] +; CHECK: add x0, [[BASE]], #20 ret i32* %addr @@ -46,8 +46,8 @@ define i32* @bar() { define i32* @wibble() { ret i32* @defined_weak_var -; CHECK-ARM64: adrp [[BASE:x[0-9]+]], defined_weak_var -; CHECK-ARM64: add x0, [[BASE]], :lo12:defined_weak_var +; CHECK: adrp [[BASE:x[0-9]+]], defined_weak_var +; CHECK: add x0, [[BASE]], :lo12:defined_weak_var ; CHECK-LARGE: movz x0, #:abs_g3:defined_weak_var ; CHECK-LARGE: movk x0, #:abs_g2_nc:defined_weak_var diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll index 09a6ae3ccd2a..a392619a768d 100644 --- a/test/CodeGen/AArch64/fastcc-reserved.ll +++ b/test/CodeGen/AArch64/fastcc-reserved.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefix=CHECK-ARM64 +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s ; This test is designed to be run in the situation where the ; call-frame is not reserved (hence disable-fp-elim), but where @@ -12,30 +12,22 @@ define fastcc void @foo(i32 %in) { %addr = alloca i8, i32 %in ; Normal frame setup stuff: -; CHECK: sub sp, sp, -; CHECK: stp x29, x30 -; CHECK-ARM64: stp x29, x30, [sp, #-16]! -; CHECK-ARM64: mov x29, sp +; CHECK: stp x29, x30, [sp, #-16]! +; CHECK: mov x29, sp ; Reserve space for call-frame: ; CHECK: sub sp, sp, #16 -; CHECK-ARM64: sub sp, sp, #16 call fastcc void @will_pop([8 x i32] undef, i32 42) ; CHECK: bl will_pop -; CHECK-ARM64: bl will_pop ; Since @will_pop is fastcc with tailcallopt, it will put the stack ; back where it needs to be, we shouldn't duplicate that ; CHECK-NOT: sub sp, sp, #16 ; CHECK-NOT: add sp, sp, -; CHECK-ARM64-NOT: sub sp, sp, #16 -; CHECK-ARM64-NOT: add sp, sp, -; CHECK: ldp x29, x30 -; CHECK: add sp, sp, -; CHECK-ARM64: mov sp, x29 -; CHECK-ARM64: ldp x29, x30, [sp], #16 +; CHECK: mov sp, x29 +; CHECK: ldp x29, x30, [sp], #16 ret void } @@ -46,28 +38,21 @@ define void @foo1(i32 %in) { %addr = alloca i8, i32 %in ; Normal frame setup again -; CHECK: sub sp, sp, -; CHECK: stp x29, x30 -; CHECK-ARM64: stp x29, x30, [sp, #-16]! -; CHECK-ARM64: mov x29, sp +; CHECK: stp x29, x30, [sp, #-16]! +; CHECK: mov x29, sp ; Reserve space for call-frame ; CHECK: sub sp, sp, #16 -; CHECK-ARM64: sub sp, sp, #16 call void @wont_pop([8 x i32] undef, i32 42) ; CHECK: bl wont_pop -; CHECK-ARM64: bl wont_pop ; This time we *do* need to unreserve the call-frame ; CHECK: add sp, sp, #16 -; CHECK-ARM64: add sp, sp, #16 ; Check for epilogue (primarily to make sure sp spotted above wasn't ; part of it). 
-; CHECK: ldp x29, x30 -; CHECK: add sp, sp, -; CHECK-ARM64: mov sp, x29 -; CHECK-ARM64: ldp x29, x30, [sp], #16 +; CHECK: mov sp, x29 +; CHECK: ldp x29, x30, [sp], #16 ret void } diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll index b641de0ee290..9917fcd044fd 100644 --- a/test/CodeGen/AArch64/fastcc.ll +++ b/test/CodeGen/AArch64/fastcc.ll @@ -1,226 +1,144 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-ARM64-TAIL -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK-ARM64 %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; Without tailcallopt fastcc still means the caller cleans up the ; stack, so try to make sure this is respected. define fastcc void @func_stack0() { ; CHECK-LABEL: func_stack0: -; CHECK: sub sp, sp, #48 - -; CHECK-ARM64-LABEL: func_stack0: -; CHECK-ARM64: stp x29, x30, [sp, #-16]! -; CHECK-ARM64-NEXT: mov x29, sp -; CHECK-ARM64-NEXT: sub sp, sp, #32 +; CHECK: mov x29, sp +; CHECK-NEXT: sub sp, sp, #32 ; CHECK-TAIL-LABEL: func_stack0: -; CHECK-TAIL: sub sp, sp, #48 - -; CHECK-ARM64-TAIL-LABEL: func_stack0: -; CHECK-ARM64-TAIL: stp x29, x30, [sp, #-16]! -; CHECK-ARM64-TAIL-NEXT: mov x29, sp -; CHECK-ARM64-TAIL-NEXT: sub sp, sp, #32 +; CHECK-TAIL: stp x29, x30, [sp, #-16]! +; CHECK-TAIL-NEXT: mov x29, sp +; CHECK-TAIL-NEXT: sub sp, sp, #32 call fastcc void @func_stack8([8 x i32] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, -; CHECK-ARM64: bl func_stack8 -; CHECK-ARM64-NOT: sub sp, sp, - ; CHECK-TAIL: bl func_stack8 ; CHECK-TAIL: sub sp, sp, #16 -; CHECK-ARM64-TAIL: bl func_stack8 -; CHECK-ARM64-TAIL: sub sp, sp, #16 - call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) ; CHECK: bl func_stack32 ; CHECK-NOT: sub sp, sp, -; CHECK-ARM64: bl func_stack32 -; CHECK-ARM64-NOT: sub sp, sp, ; CHECK-TAIL: bl func_stack32 ; CHECK-TAIL: sub sp, sp, #32 -; CHECK-ARM64-TAIL: bl func_stack32 -; CHECK-ARM64-TAIL: sub sp, sp, #32 - call fastcc void @func_stack0() ; CHECK: bl func_stack0 ; CHECK-NOT: sub sp, sp -; CHECK-ARM64: bl func_stack0 -; CHECK-ARM64-NOT: sub sp, sp ; CHECK-TAIL: bl func_stack0 ; CHECK-TAIL-NOT: sub sp, sp -; CHECK-ARM64-TAIL: bl func_stack0 -; CHECK-ARM64-TAIL-NOT: sub sp, sp - ret void -; CHECK: add sp, sp, #48 +; CHECK: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; CHECK-NEXT: ret -; CHECK-ARM64: mov sp, x29 -; CHECK-ARM64-NEXT: ldp x29, x30, [sp], #16 -; CHECK-ARM64-NEXT: ret -; CHECK-TAIL: add sp, sp, #48 +; CHECK-TAIL: mov sp, x29 +; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16 ; CHECK-TAIL-NEXT: ret - -; CHECK-ARM64-TAIL: mov sp, x29 -; CHECK-ARM64-TAIL-NEXT: ldp x29, x30, [sp], #16 -; CHECK-ARM64-TAIL-NEXT: ret } define fastcc void @func_stack8([8 x i32], i32 %stacked) { ; CHECK-LABEL: func_stack8: -; CHECK: sub sp, sp, #48 +; CHECK: stp x29, x30, [sp, #-16]! +; CHECK: mov x29, sp +; CHECK: sub sp, sp, #32 -; CHECK-ARM64-LABEL: func_stack8: -; CHECK-ARM64: stp x29, x30, [sp, #-16]! -; CHECK-ARM64: mov x29, sp -; CHECK-ARM64: sub sp, sp, #32 ; CHECK-TAIL-LABEL: func_stack8: -; CHECK-TAIL: sub sp, sp, #48 - -; CHECK-ARM64-TAIL-LABEL: func_stack8: -; CHECK-ARM64-TAIL: stp x29, x30, [sp, #-16]! -; CHECK-ARM64-TAIL: mov x29, sp -; CHECK-ARM64-TAIL: sub sp, sp, #32 +; CHECK-TAIL: stp x29, x30, [sp, #-16]! 
+; CHECK-TAIL: mov x29, sp +; CHECK-TAIL: sub sp, sp, #32 call fastcc void @func_stack8([8 x i32] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, -; CHECK-ARM64: bl func_stack8 -; CHECK-ARM64-NOT: sub sp, sp, ; CHECK-TAIL: bl func_stack8 ; CHECK-TAIL: sub sp, sp, #16 -; CHECK-ARM64-TAIL: bl func_stack8 -; CHECK-ARM64-TAIL: sub sp, sp, #16 - call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) ; CHECK: bl func_stack32 ; CHECK-NOT: sub sp, sp, -; CHECK-ARM64: bl func_stack32 -; CHECK-ARM64-NOT: sub sp, sp, ; CHECK-TAIL: bl func_stack32 ; CHECK-TAIL: sub sp, sp, #32 -; CHECK-ARM64-TAIL: bl func_stack32 -; CHECK-ARM64-TAIL: sub sp, sp, #32 - call fastcc void @func_stack0() ; CHECK: bl func_stack0 ; CHECK-NOT: sub sp, sp -; CHECK-ARM64: bl func_stack0 -; CHECK-ARM64-NOT: sub sp, sp - ; CHECK-TAIL: bl func_stack0 ; CHECK-TAIL-NOT: sub sp, sp -; CHECK-ARM64-TAIL: bl func_stack0 -; CHECK-ARM64-TAIL-NOT: sub sp, sp - ret void -; CHECK: add sp, sp, #48 +; CHECK: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; CHECK-NEXT: ret -; CHECK-ARM64: mov sp, x29 -; CHECK-ARM64-NEXT: ldp x29, x30, [sp], #16 -; CHECK-ARM64-NEXT: ret -; CHECK-TAIL: add sp, sp, #64 +; CHECK-TAIL: mov sp, x29 +; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16 ; CHECK-TAIL-NEXT: ret - -; CHECK-ARM64-TAIL: mov sp, x29 -; CHECK-ARM64-TAIL-NEXT: ldp x29, x30, [sp], #16 -; CHECK-ARM64-TAIL-NEXT: ret } define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) { ; CHECK-LABEL: func_stack32: -; CHECK: sub sp, sp, #48 - -; CHECK-ARM64-LABEL: func_stack32: -; CHECK-ARM64: mov x29, sp +; CHECK: mov x29, sp ; CHECK-TAIL-LABEL: func_stack32: -; CHECK-TAIL: sub sp, sp, #48 - -; CHECK-ARM64-TAIL-LABEL: func_stack32: -; CHECK-ARM64-TAIL: mov x29, sp +; CHECK-TAIL: mov x29, sp call fastcc void @func_stack8([8 x i32] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, -; CHECK-ARM64: bl func_stack8 -; CHECK-ARM64-NOT: sub sp, sp, - ; CHECK-TAIL: bl func_stack8 ; CHECK-TAIL: sub sp, sp, #16 -; CHECK-ARM64-TAIL: bl func_stack8 -; CHECK-ARM64-TAIL: sub sp, sp, #16 - call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) ; CHECK: bl func_stack32 ; CHECK-NOT: sub sp, sp, -; CHECK-ARM64: bl func_stack32 -; CHECK-ARM64-NOT: sub sp, sp, ; CHECK-TAIL: bl func_stack32 ; CHECK-TAIL: sub sp, sp, #32 -; CHECK-ARM64-TAIL: bl func_stack32 -; CHECK-ARM64-TAIL: sub sp, sp, #32 - call fastcc void @func_stack0() ; CHECK: bl func_stack0 ; CHECK-NOT: sub sp, sp -; CHECK-ARM64: bl func_stack0 -; CHECK-ARM64-NOT: sub sp, sp ; CHECK-TAIL: bl func_stack0 ; CHECK-TAIL-NOT: sub sp, sp -; CHECK-ARM64-TAIL: bl func_stack0 -; CHECK-ARM64-TAIL-NOT: sub sp, sp - ret void -; CHECK: add sp, sp, #48 +; CHECK: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; CHECK-NEXT: ret -; CHECK-ARM64: mov sp, x29 -; CHECK-ARM64-NEXT: ldp x29, x30, [sp], #16 -; CHECK-ARM64-NEXT: ret - -; CHECK-TAIL: add sp, sp, #80 +; CHECK-TAIL: mov sp, x29 +; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16 ; CHECK-TAIL-NEXT: ret - -; CHECK-ARM64-TAIL: mov sp, x29 -; CHECK-ARM64-TAIL-NEXT: ldp x29, x30, [sp], #16 -; CHECK-ARM64-TAIL-NEXT: ret } diff --git a/test/CodeGen/AArch64/fcmp.ll b/test/CodeGen/AArch64/fcmp.ll index c54e3e62941e..3c74508bb12b 100644 --- a/test/CodeGen/AArch64/fcmp.ll +++ b/test/CodeGen/AArch64/fcmp.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s declare void @bar(i32) diff --git 
a/test/CodeGen/AArch64/fcvt-fixed.ll b/test/CodeGen/AArch64/fcvt-fixed.ll index 40800d00e50f..ccb3616b70bf 100644 --- a/test/CodeGen/AArch64/fcvt-fixed.ll +++ b/test/CodeGen/AArch64/fcvt-fixed.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 -O0 ; (The O0 test is to make sure FastISel still constrains its operands properly diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll index 667c05d1653a..c9b0b9ff7d8c 100644 --- a/test/CodeGen/AArch64/flags-multiuse.ll +++ b/test/CodeGen/AArch64/flags-multiuse.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s ; LLVM should be able to cope with multiple uses of the same flag-setting ; instruction at different points of a routine. Either by rematerializing the diff --git a/test/CodeGen/AArch64/floatdp_2source.ll b/test/CodeGen/AArch64/floatdp_2source.ll index 8e98b784bb9d..262271784ec6 100644 --- a/test/CodeGen/AArch64/floatdp_2source.ll +++ b/test/CodeGen/AArch64/floatdp_2source.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu -mcpu=cyclone | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -mcpu=cyclone | FileCheck %s @varfloat = global float 0.0 @vardouble = global double 0.0 diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll index 07cbb4919e61..b4f4d77cd0bc 100644 --- a/test/CodeGen/AArch64/fp-cond-sel.ll +++ b/test/CodeGen/AArch64/fp-cond-sel.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK @varfloat = global float 0.0 @vardouble = global double 0.0 @@ -12,7 +12,7 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) { %tst1 = icmp ugt i32 %lhs32, %rhs32 %val1 = select i1 %tst1, float 0.0, float 1.0 store float %val1, float* @varfloat -; CHECK-ARM64: movi v[[FLT0:[0-9]+]].2d, #0 +; CHECK: movi v[[FLT0:[0-9]+]].2d, #0 ; CHECK: fmov s[[FLT1:[0-9]+]], #1.0 ; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi diff --git a/test/CodeGen/AArch64/fp-dp3.ll b/test/CodeGen/AArch64/fp-dp3.ll index 53113b59127d..10f88fdbbe96 100644 --- a/test/CodeGen/AArch64/fp-dp3.ll +++ b/test/CodeGen/AArch64/fp-dp3.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -fp-contract=fast | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s -check-prefix=CHECK-NOFAST declare float @llvm.fma.f32(float, float, float) diff --git a/test/CodeGen/AArch64/fp128-folding.ll b/test/CodeGen/AArch64/fp128-folding.ll index 4b19deb976c6..892b19c5cf33 100644 --- a/test/CodeGen/AArch64/fp128-folding.ll +++ b/test/CodeGen/AArch64/fp128-folding.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s declare void @bar(i8*, i8*, i32*) ; SelectionDAG 
used to try to fold some fp128 operations using the ppc128 type, diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll index e279d5b00969..e59520c4dc95 100644 --- a/test/CodeGen/AArch64/fpimm.ll +++ b/test/CodeGen/AArch64/fpimm.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s @varf32 = global float 0.0 @varf64 = global double 0.0 diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll index 129ab25c8772..abb732ccf43a 100644 --- a/test/CodeGen/AArch64/func-argpassing.ll +++ b/test/CodeGen/AArch64/func-argpassing.ll @@ -1,8 +1,5 @@ - -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64 %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE --check-prefix=CHECK-ARM64-BE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s %myStruct = type { i64 , i8, i32 } @@ -63,7 +60,7 @@ define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %st %val0 = load volatile i32* %addr0 ; Some weird move means x0 is used for one access -; CHECK-ARM64: ldr [[REG32:w[0-9]+]], [sp, #28] +; CHECK: ldr [[REG32:w[0-9]+]], [sp, #28] store i32 %val0, i32* @var32 ; CHECK: str [[REG32]], [{{x[0-9]+}}, {{#?}}:lo12:var32] @@ -149,7 +146,6 @@ define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var %retval = load volatile i32* %stacked ret i32 %retval ; CHECK-LE: ldr w0, [sp, #16] -; CHECK-BE-AARCH64: ldr w0, [sp, #20] } define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, @@ -159,8 +155,8 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, store float %var8, float* @varfloat ; Beware as above: the offset would be different on big-endian ; machines if the first ldr were changed to use s-registers. 
-; CHECK-ARM64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp] -; CHECK-ARM64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat] +; CHECK: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp] +; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat] ret void } @@ -185,11 +181,10 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, ; Nothing local on stack in current codegen, so first stack is 16 away ; CHECK-LE: add x[[REG:[0-9]+]], sp, #16 ; CHECK-LE: ldr {{x[0-9]+}}, [x[[REG]], #8] -; CHECK-BE-AARCH64: ldr {{x[0-9]+}}, [sp, #24] ; Important point is that we address sp+24 for second dword -; CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] +; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] ret void } @@ -209,6 +204,5 @@ define i16 @stacked_i16(i32 %val0, i32 %val1, i32 %val2, i32 %val3, i32 %val4, i32 %val5, i32 %val6, i32 %val7, i16 %stack1) { ; CHECK-LABEL: stacked_i16 -; CHECK-ARM64-BE: ldrh ret i16 %stack1 } diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll index 8cb5f97e8888..422c5765ec48 100644 --- a/test/CodeGen/AArch64/func-calls.ll +++ b/test/CodeGen/AArch64/func-calls.ll @@ -1,8 +1,7 @@ - -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64-NONEON %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK-NONEON %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK-BE %s %myStruct = type { i64 , i8, i32 } @@ -90,13 +89,13 @@ define void @check_stack_args() { ; that varstruct is passed on the stack. Rather dependent on how a ; memcpy gets created, but the following works for now. 
-; CHECK-ARM64-DAG: str {{q[0-9]+}}, [sp] -; CHECK-ARM64-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 -; CHECK-ARM64: mov v0.16b, v[[FINAL_DOUBLE]].16b +; CHECK-DAG: str {{q[0-9]+}}, [sp] +; CHECK-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 +; CHECK: mov v0.16b, v[[FINAL_DOUBLE]].16b -; CHECK-ARM64-NONEON-DAG: str {{q[0-9]+}}, [sp] -; CHECK-ARM64-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 -; CHECK-ARM64-NONEON: fmov d0, d[[FINAL_DOUBLE]] +; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp] +; CHECK-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 +; CHECK-NONEON: fmov d0, d[[FINAL_DOUBLE]] ; CHECK: bl struct_on_stack ; CHECK-NOFP-NOT: fmov @@ -105,11 +104,11 @@ define void @check_stack_args() { float -2.0, float -8.0, float 16.0, float 1.0, float 64.0) -; CHECK-ARM64: movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16 -; CHECK-ARM64: str [[SIXTY_FOUR]], [sp] +; CHECK: movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16 +; CHECK: str [[SIXTY_FOUR]], [sp] -; CHECK-ARM64-NONEON: movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16 -; CHECK-ARM64-NONEON: str [[SIXTY_FOUR]], [sp] +; CHECK-NONEON: movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16 +; CHECK-NONEON: str [[SIXTY_FOUR]], [sp] ; CHECK: bl stacked_fpu ret void @@ -131,8 +130,11 @@ define void @check_i128_align() { i32 42, i128 %val) ; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:var128] ; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8] -; CHECK-ARM64: stp [[I128LO]], [[I128HI]], [sp, #16] -; CHECK-ARM64-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16] +; CHECK: stp [[I128LO]], [[I128HI]], [sp, #16] + +; CHECK-NONEON: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128] +; CHECK-NONEON: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8] +; CHECK-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16] ; CHECK: bl check_i128_stackalign call void @check_i128_regalign(i32 0, i128 42) diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll index 2bf4a2cbce4a..451b9d6741ee 100644 --- a/test/CodeGen/AArch64/global-alignment.ll +++ b/test/CodeGen/AArch64/global-alignment.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s @var32 = global [3 x i32] zeroinitializer @var64 = global [3 x i64] zeroinitializer diff --git a/test/CodeGen/AArch64/got-abuse.ll b/test/CodeGen/AArch64/got-abuse.ll index c23edaf4360f..7a02b104e777 100644 --- a/test/CodeGen/AArch64/got-abuse.ll +++ b/test/CodeGen/AArch64/got-abuse.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s -; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj -o - %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj -o - %s ; LLVM gives well-defined semantics to this horrible construct (though C says ; it's undefined). Regardless, we shouldn't crash. 
The important feature here is diff --git a/test/CodeGen/AArch64/illegal-float-ops.ll b/test/CodeGen/AArch64/illegal-float-ops.ll index 8320f3ab0443..9f7dd998bc21 100644 --- a/test/CodeGen/AArch64/illegal-float-ops.ll +++ b/test/CodeGen/AArch64/illegal-float-ops.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s @varfloat = global float 0.0 @vardouble = global double 0.0 diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll index d3ed363821c3..f47b490baebd 100644 --- a/test/CodeGen/AArch64/init-array.ll +++ b/test/CodeGen/AArch64/init-array.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -use-init-array -o - %s | FileCheck %s -; RUN: llc -mtriple=arm64-none-none-eabi -verify-machineinstrs -use-init-array -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -use-init-array -o - %s | FileCheck %s define internal void @_GLOBAL__I_a() section ".text.startup" { ret void diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll index 7ca9ade9cc62..9d833d936c06 100644 --- a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll +++ b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll @@ -1,4 +1,4 @@ -; RUN: not llc -mtriple=arm64-none-linux-gnu -o - %s +; RUN: not llc -mtriple=aarch64-none-linux-gnu -o - %s define void @foo() { ; Out of range immediate for I. diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll index 6bc633814c7e..172601301993 100644 --- a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll +++ b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll @@ -1,4 +1,4 @@ -; RUN: not llc -mtriple=arm64-none-linux-gnu -o - %s +; RUN: not llc -mtriple=aarch64-none-linux-gnu -o - %s define void @foo() { ; 32-bit bitpattern ending in 1101 can't be produced. 
diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll index a0fcafa45100..1dfb789ac8e9 100644 --- a/test/CodeGen/AArch64/jump-table.ll +++ b/test/CodeGen/AArch64/jump-table.ll @@ -1,6 +1,6 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s -; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -o - %s | FileCheck --check-prefix=CHECK-PIC %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -o - %s | FileCheck --check-prefix=CHECK-PIC %s define i32 @test_jumptable(i32 %in) { ; CHECK: test_jumptable diff --git a/test/CodeGen/AArch64/large-consts.ll b/test/CodeGen/AArch64/large-consts.ll index b5f6c32eef4e..6bf85e829f61 100644 --- a/test/CodeGen/AArch64/large-consts.ll +++ b/test/CodeGen/AArch64/large-consts.ll @@ -1,14 +1,14 @@ -; RUN: llc -mtriple=arm64-linux-gnu -o - %s -code-model=large -show-mc-encoding | FileCheck %s --check-prefix=CHECK-ARM64 +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s -code-model=large -show-mc-encoding | FileCheck %s ; Make sure the shift amount is encoded into the instructions by LLVM because ; it's not the linker's job to put it there. define double @foo() { -; CHECK-ARM64: movz [[CPADDR:x[0-9]+]], #:abs_g3:.LCPI0_0 // encoding: [0bAAA01000,A,0b111AAAAA,0xd2] -; CHECK-ARM64: movk [[CPADDR]], #:abs_g2_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b110AAAAA,0xf2] -; CHECK-ARM64: movk [[CPADDR]], #:abs_g1_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b101AAAAA,0xf2] -; CHECK-ARM64: movk [[CPADDR]], #:abs_g0_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b100AAAAA,0xf2] +; CHECK: movz [[CPADDR:x[0-9]+]], #:abs_g3:.LCPI0_0 // encoding: [0bAAA01000,A,0b111AAAAA,0xd2] +; CHECK: movk [[CPADDR]], #:abs_g2_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b110AAAAA,0xf2] +; CHECK: movk [[CPADDR]], #:abs_g1_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b101AAAAA,0xf2] +; CHECK: movk [[CPADDR]], #:abs_g0_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b100AAAAA,0xf2] ret double 3.14159 } diff --git a/test/CodeGen/AArch64/ldst-regoffset.ll b/test/CodeGen/AArch64/ldst-regoffset.ll index b13634ca706a..e2fa08bcce69 100644 --- a/test/CodeGen/AArch64/ldst-regoffset.ll +++ b/test/CodeGen/AArch64/ldst-regoffset.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @var_8bit = global i8 0 @var_16bit = global i16 0 diff --git a/test/CodeGen/AArch64/ldst-unscaledimm.ll b/test/CodeGen/AArch64/ldst-unscaledimm.ll index d738cfdaa26a..1de8443d9ed2 100644 --- a/test/CodeGen/AArch64/ldst-unscaledimm.ll +++ b/test/CodeGen/AArch64/ldst-unscaledimm.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck 
--check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @var_8bit = global i8 0 @var_16bit = global i16 0 diff --git a/test/CodeGen/AArch64/ldst-unsignedimm.ll b/test/CodeGen/AArch64/ldst-unsignedimm.ll index d6475f904294..e171d22b6c7c 100644 --- a/test/CodeGen/AArch64/ldst-unsignedimm.ll +++ b/test/CodeGen/AArch64/ldst-unsignedimm.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @var_8bit = global i8 0 @var_16bit = global i16 0 diff --git a/test/CodeGen/ARM64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg similarity index 89% rename from test/CodeGen/ARM64/lit.local.cfg rename to test/CodeGen/AArch64/lit.local.cfg index 3468e27f07f5..77493d88b2aa 100644 --- a/test/CodeGen/ARM64/lit.local.cfg +++ b/test/CodeGen/AArch64/lit.local.cfg @@ -3,7 +3,7 @@ import re config.suffixes = ['.ll'] targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: +if not 'AArch64' in targets: config.unsupported = True # For now we don't test arm64-win32. diff --git a/test/CodeGen/AArch64/literal_pools_float.ll b/test/CodeGen/AArch64/literal_pools_float.ll index 6f9f3fc37722..e53b8b62c6f3 100644 --- a/test/CodeGen/AArch64/literal_pools_float.ll +++ b/test/CodeGen/AArch64/literal_pools_float.ll @@ -1,7 +1,7 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -code-model=large -mcpu=cyclone | FileCheck --check-prefix=CHECK-LARGE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -code-model=large -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP-LARGE %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -code-model=large -mcpu=cyclone | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP-LARGE %s @varfloat = global float 0.0 @vardouble = global double 0.0 diff --git a/test/CodeGen/AArch64/local_vars.ll b/test/CodeGen/AArch64/local_vars.ll index 4518fa210233..2f5b9f2adb48 100644 --- a/test/CodeGen/AArch64/local_vars.ll +++ b/test/CodeGen/AArch64/local_vars.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP-ARM64 %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-fp-elim | 
FileCheck -check-prefix CHECK-WITHFP-ARM64 %s ; Make sure a reasonably sane prologue and epilogue are ; generated. This test is not robust in the face of an frame-handling diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll index 608d44fc9d7b..b249d72e0f90 100644 --- a/test/CodeGen/AArch64/logical_shifted_reg.ll +++ b/test/CodeGen/AArch64/logical_shifted_reg.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s @var1_32 = global i32 0 @var2_32 = global i32 0 diff --git a/test/CodeGen/AArch64/mature-mc-support.ll b/test/CodeGen/AArch64/mature-mc-support.ll index 2948da9f2007..276c54d2cc4e 100644 --- a/test/CodeGen/AArch64/mature-mc-support.ll +++ b/test/CodeGen/AArch64/mature-mc-support.ll @@ -1,14 +1,10 @@ ; Test that inline assembly is parsed by the MC layer when MC support is mature ; (even when the output is assembly). -; RUN: FileCheck %s < %t1 - -; RUN: FileCheck %s < %t2 - -; RUN: not llc -mtriple=arm64-pc-linux < %s > /dev/null 2> %t3 +; RUN: not llc -mtriple=aarch64-pc-linux < %s > /dev/null 2> %t3 ; RUN: FileCheck %s < %t3 -; RUN: not llc -mtriple=arm64-pc-linux -filetype=obj < %s > /dev/null 2> %t4 +; RUN: not llc -mtriple=aarch64-pc-linux -filetype=obj < %s > /dev/null 2> %t4 ; RUN: FileCheck %s < %t4 module asm " .this_directive_is_very_unlikely_to_exist" diff --git a/test/CodeGen/AArch64/movw-consts.ll b/test/CodeGen/AArch64/movw-consts.ll index 6fe000974d5f..93c181271755 100644 --- a/test/CodeGen/AArch64/movw-consts.ll +++ b/test/CodeGen/AArch64/movw-consts.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK define i64 @test0() { ; CHECK-LABEL: test0: @@ -9,43 +9,43 @@ define i64 @test0() { define i64 @test1() { ; CHECK-LABEL: test1: -; CHECK-ARM64: orr w0, wzr, #0x1 +; CHECK: orr w0, wzr, #0x1 ret i64 1 } define i64 @test2() { ; CHECK-LABEL: test2: -; CHECK-ARM64: orr w0, wzr, #0xffff +; CHECK: orr w0, wzr, #0xffff ret i64 65535 } define i64 @test3() { ; CHECK-LABEL: test3: -; CHECK-ARM64: orr w0, wzr, #0x10000 +; CHECK: orr w0, wzr, #0x10000 ret i64 65536 } define i64 @test4() { ; CHECK-LABEL: test4: -; CHECK-ARM64: orr w0, wzr, #0xffff0000 +; CHECK: orr w0, wzr, #0xffff0000 ret i64 4294901760 } define i64 @test5() { ; CHECK-LABEL: test5: -; CHECK-ARM64: orr x0, xzr, #0x100000000 +; CHECK: orr x0, xzr, #0x100000000 ret i64 4294967296 } define i64 @test6() { ; CHECK-LABEL: test6: -; CHECK-ARM64: orr x0, xzr, #0xffff00000000 +; CHECK: orr x0, xzr, #0xffff00000000 ret i64 281470681743360 } define i64 @test7() { ; CHECK-LABEL: test7: -; CHECK-ARM64: orr x0, xzr, #0x1000000000000 +; CHECK: orr x0, xzr, #0x1000000000000 ret i64 281474976710656 } @@ -75,35 +75,35 @@ define i64 @test10() { define void @test11() { ; CHECK-LABEL: test11: -; CHECK-ARM64: str wzr +; CHECK: str wzr store i32 0, i32* @var32 ret void } define void @test12() { ; CHECK-LABEL: test12: -; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0x1 +; CHECK: orr {{w[0-9]+}}, wzr, #0x1 store i32 1, i32* @var32 ret void } define void @test13() { ; CHECK-LABEL: test13: -; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0xffff +; CHECK: orr {{w[0-9]+}}, wzr, #0xffff store i32 65535, i32* @var32 ret void } define void @test14() { ; CHECK-LABEL: test14: -; 
CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0x10000 +; CHECK: orr {{w[0-9]+}}, wzr, #0x10000 store i32 65536, i32* @var32 ret void } define void @test15() { ; CHECK-LABEL: test15: -; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0xffff0000 +; CHECK: orr {{w[0-9]+}}, wzr, #0xffff0000 store i32 4294901760, i32* @var32 ret void } @@ -119,6 +119,6 @@ define i64 @test17() { ; CHECK-LABEL: test17: ; Mustn't MOVN w0 here. -; CHECK-ARM64: orr x0, xzr, #0xfffffffffffffffd +; CHECK: orr x0, xzr, #0xfffffffffffffffd ret i64 -3 } diff --git a/test/CodeGen/AArch64/movw-shift-encoding.ll b/test/CodeGen/AArch64/movw-shift-encoding.ll index 2fe9dd4516e5..178fccce333b 100644 --- a/test/CodeGen/AArch64/movw-shift-encoding.ll +++ b/test/CodeGen/AArch64/movw-shift-encoding.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s --check-prefix=CHECK-ARM64 +; RUN: llc -mtriple=aarch64-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s @var = global i32 0 @@ -8,8 +8,8 @@ define i32* @get_var() { ret i32* @var -; CHECK-ARM64: movz x0, #:abs_g3:var // encoding: [0bAAA00000,A,0b111AAAAA,0xd2] -; CHECK-ARM64: movk x0, #:abs_g2_nc:var // encoding: [0bAAA00000,A,0b110AAAAA,0xf2] -; CHECK-ARM64: movk x0, #:abs_g1_nc:var // encoding: [0bAAA00000,A,0b101AAAAA,0xf2] -; CHECK-ARM64: movk x0, #:abs_g0_nc:var // encoding: [0bAAA00000,A,0b100AAAAA,0xf2] +; CHECK: movz x0, #:abs_g3:var // encoding: [0bAAA00000,A,0b111AAAAA,0xd2] +; CHECK: movk x0, #:abs_g2_nc:var // encoding: [0bAAA00000,A,0b110AAAAA,0xf2] +; CHECK: movk x0, #:abs_g1_nc:var // encoding: [0bAAA00000,A,0b101AAAAA,0xf2] +; CHECK: movk x0, #:abs_g0_nc:var // encoding: [0bAAA00000,A,0b100AAAAA,0xf2] } diff --git a/test/CodeGen/AArch64/neon-bitcast.ll b/test/CodeGen/AArch64/neon-bitcast.ll index b70cda3175ad..61099d48fdd2 100644 --- a/test/CodeGen/AArch64/neon-bitcast.ll +++ b/test/CodeGen/AArch64/neon-bitcast.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s ; From <8 x i8> diff --git a/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/test/CodeGen/AArch64/neon-bitwise-instructions.ll index dfaf1f251792..6497856c7d36 100644 --- a/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @and8xi8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: and8xi8: diff --git a/test/CodeGen/AArch64/neon-compare-instructions.ll b/test/CodeGen/AArch64/neon-compare-instructions.ll index b99057ebf2bc..6d89dfbacf41 100644 --- a/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s define <8 x i8> @cmeq8xi8(<8 x i8> %A, <8 x i8> %B) { ; CHECK-LABEL: cmeq8xi8: diff --git a/test/CodeGen/AArch64/neon-diagnostics.ll b/test/CodeGen/AArch64/neon-diagnostics.ll index e28df29f3e85..099b6856cec0 100644 --- a/test/CodeGen/AArch64/neon-diagnostics.ll +++ b/test/CodeGen/AArch64/neon-diagnostics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | 
FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { ; CHECK: test_vfma_lane_f32: diff --git a/test/CodeGen/AArch64/neon-extract.ll b/test/CodeGen/AArch64/neon-extract.ll index 96b4084a2574..f270b54abb46 100644 --- a/test/CodeGen/AArch64/neon-extract.ll +++ b/test/CodeGen/AArch64/neon-extract.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vext_s8: diff --git a/test/CodeGen/AArch64/neon-fma.ll b/test/CodeGen/AArch64/neon-fma.ll index 6df494dedaee..af70302ca939 100644 --- a/test/CodeGen/AArch64/neon-fma.ll +++ b/test/CodeGen/AArch64/neon-fma.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) { ;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s diff --git a/test/CodeGen/AArch64/neon-fpround_f128.ll b/test/CodeGen/AArch64/neon-fpround_f128.ll index e48dbbaec929..a93f3f2723c3 100644 --- a/test/CodeGen/AArch64/neon-fpround_f128.ll +++ b/test/CodeGen/AArch64/neon-fpround_f128.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s define <1 x double> @test_fpround_v1f128(<1 x fp128>* %a) { ; CHECK-LABEL: test_fpround_v1f128: diff --git a/test/CodeGen/AArch64/neon-idiv.ll b/test/CodeGen/AArch64/neon-idiv.ll index 11e1af7e143e..de402c4780be 100644 --- a/test/CodeGen/AArch64/neon-idiv.ll +++ b/test/CodeGen/AArch64/neon-idiv.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu < %s -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mattr=+neon | FileCheck %s define <4 x i32> @test1(<4 x i32> %a) { %rem = srem <4 x i32> %a, diff --git a/test/CodeGen/AArch64/neon-mla-mls.ll b/test/CodeGen/AArch64/neon-mla-mls.ll index e7bff748ad37..71bb0e70abfa 100644 --- a/test/CodeGen/AArch64/neon-mla-mls.ll +++ b/test/CodeGen/AArch64/neon-mla-mls.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { diff --git a/test/CodeGen/AArch64/neon-mov.ll b/test/CodeGen/AArch64/neon-mov.ll index b7baf25f807a..40649aeb1b8e 100644 --- a/test/CodeGen/AArch64/neon-mov.ll +++ b/test/CodeGen/AArch64/neon-mov.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK define <8 x i8> @movi8b() { ; CHECK-LABEL: movi8b: @@ -14,75 +14,75 @@ define <16 x i8> @movi16b() { define <2 x i32> @movi2s_lsl0() { ; CHECK-LABEL: movi2s_lsl0: -; CHECK-ARM64: movi {{d[0-9]+}}, #0x0000ff000000ff +; CHECK: movi 
{{d[0-9]+}}, #0x0000ff000000ff ret <2 x i32> < i32 255, i32 255 > } define <2 x i32> @movi2s_lsl8() { ; CHECK-LABEL: movi2s_lsl8: -; CHECK-ARM64: movi {{d[0-9]+}}, #0x00ff000000ff00 +; CHECK: movi {{d[0-9]+}}, #0x00ff000000ff00 ret <2 x i32> < i32 65280, i32 65280 > } define <2 x i32> @movi2s_lsl16() { ; CHECK-LABEL: movi2s_lsl16: -; CHECK-ARM64: movi {{d[0-9]+}}, #0xff000000ff0000 +; CHECK: movi {{d[0-9]+}}, #0xff000000ff0000 ret <2 x i32> < i32 16711680, i32 16711680 > } define <2 x i32> @movi2s_lsl24() { ; CHECK-LABEL: movi2s_lsl24: -; CHECK-ARM64: movi {{d[0-9]+}}, #0xff000000ff000000 +; CHECK: movi {{d[0-9]+}}, #0xff000000ff000000 ret <2 x i32> < i32 4278190080, i32 4278190080 > } define <4 x i32> @movi4s_lsl0() { ; CHECK-LABEL: movi4s_lsl0: -; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0x0000ff000000ff +; CHECK: movi {{v[0-9]+}}.2d, #0x0000ff000000ff ret <4 x i32> < i32 255, i32 255, i32 255, i32 255 > } define <4 x i32> @movi4s_lsl8() { ; CHECK-LABEL: movi4s_lsl8: -; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0x00ff000000ff00 +; CHECK: movi {{v[0-9]+}}.2d, #0x00ff000000ff00 ret <4 x i32> < i32 65280, i32 65280, i32 65280, i32 65280 > } define <4 x i32> @movi4s_lsl16() { ; CHECK-LABEL: movi4s_lsl16: -; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0xff000000ff0000 +; CHECK: movi {{v[0-9]+}}.2d, #0xff000000ff0000 ret <4 x i32> < i32 16711680, i32 16711680, i32 16711680, i32 16711680 > } define <4 x i32> @movi4s_lsl24() { ; CHECK-LABEL: movi4s_lsl24: -; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0xff000000ff000000 +; CHECK: movi {{v[0-9]+}}.2d, #0xff000000ff000000 ret <4 x i32> < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080 > } define <4 x i16> @movi4h_lsl0() { ; CHECK-LABEL: movi4h_lsl0: -; CHECK-ARM64: movi {{d[0-9]+}}, #0xff00ff00ff00ff +; CHECK: movi {{d[0-9]+}}, #0xff00ff00ff00ff ret <4 x i16> < i16 255, i16 255, i16 255, i16 255 > } define <4 x i16> @movi4h_lsl8() { ; CHECK-LABEL: movi4h_lsl8: -; CHECK-ARM64: movi d0, #0xff00ff00ff00ff00 +; CHECK: movi d0, #0xff00ff00ff00ff00 ret <4 x i16> < i16 65280, i16 65280, i16 65280, i16 65280 > } define <8 x i16> @movi8h_lsl0() { ; CHECK-LABEL: movi8h_lsl0: -; CHECK-ARM64: movi v0.2d, #0xff00ff00ff00ff +; CHECK: movi v0.2d, #0xff00ff00ff00ff ret <8 x i16> < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 > } define <8 x i16> @movi8h_lsl8() { ; CHECK-LABEL: movi8h_lsl8: -; CHECK-ARM64: movi v0.2d, #0xff00ff00ff00ff00 +; CHECK: movi v0.2d, #0xff00ff00ff00ff00 ret <8 x i16> < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 > } @@ -164,26 +164,26 @@ define <8 x i16> @mvni8h_lsl8() { define <2 x i32> @movi2s_msl8(<2 x i32> %a) { ; CHECK-LABEL: movi2s_msl8: -; CHECK-ARM64: movi {{d[0-9]+}}, #0x00ffff0000ffff +; CHECK: movi {{d[0-9]+}}, #0x00ffff0000ffff ret <2 x i32> < i32 65535, i32 65535 > } define <2 x i32> @movi2s_msl16() { ; CHECK-LABEL: movi2s_msl16: -; CHECK-ARM64: movi d0, #0xffffff00ffffff +; CHECK: movi d0, #0xffffff00ffffff ret <2 x i32> < i32 16777215, i32 16777215 > } define <4 x i32> @movi4s_msl8() { ; CHECK-LABEL: movi4s_msl8: -; CHECK-ARM64: movi v0.2d, #0x00ffff0000ffff +; CHECK: movi v0.2d, #0x00ffff0000ffff ret <4 x i32> < i32 65535, i32 65535, i32 65535, i32 65535 > } define <4 x i32> @movi4s_msl16() { ; CHECK-LABEL: movi4s_msl16: -; CHECK-ARM64: movi v0.2d, #0xffffff00ffffff +; CHECK: movi v0.2d, #0xffffff00ffffff ret <4 x i32> < i32 16777215, i32 16777215, i32 16777215, i32 16777215 > } diff --git a/test/CodeGen/AArch64/neon-or-combine.ll 
b/test/CodeGen/AArch64/neon-or-combine.ll index d98c12802a0c..260f6935ddef 100644 --- a/test/CodeGen/AArch64/neon-or-combine.ll +++ b/test/CodeGen/AArch64/neon-or-combine.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; Check that the DAGCombiner does not crash with an assertion failure ; when performing a target specific combine to simplify a 'or' dag node diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll index d45dde649e47..4f8571db7480 100644 --- a/test/CodeGen/AArch64/neon-perm.ll +++ b/test/CodeGen/AArch64/neon-perm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK %struct.int8x8x2_t = type { [2 x <8 x i8>] } %struct.int16x4x2_t = type { [2 x <4 x i16>] } @@ -53,7 +53,7 @@ entry: define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp1_s32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -69,7 +69,7 @@ entry: define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp1q_s64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -109,7 +109,7 @@ entry: define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp1_u32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -125,7 +125,7 @@ entry: define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp1q_u64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -133,7 +133,7 @@ entry: define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vuzp1_f32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i @@ -149,7 +149,7 @@ entry: define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vuzp1q_f64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i @@ -221,7 +221,7 @@ entry: define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp2_s32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -237,7 +237,7 @@ entry: 
define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp2q_s64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -277,7 +277,7 @@ entry: define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp2_u32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -293,7 +293,7 @@ entry: define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp2q_u64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -301,7 +301,7 @@ entry: define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vuzp2_f32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i @@ -317,7 +317,7 @@ entry: define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vuzp2q_f64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i @@ -389,7 +389,7 @@ entry: define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip1_s32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -405,7 +405,7 @@ entry: define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip1q_s64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -445,7 +445,7 @@ entry: define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip1_u32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -461,7 +461,7 @@ entry: define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip1q_u64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -469,7 +469,7 @@ entry: define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vzip1_f32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i @@ -485,7 +485,7 @@ entry: define <2 x double> 
@test_vzip1q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vzip1q_f64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i @@ -557,7 +557,7 @@ entry: define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip2_s32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -573,7 +573,7 @@ entry: define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip2q_s64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -613,7 +613,7 @@ entry: define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip2_u32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -629,7 +629,7 @@ entry: define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip2q_u64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -637,7 +637,7 @@ entry: define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vzip2_f32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i @@ -653,7 +653,7 @@ entry: define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vzip2q_f64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i @@ -725,7 +725,7 @@ entry: define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn1_s32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -741,7 +741,7 @@ entry: define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn1q_s64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -781,7 +781,7 @@ entry: define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn1_u32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -797,7 +797,7 @@ entry: define <2 x i64> @test_vtrn1q_u64(<2 
x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn1q_u64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -805,7 +805,7 @@ entry: define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vtrn1_f32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i @@ -821,7 +821,7 @@ entry: define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vtrn1q_f64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i @@ -893,7 +893,7 @@ entry: define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn2_s32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -909,7 +909,7 @@ entry: define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn2q_s64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -949,7 +949,7 @@ entry: define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn2_u32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -965,7 +965,7 @@ entry: define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn2q_u64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -973,7 +973,7 @@ entry: define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vtrn2_f32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i @@ -989,7 +989,7 @@ entry: define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vtrn2q_f64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i @@ -2494,8 +2494,8 @@ entry: define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp_s32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vuzp.i = 
shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2530,8 +2530,8 @@ entry: define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp_u32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2542,8 +2542,8 @@ entry: define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vuzp_f32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vuzp.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> %vuzp1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> @@ -2710,8 +2710,8 @@ entry: define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip_s32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2746,8 +2746,8 @@ entry: define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip_u32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2758,8 +2758,8 @@ entry: define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vzip_f32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vzip.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> %vzip1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> @@ -2926,8 +2926,8 @@ entry: define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn_s32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2962,8 +2962,8 @@ entry: define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn_u32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; 
CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2974,8 +2974,8 @@ entry: define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vtrn_f32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vtrn.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> %vtrn1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> diff --git a/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll index 6cfdc5be1314..32f59626b381 100644 --- a/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll +++ b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s declare float @llvm.fma.f32(float, float, float) declare double @llvm.fma.f64(double, double, double) diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll index ab7ea661b406..a01df3275a99 100644 --- a/test/CodeGen/AArch64/neon-scalar-copy.ll +++ b/test/CodeGen/AArch64/neon-scalar-copy.ll @@ -1,9 +1,9 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK define float @test_dup_sv2S(<2 x float> %v) { ; CHECK-LABEL: test_dup_sv2S - ; CHECK-ARM64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] + ; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 ret float %tmp1 } @@ -37,14 +37,14 @@ define double @test_dup_dvD(<1 x double> %v) { define double @test_dup_dv2D(<2 x double> %v) { ; CHECK-LABEL: test_dup_dv2D - ; CHECK-ARM64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] + ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 ret double %tmp1 } define double @test_dup_dv2D_0(<2 x double> %v) { ; CHECK-LABEL: test_dup_dv2D_0 - ; CHECK-ARM64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] + ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK: ret %tmp1 = extractelement <2 x double> %v, i32 1 ret double %tmp1 @@ -88,7 +88,7 @@ define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) { define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) { ; CHECK-LABEL: test_vector_dup_dv2D - ; CHECK-ARM64: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8 + ; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8 %shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> ret <1 x i64> %shuffle.i } diff --git a/test/CodeGen/AArch64/neon-shift-left-long.ll b/test/CodeGen/AArch64/neon-shift-left-long.ll index 1d9c92c999d9..d10d551805a6 100644 --- a/test/CodeGen/AArch64/neon-shift-left-long.ll +++ b/test/CodeGen/AArch64/neon-shift-left-long.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s 
define <8 x i16> @test_sshll_v8i8(<8 x i8> %a) { ; CHECK: test_sshll_v8i8: diff --git a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll index f15cd24e5d42..1df3719c8867 100644 --- a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll +++ b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; A vector TruncStore can not be selected. ; Test a trunc IR and a vector store IR can be selected correctly. diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll index d2697910e6f7..e8c762504fc9 100644 --- a/test/CodeGen/AArch64/pic-eh-stubs.ll +++ b/test/CodeGen/AArch64/pic-eh-stubs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s ; RUN: llc -mtriple=arm64_be-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s ; Make sure exception-handling PIC code can be linked correctly. An alternative diff --git a/test/CodeGen/AArch64/regress-f128csel-flags.ll b/test/CodeGen/AArch64/regress-f128csel-flags.ll index 313cdb1bf0c4..25b5e0c5f776 100644 --- a/test/CodeGen/AArch64/regress-f128csel-flags.ll +++ b/test/CodeGen/AArch64/regress-f128csel-flags.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s ; We used to not mark NZCV as being used in the continuation basic-block ; when lowering a 128-bit "select" to branches. This meant a subsequent use diff --git a/test/CodeGen/AArch64/regress-fp128-livein.ll b/test/CodeGen/AArch64/regress-fp128-livein.ll index 141c0d862f6a..5e6ab0a9675b 100644 --- a/test/CodeGen/AArch64/regress-fp128-livein.ll +++ b/test/CodeGen/AArch64/regress-fp128-livein.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s ; Regression test for NZCV reg live-in not being added to fp128csel IfTrue BB, ; causing a crash during live range calc. 
diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll index 55c3bcdcdd46..477d99625eec 100644 --- a/test/CodeGen/AArch64/regress-tblgen-chains.ll +++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix CHECK-ARM64 +; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s ; When generating DAG selection tables, TableGen used to only flag an ; instruction as needing a chain on its own account if it had a built-in pattern @@ -12,7 +12,7 @@ declare void @bar(i8*) define i64 @test_chains() { -; CHECK-ARM64-LABEL: test_chains: +; CHECK-LABEL: test_chains: %locvar = alloca i8 @@ -25,13 +25,13 @@ define i64 @test_chains() { %inc.4 = trunc i64 %inc.3 to i8 store i8 %inc.4, i8* %locvar -; CHECK-ARM64: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]] -; CHECK-ARM64: add {{w[0-9]+}}, {{w[0-9]+}}, #1 -; CHECK-ARM64: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]] -; CHECK-ARM64: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]] +; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]] +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1 +; CHECK: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]] +; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]] %ret.1 = load i8* %locvar %ret.2 = zext i8 %ret.1 to i64 ret i64 %ret.2 -; CHECK-ARM64: ret +; CHECK: ret } diff --git a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll index cc42b0c9df41..c3167e4f4bdd 100644 --- a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll +++ b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s @var = global i32 0 declare void @bar() diff --git a/test/CodeGen/AArch64/setcc-takes-i32.ll b/test/CodeGen/AArch64/setcc-takes-i32.ll index f06c8ecd28da..ec8615910cf0 100644 --- a/test/CodeGen/AArch64/setcc-takes-i32.ll +++ b/test/CodeGen/AArch64/setcc-takes-i32.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=arm64-none-linux-gnu -o - %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s ; Most important point here is that the promotion of the i1 works ; correctly. 
Previously LLVM thought that i64 was the appropriate SetCC output, diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll index 85245718afc0..34e3bb410e8c 100644 --- a/test/CodeGen/AArch64/sibling-call.ll +++ b/test/CodeGen/AArch64/sibling-call.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -arm64-load-store-opt=0 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -aarch64-load-store-opt=0 | FileCheck %s declare void @callee_stack0() declare void @callee_stack8([8 x i32], i64) diff --git a/test/CodeGen/AArch64/sincos-expansion.ll b/test/CodeGen/AArch64/sincos-expansion.ll index 5ba1d8d0a834..c3a172dfb427 100644 --- a/test/CodeGen/AArch64/sincos-expansion.ll +++ b/test/CodeGen/AArch64/sincos-expansion.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s define float @test_sincos_f32(float %f) { %sin = call float @sinf(float %f) readnone diff --git a/test/CodeGen/AArch64/sincospow-vector-expansion.ll b/test/CodeGen/AArch64/sincospow-vector-expansion.ll index 38c8bb2d5e35..22f33a83394b 100644 --- a/test/CodeGen/AArch64/sincospow-vector-expansion.ll +++ b/test/CodeGen/AArch64/sincospow-vector-expansion.ll @@ -1,4 +1,4 @@ -; RUN: llc -o - %s -verify-machineinstrs -mtriple=arm64-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc -o - %s -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+neon | FileCheck %s define <2 x float> @test_cos_v2f64(<2 x double> %v1) { diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll index b3841fac68ab..8aab84215260 100644 --- a/test/CodeGen/AArch64/tail-call.ll +++ b/test/CodeGen/AArch64/tail-call.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck --check-prefix=CHECK-ARM64 %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s declare fastcc void @callee_stack0() declare fastcc void @callee_stack8([8 x i32], i64) @@ -8,91 +8,59 @@ define fastcc void @caller_to0_from0() nounwind { ; CHECK-LABEL: caller_to0_from0: ; CHECK-NEXT: // BB -; CHECK-ARM64-LABEL: caller_to0_from0: -; CHECK-ARM64-NEXT: // BB - tail call fastcc void @callee_stack0() ret void ; CHECK-NEXT: b callee_stack0 - -; CHECK-ARM64-NEXT: b callee_stack0 } define fastcc void @caller_to0_from8([8 x i32], i64) { ; CHECK-LABEL: caller_to0_from8: -; CHECK-ARM64-LABEL: caller_to0_from8: - tail call fastcc void @callee_stack0() ret void ; CHECK: add sp, sp, #16 ; CHECK-NEXT: b callee_stack0 - -; CHECK-ARM64: add sp, sp, #16 -; CHECK-ARM64-NEXT: b callee_stack0 } define fastcc void @caller_to8_from0() { ; CHECK-LABEL: caller_to8_from0: ; CHECK: sub sp, sp, #32 -; CHECK-ARM64-LABEL: caller_to8_from0: -; CHECK-ARM64: sub sp, sp, #32 - ; Key point is that the "42" should go #16 below incoming stack ; pointer (we didn't have arg space to reuse). tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) ret void -; CHECK: str {{x[0-9]+}}, [sp, #16] -; CHECK-NEXT: add sp, sp, #16 +; CHECK: str {{x[0-9]+}}, [sp, #16]! ; CHECK-NEXT: b callee_stack8 - -; CHECK-ARM64: str {{x[0-9]+}}, [sp, #16]! 
-; CHECK-ARM64-NEXT: b callee_stack8 } define fastcc void @caller_to8_from8([8 x i32], i64 %a) { ; CHECK-LABEL: caller_to8_from8: ; CHECK: sub sp, sp, #16 -; CHECK-ARM64-LABEL: caller_to8_from8: -; CHECK-ARM64: sub sp, sp, #16 - ; Key point is that the "%a" should go where at SP on entry. tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) ret void -; CHECK: str {{x[0-9]+}}, [sp, #16] -; CHECK-NEXT: add sp, sp, #16 +; CHECK: str {{x[0-9]+}}, [sp, #16]! ; CHECK-NEXT: b callee_stack8 - -; CHECK-ARM64: str {{x[0-9]+}}, [sp, #16]! -; CHECK-ARM64-NEXT: b callee_stack8 } define fastcc void @caller_to16_from8([8 x i32], i64 %a) { ; CHECK-LABEL: caller_to16_from8: ; CHECK: sub sp, sp, #16 -; CHECK-ARM64-LABEL: caller_to16_from8: -; CHECK-ARM64: sub sp, sp, #16 - ; Important point is that the call reuses the "dead" argument space ; above %a on the stack. If it tries to go below incoming-SP then the ; callee will not deallocate the space, even in fastcc. tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2) -; CHECK: str {{x[0-9]+}}, [sp, #24] -; CHECK: str {{x[0-9]+}}, [sp, #16] +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: b callee_stack16 - -; CHECK-ARM64: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; CHECK-ARM64-NEXT: add sp, sp, #16 -; CHECK-ARM64-NEXT: b callee_stack16 ret void } @@ -101,19 +69,12 @@ define fastcc void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: caller_to8_from24: ; CHECK: sub sp, sp, #16 -; CHECK-ARM64-LABEL: caller_to8_from24: -; CHECK-ARM64: sub sp, sp, #16 - ; Key point is that the "%a" should go where at #16 above SP on entry. tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) ret void -; CHECK: str {{x[0-9]+}}, [sp, #32] -; CHECK-NEXT: add sp, sp, #32 +; CHECK: str {{x[0-9]+}}, [sp, #32]! ; CHECK-NEXT: b callee_stack8 - -; CHECK-ARM64: str {{x[0-9]+}}, [sp, #32]! -; CHECK-ARM64-NEXT: b callee_stack8 } @@ -121,24 +82,13 @@ define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) { ; CHECK-LABEL: caller_to16_from16: ; CHECK: sub sp, sp, #16 -; CHECK-ARM64-LABEL: caller_to16_from16: -; CHECK-ARM64: sub sp, sp, #16 - ; Here we want to make sure that both loads happen before the stores: ; otherwise either %a or %b will be wrongly clobbered. 
tail call fastcc void @callee_stack16([8 x i32] undef, i64 %b, i64 %a) ret void -; CHECK: ldr x0, -; CHECK: ldr x1, -; CHECK: str x1, -; CHECK: str x0, - +; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: b callee_stack16 - -; CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; CHECK-ARM64: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; CHECK-ARM64-NEXT: add sp, sp, #16 -; CHECK-ARM64-NEXT: b callee_stack16 } diff --git a/test/CodeGen/AArch64/zero-reg.ll b/test/CodeGen/AArch64/zero-reg.ll index 44072c67d904..bc112ab8db98 100644 --- a/test/CodeGen/AArch64/zero-reg.ll +++ b/test/CodeGen/AArch64/zero-reg.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s @var32 = global i32 0 @var64 = global i64 0 diff --git a/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S b/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S deleted file mode 100644 index 250732d6e842..000000000000 --- a/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llvm-mc -triple arm64-apple-darwin -filetype=obj -o /dev/null %s - - .text - .globl _foo - .cfi_startproc -_foo: - stp x29, x30, [sp, #-16]! - .cfi_adjust_cfa_offset 16 - - ldp x29, x30, [sp], #16 - .cfi_adjust_cfa_offset -16 - .cfi_restore x29 - .cfi_restore x30 - - ret - - .cfi_endproc diff --git a/test/CodeGen/ARM64/fminv.ll b/test/CodeGen/ARM64/fminv.ll deleted file mode 100644 index ca706d897ca6..000000000000 --- a/test/CodeGen/ARM64/fminv.ll +++ /dev/null @@ -1,101 +0,0 @@ -; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s - -define float @test_fminv_v2f32(<2 x float> %in) { -; CHECK: test_fminv_v2f32: -; CHECK: fminp s0, v0.2s - %min = call float @llvm.arm64.neon.fminv.f32.v2f32(<2 x float> %in) - ret float %min -} - -define float @test_fminv_v4f32(<4 x float> %in) { -; CHECK: test_fminv_v4f32: -; CHECK: fminv s0, v0.4s - %min = call float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float> %in) - ret float %min -} - -define double @test_fminv_v2f64(<2 x double> %in) { -; CHECK: test_fminv_v2f64: -; CHECK: fminp d0, v0.2d - %min = call double @llvm.arm64.neon.fminv.f64.v2f64(<2 x double> %in) - ret double %min -} - -declare float @llvm.arm64.neon.fminv.f32.v2f32(<2 x float>) -declare float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float>) -declare double @llvm.arm64.neon.fminv.f64.v2f64(<2 x double>) - -define float @test_fmaxv_v2f32(<2 x float> %in) { -; CHECK: test_fmaxv_v2f32: -; CHECK: fmaxp s0, v0.2s - %max = call float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float> %in) - ret float %max -} - -define float @test_fmaxv_v4f32(<4 x float> %in) { -; CHECK: test_fmaxv_v4f32: -; CHECK: fmaxv s0, v0.4s - %max = call float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float> %in) - ret float %max -} - -define double @test_fmaxv_v2f64(<2 x double> %in) { -; CHECK: test_fmaxv_v2f64: -; CHECK: fmaxp d0, v0.2d - %max = call double @llvm.arm64.neon.fmaxv.f64.v2f64(<2 x double> %in) - ret double %max -} - -declare float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float>) -declare float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float>) -declare double @llvm.arm64.neon.fmaxv.f64.v2f64(<2 x double>) - -define float @test_fminnmv_v2f32(<2 x float> %in) { -; CHECK: test_fminnmv_v2f32: -; CHECK: fminnmp s0, v0.2s - %minnm = call float @llvm.arm64.neon.fminnmv.f32.v2f32(<2 x float> %in) - ret float %minnm -} - -define float 
@test_fminnmv_v4f32(<4 x float> %in) { -; CHECK: test_fminnmv_v4f32: -; CHECK: fminnmv s0, v0.4s - %minnm = call float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float> %in) - ret float %minnm -} - -define double @test_fminnmv_v2f64(<2 x double> %in) { -; CHECK: test_fminnmv_v2f64: -; CHECK: fminnmp d0, v0.2d - %minnm = call double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double> %in) - ret double %minnm -} - -declare float @llvm.arm64.neon.fminnmv.f32.v2f32(<2 x float>) -declare float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float>) -declare double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double>) - -define float @test_fmaxnmv_v2f32(<2 x float> %in) { -; CHECK: test_fmaxnmv_v2f32: -; CHECK: fmaxnmp s0, v0.2s - %maxnm = call float @llvm.arm64.neon.fmaxnmv.f32.v2f32(<2 x float> %in) - ret float %maxnm -} - -define float @test_fmaxnmv_v4f32(<4 x float> %in) { -; CHECK: test_fmaxnmv_v4f32: -; CHECK: fmaxnmv s0, v0.4s - %maxnm = call float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float> %in) - ret float %maxnm -} - -define double @test_fmaxnmv_v2f64(<2 x double> %in) { -; CHECK: test_fmaxnmv_v2f64: -; CHECK: fmaxnmp d0, v0.2d - %maxnm = call double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double> %in) - ret double %maxnm -} - -declare float @llvm.arm64.neon.fmaxnmv.f32.v2f32(<2 x float>) -declare float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float>) -declare double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double>) diff --git a/test/CodeGen/ARM64/simd-scalar-to-vector.ll b/test/CodeGen/ARM64/simd-scalar-to-vector.ll deleted file mode 100644 index 10e8d6c0ea41..000000000000 --- a/test/CodeGen/ARM64/simd-scalar-to-vector.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -mcpu=cyclone | FileCheck %s -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -O0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST - -define <16 x i8> @foo(<16 x i8> %a) nounwind optsize readnone ssp { -; CHECK: uaddlv.16b h0, v0 -; CHECK: rshrn.8b v0, v0, #4 -; CHECK: dup.16b v0, v0[0] -; CHECK: ret - -; CHECK-FAST: uaddlv.16b -; CHECK-FAST: rshrn.8b -; CHECK-FAST: dup.16b - %tmp = tail call i32 @llvm.arm64.neon.uaddlv.i32.v16i8(<16 x i8> %a) nounwind - %tmp1 = trunc i32 %tmp to i16 - %tmp2 = insertelement <8 x i16> undef, i16 %tmp1, i32 0 - %tmp3 = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %tmp2, i32 4) - %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <16 x i32> zeroinitializer - ret <16 x i8> %tmp4 -} - -declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone -declare i32 @llvm.arm64.neon.uaddlv.i32.v16i8(<16 x i8>) nounwind readnone diff --git a/test/CodeGen/ARM64/tbl.ll b/test/CodeGen/ARM64/tbl.ll deleted file mode 100644 index e1edd21d8a24..000000000000 --- a/test/CodeGen/ARM64/tbl.ll +++ /dev/null @@ -1,132 +0,0 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s - -define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind { -; CHECK: tbl1_8b -; CHECK: tbl.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind { -; CHECK: tbl1_16b -; CHECK: tbl.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B) - ret <16 x i8> %tmp3 -} - -define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) { -; CHECK: tbl2_8b -; CHECK: tbl.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) - ret <8 x i8> %tmp3 -} - -define <16 x i8> 
@tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { -; CHECK: tbl2_16b -; CHECK: tbl.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) - ret <16 x i8> %tmp3 -} - -define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { -; CHECK: tbl3_8b -; CHECK: tbl.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { -; CHECK: tbl3_16b -; CHECK: tbl.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) - ret <16 x i8> %tmp3 -} - -define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { -; CHECK: tbl4_8b -; CHECK: tbl.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { -; CHECK: tbl4_16b -; CHECK: tbl.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) - ret <16 x i8> %tmp3 -} - -declare <8 x i8> @llvm.arm64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone - -define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind { -; CHECK: tbx1_8b -; CHECK: tbx.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind { -; CHECK: tbx1_16b -; CHECK: tbx.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) - ret <16 x i8> %tmp3 -} - -define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { -; CHECK: tbx2_8b -; CHECK: tbx.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { -; CHECK: tbx2_16b -; CHECK: tbx.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) - ret <16 x i8> %tmp3 -} - -define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { -; CHECK: tbx3_8b -; CHECK: tbx.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { -; CHECK: tbx3_16b -; CHECK: tbx.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx3.v16i8(<16 x 
i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) - ret <16 x i8> %tmp3 -} - -define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) { -; CHECK: tbx4_8b -; CHECK: tbx.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) { -; CHECK: tbx4_16b -; CHECK: tbx.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) - ret <16 x i8> %tmp3 -} - -declare <8 x i8> @llvm.arm64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone - diff --git a/test/CodeGen/ARM64/vcnt.ll b/test/CodeGen/ARM64/vcnt.ll deleted file mode 100644 index e00658a4bda2..000000000000 --- a/test/CodeGen/ARM64/vcnt.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s - -define <8 x i8> @cls_8b(<8 x i8>* %A) nounwind { -;CHECK-LABEL: cls_8b: -;CHECK: cls.8b - %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.cls.v8i8(<8 x i8> %tmp1) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @cls_16b(<16 x i8>* %A) nounwind { -;CHECK-LABEL: cls_16b: -;CHECK: cls.16b - %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.cls.v16i8(<16 x i8> %tmp1) - ret <16 x i8> %tmp3 -} - -define <4 x i16> @cls_4h(<4 x i16>* %A) nounwind { -;CHECK-LABEL: cls_4h: -;CHECK: cls.4h - %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.cls.v4i16(<4 x i16> %tmp1) - ret <4 x i16> %tmp3 -} - -define <8 x i16> @cls_8h(<8 x i16>* %A) nounwind { -;CHECK-LABEL: cls_8h: -;CHECK: cls.8h - %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.cls.v8i16(<8 x i16> %tmp1) - ret <8 x i16> %tmp3 -} - -define <2 x i32> @cls_2s(<2 x i32>* %A) nounwind { -;CHECK-LABEL: cls_2s: -;CHECK: cls.2s - %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.cls.v2i32(<2 x i32> %tmp1) - ret <2 x i32> %tmp3 -} - -define <4 x i32> @cls_4s(<4 x i32>* %A) nounwind { -;CHECK-LABEL: cls_4s: -;CHECK: cls.4s - %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.cls.v4i32(<4 x i32> %tmp1) - ret <4 x i32> %tmp3 -} - -declare <8 x i8> @llvm.arm64.neon.cls.v8i8(<8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.cls.v16i8(<16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.cls.v4i16(<4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.cls.v8i16(<8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.cls.v2i32(<2 x i32>) nounwind readnone -declare <4 x 
i32> @llvm.arm64.neon.cls.v4i32(<4 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM64/vcvt_n.ll b/test/CodeGen/ARM64/vcvt_n.ll deleted file mode 100644 index 46de557b0709..000000000000 --- a/test/CodeGen/ARM64/vcvt_n.ll +++ /dev/null @@ -1,49 +0,0 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s - -define <2 x float> @cvtf32fxpu(<2 x i32> %a) nounwind readnone ssp { -; CHECK-LABEL: cvtf32fxpu: -; CHECK: ucvtf.2s v0, v0, #9 -; CHECK: ret - %vcvt_n1 = tail call <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 9) - ret <2 x float> %vcvt_n1 -} - -define <2 x float> @cvtf32fxps(<2 x i32> %a) nounwind readnone ssp { -; CHECK-LABEL: cvtf32fxps: -; CHECK: scvtf.2s v0, v0, #12 -; CHECK: ret - %vcvt_n1 = tail call <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 12) - ret <2 x float> %vcvt_n1 -} - -define <4 x float> @cvtqf32fxpu(<4 x i32> %a) nounwind readnone ssp { -; CHECK-LABEL: cvtqf32fxpu: -; CHECK: ucvtf.4s v0, v0, #18 -; CHECK: ret - %vcvt_n1 = tail call <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 18) - ret <4 x float> %vcvt_n1 -} - -define <4 x float> @cvtqf32fxps(<4 x i32> %a) nounwind readnone ssp { -; CHECK-LABEL: cvtqf32fxps: -; CHECK: scvtf.4s v0, v0, #30 -; CHECK: ret - %vcvt_n1 = tail call <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 30) - ret <4 x float> %vcvt_n1 -} -define <2 x double> @f1(<2 x i64> %a) nounwind readnone ssp { - %vcvt_n1 = tail call <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 12) - ret <2 x double> %vcvt_n1 -} - -define <2 x double> @f2(<2 x i64> %a) nounwind readnone ssp { - %vcvt_n1 = tail call <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 9) - ret <2 x double> %vcvt_n1 -} - -declare <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone -declare <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone -declare <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone -declare <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone -declare <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone -declare <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone diff --git a/test/CodeGen/ARM64/vminmaxnm.ll b/test/CodeGen/ARM64/vminmaxnm.ll deleted file mode 100644 index 628640759a51..000000000000 --- a/test/CodeGen/ARM64/vminmaxnm.ll +++ /dev/null @@ -1,68 +0,0 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s - -define <2 x float> @f1(<2 x float> %a, <2 x float> %b) nounwind readnone ssp { -; CHECK: fmaxnm.2s v0, v0, v1 -; CHECK: ret - %vmaxnm2.i = tail call <2 x float> @llvm.arm64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b) nounwind - ret <2 x float> %vmaxnm2.i -} - -define <4 x float> @f2(<4 x float> %a, <4 x float> %b) nounwind readnone ssp { -; CHECK: fmaxnm.4s v0, v0, v1 -; CHECK: ret - %vmaxnm2.i = tail call <4 x float> @llvm.arm64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b) nounwind - ret <4 x float> %vmaxnm2.i -} - -define <2 x double> @f3(<2 x double> %a, <2 x double> %b) nounwind readnone ssp { -; CHECK: fmaxnm.2d v0, v0, v1 -; CHECK: ret - %vmaxnm2.i = tail call <2 x double> @llvm.arm64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b) nounwind - ret <2 x double> %vmaxnm2.i -} - -define <2 x float> @f4(<2 x float> %a, <2 x float> %b) nounwind readnone ssp { -; CHECK: 
fminnm.2s v0, v0, v1 -; CHECK: ret - %vminnm2.i = tail call <2 x float> @llvm.arm64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b) nounwind - ret <2 x float> %vminnm2.i -} - -define <4 x float> @f5(<4 x float> %a, <4 x float> %b) nounwind readnone ssp { -; CHECK: fminnm.4s v0, v0, v1 -; CHECK: ret - %vminnm2.i = tail call <4 x float> @llvm.arm64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b) nounwind - ret <4 x float> %vminnm2.i -} - -define <2 x double> @f6(<2 x double> %a, <2 x double> %b) nounwind readnone ssp { -; CHECK: fminnm.2d v0, v0, v1 -; CHECK: ret - %vminnm2.i = tail call <2 x double> @llvm.arm64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b) nounwind - ret <2 x double> %vminnm2.i -} - -declare <2 x double> @llvm.arm64.neon.fminnm.v2f64(<2 x double>, <2 x double>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x float> @llvm.arm64.neon.fminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fmaxnm.v2f64(<2 x double>, <2 x double>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x float> @llvm.arm64.neon.fmaxnm.v2f32(<2 x float>, <2 x float>) nounwind readnone - - -define double @test_fmaxnmv(<2 x double> %in) { -; CHECK-LABEL: test_fmaxnmv: -; CHECK: fmaxnmp.2d d0, v0 - %max = call double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double> %in) - ret double %max -} - -define double @test_fminnmv(<2 x double> %in) { -; CHECK-LABEL: test_fminnmv: -; CHECK: fminnmp.2d d0, v0 - %min = call double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double> %in) - ret double %min -} - -declare double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double>) -declare double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double>) diff --git a/test/CodeGen/ARM64/vqsub.ll b/test/CodeGen/ARM64/vqsub.ll deleted file mode 100644 index 0afeb68348b6..000000000000 --- a/test/CodeGen/ARM64/vqsub.ll +++ /dev/null @@ -1,147 +0,0 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s - -define <8 x i8> @sqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: sqsub8b: -;CHECK: sqsub.8b - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i8> %tmp3 -} - -define <4 x i16> @sqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: sqsub4h: -;CHECK: sqsub.4h - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i16> %tmp3 -} - -define <2 x i32> @sqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: sqsub2s: -;CHECK: sqsub.2s - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i32> %tmp3 -} - -define <8 x i8> @uqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: uqsub8b: -;CHECK: uqsub.8b - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i8> %tmp3 -} - -define <4 x i16> @uqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: uqsub4h: -;CHECK: uqsub.4h - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i16> %tmp3 -} - -define <2 x i32> @uqsub2s(<2 x i32>* %A, <2 x i32>* %B) 
nounwind { -;CHECK-LABEL: uqsub2s: -;CHECK: uqsub.2s - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i32> %tmp3 -} - -define <16 x i8> @sqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: sqsub16b: -;CHECK: sqsub.16b - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) - ret <16 x i8> %tmp3 -} - -define <8 x i16> @sqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: sqsub8h: -;CHECK: sqsub.8h - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) - ret <8 x i16> %tmp3 -} - -define <4 x i32> @sqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: sqsub4s: -;CHECK: sqsub.4s - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) - ret <4 x i32> %tmp3 -} - -define <2 x i64> @sqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: sqsub2d: -;CHECK: sqsub.2d - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) - ret <2 x i64> %tmp3 -} - -define <16 x i8> @uqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: uqsub16b: -;CHECK: uqsub.16b - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) - ret <16 x i8> %tmp3 -} - -define <8 x i16> @uqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: uqsub8h: -;CHECK: uqsub.8h - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) - ret <8 x i16> %tmp3 -} - -define <4 x i32> @uqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: uqsub4s: -;CHECK: uqsub.4s - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) - ret <4 x i32> %tmp3 -} - -define <2 x i64> @uqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: uqsub2d: -;CHECK: uqsub.2d - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.uqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) - ret <2 x i64> %tmp3 -} - -declare <8 x i8> @llvm.arm64.neon.sqsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.sqsub.v1i64(<1 x i64>, <1 x i64>) nounwind readnone - -declare <8 x i8> @llvm.arm64.neon.uqsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uqsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.uqsub.v1i64(<1 x i64>, <1 x i64>) nounwind readnone - -declare <16 x i8> @llvm.arm64.neon.sqsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) nounwind readnone - 
-declare <16 x i8> @llvm.arm64.neon.uqsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uqsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uqsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.uqsub.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/test/DebugInfo/ARM64/struct_by_value.ll b/test/DebugInfo/AArch64/struct_by_value.ll similarity index 100% rename from test/DebugInfo/ARM64/struct_by_value.ll rename to test/DebugInfo/AArch64/struct_by_value.ll diff --git a/test/MC/AArch64/adrp-relocation.s b/test/MC/AArch64/adrp-relocation.s index 03b930d53970..3bcef34e4f5d 100644 --- a/test/MC/AArch64/adrp-relocation.s +++ b/test/MC/AArch64/adrp-relocation.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-linux-gnu -filetype=obj -o - %s| llvm-readobj -r - | FileCheck %s +// RUN: llvm-mc -triple=aarch64-linux-gnu -filetype=obj -o - %s| llvm-readobj -r - | FileCheck %s .text // These should produce an ADRP/ADD pair to calculate the address of // testfn. The important point is that LLVM shouldn't think it can deal with the diff --git a/test/MC/ARM64/adr.s b/test/MC/AArch64/arm64-adr.s similarity index 88% rename from test/MC/ARM64/adr.s rename to test/MC/AArch64/arm64-adr.s index 3442225dfe3a..131e545d3bb5 100644 --- a/test/MC/ARM64/adr.s +++ b/test/MC/AArch64/arm64-adr.s @@ -8,9 +8,9 @@ adr x0, foo // CHECK: adr x0, #0 // encoding: [0x00,0x00,0x00,0x10] // CHECK: adr x0, #1 // encoding: [0x00,0x00,0x00,0x30] // CHECK: adr x0, .Ltmp0 // encoding: [A,A,A,0x10'A'] -// CHECK-NEXT: // fixup A - offset: 0, value: .Ltmp0, kind: fixup_arm64_pcrel_adr_imm21 +// CHECK-NEXT: // fixup A - offset: 0, value: .Ltmp0, kind: fixup_aarch64_pcrel_adr_imm21 // CHECK: adr x0, foo // encoding: [A,A,A,0x10'A'] -// CHECK-NEXT: // fixup A - offset: 0, value: foo, kind: fixup_arm64_pcrel_adr_imm21 +// CHECK-NEXT: // fixup A - offset: 0, value: foo, kind: fixup_aarch64_pcrel_adr_imm21 adrp x0, #0 adrp x0, #4096 @@ -19,9 +19,9 @@ adrp x0, foo // CHECK: adrp x0, #0 // encoding: [0x00,0x00,0x00,0x90] // CHECK: adrp x0, #4096 // encoding: [0x00,0x00,0x00,0xb0] // CHECK: adrp x0, .Ltmp0 // encoding: [A,A,A,0x90'A'] -// CHECK-NEXT: // fixup A - offset: 0, value: .Ltmp0, kind: fixup_arm64_pcrel_adrp_imm21 +// CHECK-NEXT: // fixup A - offset: 0, value: .Ltmp0, kind: fixup_aarch64_pcrel_adrp_imm21 // CHECK: adrp x0, foo // encoding: [A,A,A,0x90'A'] -// CHECK-NEXT: // fixup A - offset: 0, value: foo, kind: fixup_arm64_pcrel_adrp_imm21 +// CHECK-NEXT: // fixup A - offset: 0, value: foo, kind: fixup_aarch64_pcrel_adrp_imm21 adr x0, #0xffffffff adrp x0, #0xffffffff diff --git a/test/MC/ARM64/advsimd.s b/test/MC/AArch64/arm64-advsimd.s similarity index 100% rename from test/MC/ARM64/advsimd.s rename to test/MC/AArch64/arm64-advsimd.s diff --git a/test/MC/ARM64/aliases.s b/test/MC/AArch64/arm64-aliases.s similarity index 100% rename from test/MC/ARM64/aliases.s rename to test/MC/AArch64/arm64-aliases.s diff --git a/test/MC/ARM64/arithmetic-encoding.s b/test/MC/AArch64/arm64-arithmetic-encoding.s similarity index 100% rename from test/MC/ARM64/arithmetic-encoding.s rename to test/MC/AArch64/arm64-arithmetic-encoding.s diff --git a/test/MC/ARM64/arm64-fixup.s b/test/MC/AArch64/arm64-arm64-fixup.s similarity index 76% rename from test/MC/ARM64/arm64-fixup.s rename to test/MC/AArch64/arm64-arm64-fixup.s index eae6f68390ef..81306fb5ac06 100644 --- a/test/MC/ARM64/arm64-fixup.s +++ b/test/MC/AArch64/arm64-arm64-fixup.s @@ -3,8 
+3,8 @@ foo: adr x3, Lbar ; CHECK: adr x3, Lbar ; encoding: [0x03'A',A,A,0x10'A'] -; CHECK: fixup A - offset: 0, value: Lbar, kind: fixup_arm64_pcrel_adr_imm21 +; CHECK: fixup A - offset: 0, value: Lbar, kind: fixup_aarch64_pcrel_adr_imm21 Lbar: adrp x3, _printf@page ; CHECK: adrp x3, _printf@PAGE ; encoding: [0x03'A',A,A,0x90'A'] -; CHECK: fixup A - offset: 0, value: _printf@PAGE, kind: fixup_arm64_pcrel_adrp_imm21 +; CHECK: fixup A - offset: 0, value: _printf@PAGE, kind: fixup_aarch64_pcrel_adrp_imm21 diff --git a/test/MC/ARM64/basic-a64-instructions.s b/test/MC/AArch64/arm64-basic-a64-instructions.s similarity index 100% rename from test/MC/ARM64/basic-a64-instructions.s rename to test/MC/AArch64/arm64-basic-a64-instructions.s diff --git a/test/MC/ARM64/be-datalayout.s b/test/MC/AArch64/arm64-be-datalayout.s similarity index 100% rename from test/MC/ARM64/be-datalayout.s rename to test/MC/AArch64/arm64-be-datalayout.s diff --git a/test/MC/ARM64/bitfield-encoding.s b/test/MC/AArch64/arm64-bitfield-encoding.s similarity index 100% rename from test/MC/ARM64/bitfield-encoding.s rename to test/MC/AArch64/arm64-bitfield-encoding.s diff --git a/test/MC/ARM64/branch-encoding.s b/test/MC/AArch64/arm64-branch-encoding.s similarity index 77% rename from test/MC/ARM64/branch-encoding.s rename to test/MC/AArch64/arm64-branch-encoding.s index ba8fb3d90187..48c2099012f6 100644 --- a/test/MC/ARM64/branch-encoding.s +++ b/test/MC/AArch64/arm64-branch-encoding.s @@ -20,7 +20,7 @@ foo: ; CHECK: encoding: [0x20,0x01,0x3f,0xd6] bl L1 ; CHECK: bl L1 ; encoding: [A,A,A,0b100101AA] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_call26 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_call26 ;----------------------------------------------------------------------------- ; Contitional branch instructions. 
@@ -28,52 +28,52 @@ foo: b L1 ; CHECK: b L1 ; encoding: [A,A,A,0b000101AA] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch26 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch26 b.eq L1 ; CHECK: b.eq L1 ; encoding: [0bAAA00000,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.ne L1 ; CHECK: b.ne L1 ; encoding: [0bAAA00001,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.cs L1 ; CHECK: b.hs L1 ; encoding: [0bAAA00010,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.cc L1 ; CHECK: b.lo L1 ; encoding: [0bAAA00011,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.mi L1 ; CHECK: b.mi L1 ; encoding: [0bAAA00100,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.pl L1 ; CHECK: b.pl L1 ; encoding: [0bAAA00101,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.vs L1 ; CHECK: b.vs L1 ; encoding: [0bAAA00110,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.vc L1 ; CHECK: b.vc L1 ; encoding: [0bAAA00111,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.hi L1 ; CHECK: b.hi L1 ; encoding: [0bAAA01000,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.ls L1 ; CHECK: b.ls L1 ; encoding: [0bAAA01001,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.ge L1 ; CHECK: b.ge L1 ; encoding: [0bAAA01010,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.lt L1 ; CHECK: b.lt L1 ; encoding: [0bAAA01011,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.gt L1 ; CHECK: b.gt L1 ; encoding: [0bAAA01100,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.le L1 ; CHECK: b.le L1 ; encoding: [0bAAA01101,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.al L1 ; CHECK: b.al L1 ; encoding: [0bAAA01110,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 L1: b #28 ; CHECK: b #28 diff --git a/test/MC/ARM64/condbr-without-dots.s b/test/MC/AArch64/arm64-condbr-without-dots.s similarity index 100% rename from 
test/MC/ARM64/condbr-without-dots.s rename to test/MC/AArch64/arm64-condbr-without-dots.s diff --git a/test/MC/ARM64/crypto.s b/test/MC/AArch64/arm64-crypto.s similarity index 100% rename from test/MC/ARM64/crypto.s rename to test/MC/AArch64/arm64-crypto.s diff --git a/test/MC/ARM64/diagno-predicate.s b/test/MC/AArch64/arm64-diagno-predicate.s similarity index 100% rename from test/MC/ARM64/diagno-predicate.s rename to test/MC/AArch64/arm64-diagno-predicate.s diff --git a/test/MC/ARM64/diags.s b/test/MC/AArch64/arm64-diags.s similarity index 99% rename from test/MC/ARM64/diags.s rename to test/MC/AArch64/arm64-diags.s index 3ff2b54998f7..cf00e9826e16 100644 --- a/test/MC/ARM64/diags.s +++ b/test/MC/AArch64/arm64-diags.s @@ -8,7 +8,7 @@ foo: ldr x3, (foo + 4) ldr x3, [foo + 4] ; CHECK: ldr x3, foo+4 ; encoding: [0bAAA00011,A,A,0x58] -; CHECK: ; fixup A - offset: 0, value: foo+4, kind: fixup_arm64_ldr_pcrel_imm19 +; CHECK: ; fixup A - offset: 0, value: foo+4, kind: fixup_aarch64_ldr_pcrel_imm19 ; CHECK-ERRORS: error: invalid operand for instruction ; The last argument should be flagged as an error. rdar://9576009 diff --git a/test/MC/ARM64/directive_loh.s b/test/MC/AArch64/arm64-directive_loh.s similarity index 100% rename from test/MC/ARM64/directive_loh.s rename to test/MC/AArch64/arm64-directive_loh.s diff --git a/test/MC/ARM64/elf-reloc-condbr.s b/test/MC/AArch64/arm64-elf-reloc-condbr.s similarity index 100% rename from test/MC/ARM64/elf-reloc-condbr.s rename to test/MC/AArch64/arm64-elf-reloc-condbr.s diff --git a/test/MC/ARM64/elf-relocs.s b/test/MC/AArch64/arm64-elf-relocs.s similarity index 100% rename from test/MC/ARM64/elf-relocs.s rename to test/MC/AArch64/arm64-elf-relocs.s diff --git a/test/MC/ARM64/fp-encoding.s b/test/MC/AArch64/arm64-fp-encoding.s similarity index 100% rename from test/MC/ARM64/fp-encoding.s rename to test/MC/AArch64/arm64-fp-encoding.s diff --git a/test/MC/ARM64/large-relocs.s b/test/MC/AArch64/arm64-large-relocs.s similarity index 83% rename from test/MC/ARM64/large-relocs.s rename to test/MC/AArch64/arm64-large-relocs.s index 48aea43d6bcb..2a0cfa222862 100644 --- a/test/MC/ARM64/large-relocs.s +++ b/test/MC/AArch64/arm64-large-relocs.s @@ -4,9 +4,9 @@ movz x2, #:abs_g0:sym movk w3, #:abs_g0_nc:sym // CHECK: movz x2, #:abs_g0:sym // encoding: [0bAAA00010,A,0b100AAAAA,0xd2] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_aarch64_movw // CHECK: movk w3, #:abs_g0_nc:sym // encoding: [0bAAA00011,A,0b100AAAAA,0x72] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_aarch64_movw // CHECK-OBJ: 0 R_AARCH64_MOVW_UABS_G0 sym // CHECK-OBJ: 4 R_AARCH64_MOVW_UABS_G0_NC sym @@ -14,9 +14,9 @@ movz x4, #:abs_g1:sym movk w5, #:abs_g1_nc:sym // CHECK: movz x4, #:abs_g1:sym // encoding: [0bAAA00100,A,0b101AAAAA,0xd2] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_aarch64_movw // CHECK: movk w5, #:abs_g1_nc:sym // encoding: [0bAAA00101,A,0b101AAAAA,0x72] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_aarch64_movw // CHECK-OBJ: 8 R_AARCH64_MOVW_UABS_G1 sym // CHECK-OBJ: c R_AARCH64_MOVW_UABS_G1_NC sym @@ -24,15 +24,15 @@ movz x6, #:abs_g2:sym 
movk x7, #:abs_g2_nc:sym // CHECK: movz x6, #:abs_g2:sym // encoding: [0bAAA00110,A,0b110AAAAA,0xd2] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_aarch64_movw // CHECK: movk x7, #:abs_g2_nc:sym // encoding: [0bAAA00111,A,0b110AAAAA,0xf2] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_aarch64_movw // CHECK-OBJ: 10 R_AARCH64_MOVW_UABS_G2 sym // CHECK-OBJ: 14 R_AARCH64_MOVW_UABS_G2_NC sym movz x8, #:abs_g3:sym // CHECK: movz x8, #:abs_g3:sym // encoding: [0bAAA01000,A,0b111AAAAA,0xd2] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_aarch64_movw // CHECK-OBJ: 18 R_AARCH64_MOVW_UABS_G3 sym diff --git a/test/MC/ARM64/leaf-compact-unwind.s b/test/MC/AArch64/arm64-leaf-compact-unwind.s similarity index 100% rename from test/MC/ARM64/leaf-compact-unwind.s rename to test/MC/AArch64/arm64-leaf-compact-unwind.s diff --git a/test/MC/ARM64/logical-encoding.s b/test/MC/AArch64/arm64-logical-encoding.s similarity index 100% rename from test/MC/ARM64/logical-encoding.s rename to test/MC/AArch64/arm64-logical-encoding.s diff --git a/test/MC/ARM64/mapping-across-sections.s b/test/MC/AArch64/arm64-mapping-across-sections.s similarity index 100% rename from test/MC/ARM64/mapping-across-sections.s rename to test/MC/AArch64/arm64-mapping-across-sections.s diff --git a/test/MC/ARM64/mapping-within-section.s b/test/MC/AArch64/arm64-mapping-within-section.s similarity index 100% rename from test/MC/ARM64/mapping-within-section.s rename to test/MC/AArch64/arm64-mapping-within-section.s diff --git a/test/MC/ARM64/memory.s b/test/MC/AArch64/arm64-memory.s similarity index 100% rename from test/MC/ARM64/memory.s rename to test/MC/AArch64/arm64-memory.s diff --git a/test/MC/ARM64/nv-cond.s b/test/MC/AArch64/arm64-nv-cond.s similarity index 100% rename from test/MC/ARM64/nv-cond.s rename to test/MC/AArch64/arm64-nv-cond.s diff --git a/test/MC/ARM64/optional-hash.s b/test/MC/AArch64/arm64-optional-hash.s similarity index 100% rename from test/MC/ARM64/optional-hash.s rename to test/MC/AArch64/arm64-optional-hash.s diff --git a/test/MC/ARM64/separator.s b/test/MC/AArch64/arm64-separator.s similarity index 100% rename from test/MC/ARM64/separator.s rename to test/MC/AArch64/arm64-separator.s diff --git a/test/MC/ARM64/simd-ldst.s b/test/MC/AArch64/arm64-simd-ldst.s similarity index 100% rename from test/MC/ARM64/simd-ldst.s rename to test/MC/AArch64/arm64-simd-ldst.s diff --git a/test/MC/ARM64/small-data-fixups.s b/test/MC/AArch64/arm64-small-data-fixups.s similarity index 100% rename from test/MC/ARM64/small-data-fixups.s rename to test/MC/AArch64/arm64-small-data-fixups.s diff --git a/test/MC/ARM64/spsel-sysreg.s b/test/MC/AArch64/arm64-spsel-sysreg.s similarity index 100% rename from test/MC/ARM64/spsel-sysreg.s rename to test/MC/AArch64/arm64-spsel-sysreg.s diff --git a/test/MC/ARM64/system-encoding.s b/test/MC/AArch64/arm64-system-encoding.s similarity index 100% rename from test/MC/ARM64/system-encoding.s rename to test/MC/AArch64/arm64-system-encoding.s diff --git a/test/MC/ARM64/target-specific-sysreg.s b/test/MC/AArch64/arm64-target-specific-sysreg.s similarity index 100% rename from test/MC/ARM64/target-specific-sysreg.s rename to test/MC/AArch64/arm64-target-specific-sysreg.s diff --git 
a/test/MC/ARM64/tls-modifiers-darwin.s b/test/MC/AArch64/arm64-tls-modifiers-darwin.s similarity index 100% rename from test/MC/ARM64/tls-modifiers-darwin.s rename to test/MC/AArch64/arm64-tls-modifiers-darwin.s diff --git a/test/MC/ARM64/tls-relocs.s b/test/MC/AArch64/arm64-tls-relocs.s similarity index 82% rename from test/MC/ARM64/tls-relocs.s rename to test/MC/AArch64/arm64-tls-relocs.s index 681f616d909d..96c2b55c36d8 100644 --- a/test/MC/ARM64/tls-relocs.s +++ b/test/MC/AArch64/arm64-tls-relocs.s @@ -9,14 +9,14 @@ movz x15, #:gottprel_g1:var // CHECK: movz x15, #:gottprel_g1:var // encoding: [0bAAA01111,A,0b101AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_aarch64_movw // CHECK-ELF: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 [[VARSYM:[^ ]+]] movk x13, #:gottprel_g0_nc:var // CHECK: movk x13, #:gottprel_g0_nc:var // encoding: [0bAAA01101,A,0b100AAAAA,0xf2] -// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC [[VARSYM]] @@ -25,11 +25,11 @@ ldr x10, [x0, #:gottprel_lo12:var] ldr x9, :gottprel:var // CHECK: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A'] -// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_arm64_pcrel_adrp_imm21 +// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_pcrel_adrp_imm21 // CHECK: ldr x10, [x0, :gottprel_lo12:var] // encoding: [0x0a,0bAAAAAA00,0b01AAAAAA,0xf9] -// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK: ldr x9, :gottprel:var // encoding: [0bAAA01001,A,A,0x58] -// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_arm64_ldr_pcrel_imm19 +// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_ldr_pcrel_imm19 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC [[VARSYM]] @@ -43,9 +43,9 @@ movz x3, #:tprel_g2:var movn x4, #:tprel_g2:var // CHECK: movz x3, #:tprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_aarch64_movw // CHECK: movn x4, #:tprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]] @@ -55,11 +55,11 @@ movn x6, #:tprel_g1:var movz w7, #:tprel_g1:var // CHECK: movz x5, #:tprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw // CHECK: movn x6, #:tprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw +// 
CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw // CHECK: movz w7, #:tprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]] @@ -69,9 +69,9 @@ movk x9, #:tprel_g1_nc:var movk w10, #:tprel_g1_nc:var // CHECK: movk x9, #:tprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_aarch64_movw // CHECK: movk w10, #:tprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]] @@ -81,11 +81,11 @@ movn x12, #:tprel_g0:var movz w13, #:tprel_g0:var // CHECK: movz x11, #:tprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw // CHECK: movn x12, #:tprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw // CHECK: movz w13, #:tprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]] @@ -95,9 +95,9 @@ movk x15, #:tprel_g0_nc:var movk w16, #:tprel_g0_nc:var // CHECK: movk x15, #:tprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK: movk w16, #:tprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]] @@ -105,14 +105,14 @@ add x21, x22, #:tprel_lo12:var // CHECK: add x21, x22, :tprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_add_imm12 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_ADD_TPREL_LO12 [[VARSYM]] add x25, x26, #:tprel_lo12_nc:var // CHECK: add x25, x26, :tprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] -// CHECK-NEXT: // fixup A - offset: 0, value: 
:tprel_lo12_nc:var, kind: fixup_arm64_add_imm12 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]] @@ -120,9 +120,9 @@ ldrb w29, [x30, #:tprel_lo12:var] ldrsb x29, [x28, #:tprel_lo12_nc:var] // CHECK: ldrb w29, [x30, :tprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale1 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1 // CHECK: ldrsb x29, [x28, :tprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale1 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST8_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC [[VARSYM]] @@ -131,9 +131,9 @@ strh w27, [x26, #:tprel_lo12:var] ldrsh x25, [x24, #:tprel_lo12_nc:var] // CHECK: strh w27, [x26, :tprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale2 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2 // CHECK: ldrsh x25, [x24, :tprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale2 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST16_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC [[VARSYM]] @@ -142,9 +142,9 @@ ldr w23, [x22, #:tprel_lo12:var] ldrsw x21, [x20, #:tprel_lo12_nc:var] // CHECK: ldr w23, [x22, :tprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale4 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4 // CHECK: ldrsw x21, [x20, :tprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale4 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST32_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC [[VARSYM]] @@ -152,9 +152,9 @@ ldr x19, [x18, #:tprel_lo12:var] str x17, [x16, #:tprel_lo12_nc:var] // CHECK: ldr x19, [x18, :tprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK: str x17, [x16, :tprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST64_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 
{{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC [[VARSYM]] @@ -167,9 +167,9 @@ movz x3, #:dtprel_g2:var movn x4, #:dtprel_g2:var // CHECK: movz x3, #:dtprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw // CHECK: movn x4, #:dtprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]] @@ -179,11 +179,11 @@ movn x6, #:dtprel_g1:var movz w7, #:dtprel_g1:var // CHECK: movz x5, #:dtprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw // CHECK: movn x6, #:dtprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw // CHECK: movz w7, #:dtprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]] @@ -193,9 +193,9 @@ movk x9, #:dtprel_g1_nc:var movk w10, #:dtprel_g1_nc:var // CHECK: movk x9, #:dtprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_aarch64_movw // CHECK: movk w10, #:dtprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]] @@ -205,11 +205,11 @@ movn x12, #:dtprel_g0:var movz w13, #:dtprel_g0:var // CHECK: movz x11, #:dtprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw // CHECK: movn x12, #:dtprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw // CHECK: movz w13, #:dtprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]] @@ -219,9 +219,9 @@ 
movk x15, #:dtprel_g0_nc:var movk w16, #:dtprel_g0_nc:var // CHECK: movk x15, #:dtprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK: movk w16, #:dtprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]] @@ -229,14 +229,14 @@ add x21, x22, #:dtprel_lo12:var // CHECK: add x21, x22, :dtprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_add_imm12 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_ADD_DTPREL_LO12 [[VARSYM]] add x25, x26, #:dtprel_lo12_nc:var // CHECK: add x25, x26, :dtprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_add_imm12 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]] @@ -244,9 +244,9 @@ ldrb w29, [x30, #:dtprel_lo12:var] ldrsb x29, [x28, #:dtprel_lo12_nc:var] // CHECK: ldrb w29, [x30, :dtprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale1 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1 // CHECK: ldrsb x29, [x28, :dtprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale1 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST8_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC [[VARSYM]] @@ -255,9 +255,9 @@ strh w27, [x26, #:dtprel_lo12:var] ldrsh x25, [x24, #:dtprel_lo12_nc:var] // CHECK: strh w27, [x26, :dtprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale2 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2 // CHECK: ldrsh x25, [x24, :dtprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale2 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST16_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC [[VARSYM]] @@ -266,9 +266,9 @@ ldr w23, [x22, #:dtprel_lo12:var] ldrsw x21, [x20, #:dtprel_lo12_nc:var] // CHECK: ldr w23, [x22, :dtprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale4 
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4 // CHECK: ldrsw x21, [x20, :dtprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale4 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST32_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC [[VARSYM]] @@ -276,9 +276,9 @@ ldr x19, [x18, #:dtprel_lo12:var] str x17, [x16, #:dtprel_lo12_nc:var] // CHECK: ldr x19, [x18, :dtprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK: str x17, [x16, :dtprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST64_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC [[VARSYM]] @@ -294,13 +294,13 @@ blr x3 // CHECK: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A'] -// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_arm64_pcrel_adrp_imm21 +// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_aarch64_pcrel_adrp_imm21 // CHECK: ldr x7, [x6, :tlsdesc_lo12:var] // encoding: [0xc7,0bAAAAAA00,0b01AAAAAA,0xf9] -// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK: add x5, x4, :tlsdesc_lo12:var // encoding: [0x85,0bAAAAAA00,0b00AAAAAA,0x91] -// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_arm64_add_imm12 +// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_add_imm12 // CHECK: .tlsdesccall var // encoding: [] -// CHECK-NEXT: // fixup A - offset: 0, value: var, kind: fixup_arm64_tlsdesc_call +// CHECK-NEXT: // fixup A - offset: 0, value: var, kind: fixup_aarch64_tlsdesc_call // CHECK: blr x3 // encoding: [0x60,0x00,0x3f,0xd6] diff --git a/test/MC/ARM64/v128_lo-diagnostics.s b/test/MC/AArch64/arm64-v128_lo-diagnostics.s similarity index 100% rename from test/MC/ARM64/v128_lo-diagnostics.s rename to test/MC/AArch64/arm64-v128_lo-diagnostics.s diff --git a/test/MC/ARM64/variable-exprs.s b/test/MC/AArch64/arm64-variable-exprs.s similarity index 100% rename from test/MC/ARM64/variable-exprs.s rename to test/MC/AArch64/arm64-variable-exprs.s diff --git a/test/MC/ARM64/vector-lists.s b/test/MC/AArch64/arm64-vector-lists.s similarity index 100% rename from test/MC/ARM64/vector-lists.s rename to test/MC/AArch64/arm64-vector-lists.s diff --git a/test/MC/ARM64/verbose-vector-case.s b/test/MC/AArch64/arm64-verbose-vector-case.s similarity index 100% rename from test/MC/ARM64/verbose-vector-case.s rename to test/MC/AArch64/arm64-verbose-vector-case.s diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s index c6cb6b01f8a6..a4a3b1379c9b 100644 --- 
a/test/MC/AArch64/basic-a64-diagnostics.s +++ b/test/MC/AArch64/basic-a64-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple arm64-none-linux-gnu < %s 2> %t +// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t // RUN: FileCheck --check-prefix=CHECK-ERROR --check-prefix=CHECK-ERROR-ARM64 < %t %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/basic-a64-instructions.s b/test/MC/AArch64/basic-a64-instructions.s index 72156bc9c51c..a12968b04e30 100644 --- a/test/MC/AArch64/basic-a64-instructions.s +++ b/test/MC/AArch64/basic-a64-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -show-encoding -mattr=+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+fp-armv8 < %s | FileCheck %s .globl _func // Check that the assembler can handle the documented syntax from the ARM ARM. @@ -127,7 +127,7 @@ _func: // CHECK: adds w19, w17, w1, uxtx // encoding: [0x33,0x62,0x21,0x2b] // CHECK: adds w2, w5, w1, sxtb #1 // encoding: [0xa2,0x84,0x21,0x2b] // CHECK: adds w26, wsp, w19, sxth // encoding: [0xfa,0xa3,0x33,0x2b] -// CHECK-ARM64: cmn w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b] +// CHECK: cmn w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b] // CHECK: adds w2, w3, w5, sxtx // encoding: [0x62,0xe0,0x25,0x2b] // subs @@ -255,7 +255,7 @@ _func: // CHECK: sub sp, x3, x7, lsl #4 // encoding: [0x7f,0x70,0x27,0xcb] // CHECK: add w2, wsp, w3, lsl #1 // encoding: [0xe2,0x47,0x23,0x0b] // CHECK: cmp wsp, w9 // encoding: [0xff,0x43,0x29,0x6b] -// CHECK-ARM64: cmn wsp, w3, lsl #4 // encoding: [0xff,0x53,0x23,0x2b] +// CHECK: cmn wsp, w3, lsl #4 // encoding: [0xff,0x53,0x23,0x2b] // CHECK: subs x3, sp, x9, lsl #2 // encoding: [0xe3,0x6b,0x29,0xeb] //------------------------------------------------------------------------------ @@ -349,8 +349,8 @@ _func: // A relocation check (default to lo12, which is the only sane relocation anyway really) add x0, x4, #:lo12:var -// CHECK-ARM64: add x0, x4, :lo12:var // encoding: [0x80,0bAAAAAA00,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :lo12:var, kind: fixup_arm64_add_imm12 +// CHECK: add x0, x4, :lo12:var // encoding: [0x80,0bAAAAAA00,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :lo12:var, kind: fixup_aarch64_add_imm12 //------------------------------------------------------------------------------ // Add-sub (shifted register) @@ -484,7 +484,7 @@ _func: sub w4, w6, wzr // CHECK: sub w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x4b] // CHECK: sub wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x4b] -// CHECK-ARM64: neg w20, w4 // encoding: [0xf4,0x03,0x04,0x4b] +// CHECK: neg w20, w4 // encoding: [0xf4,0x03,0x04,0x4b] // CHECK: sub w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x4b] sub w11, w13, w15, lsl #0 @@ -514,7 +514,7 @@ _func: sub x4, x6, xzr // CHECK: sub x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xcb] // CHECK: sub xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0xcb] -// CHECK-ARM64: neg x20, x4 // encoding: [0xf4,0x03,0x04,0xcb] +// CHECK: neg x20, x4 // encoding: [0xf4,0x03,0x04,0xcb] // CHECK: sub x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xcb] sub x11, x13, x15, lsl #0 @@ -544,7 +544,7 @@ _func: subs w4, w6, wzr // CHECK: subs w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x6b] // CHECK: {{subs wzr,|cmp}} w3, w5 // encoding: [0x7f,0x00,0x05,0x6b] -// CHECK-ARM64: negs w20, w4 // encoding: [0xf4,0x03,0x04,0x6b] +// CHECK: negs w20, w4 // encoding: 
[0xf4,0x03,0x04,0x6b] // CHECK: subs w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x6b] subs w11, w13, w15, lsl #0 @@ -574,7 +574,7 @@ _func: subs x4, x6, xzr // CHECK: subs x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xeb] // CHECK: {{subs xzr,|cmp}} x3, x5 // encoding: [0x7f,0x00,0x05,0xeb] -// CHECK-ARM64: negs x20, x4 // encoding: [0xf4,0x03,0x04,0xeb] +// CHECK: negs x20, x4 // encoding: [0xf4,0x03,0x04,0xeb] // CHECK: subs x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xeb] subs x11, x13, x15, lsl #0 @@ -713,17 +713,17 @@ _func: neg w29, w30 neg w30, wzr neg wzr, w0 -// CHECK-ARM64: neg w29, w30 // encoding: [0xfd,0x03,0x1e,0x4b] -// CHECK-ARM64: neg w30, wzr // encoding: [0xfe,0x03,0x1f,0x4b] -// CHECK-ARM64: neg wzr, w0 // encoding: [0xff,0x03,0x00,0x4b] +// CHECK: neg w29, w30 // encoding: [0xfd,0x03,0x1e,0x4b] +// CHECK: neg w30, wzr // encoding: [0xfe,0x03,0x1f,0x4b] +// CHECK: neg wzr, w0 // encoding: [0xff,0x03,0x00,0x4b] neg w28, w27, lsl #0 neg w26, w25, lsl #29 neg w24, w23, lsl #31 -// CHECK-ARM64: neg w28, w27 // encoding: [0xfc,0x03,0x1b,0x4b] -// CHECK-ARM64: neg w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x4b] -// CHECK-ARM64: neg w24, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x4b] +// CHECK: neg w28, w27 // encoding: [0xfc,0x03,0x1b,0x4b] +// CHECK: neg w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x4b] +// CHECK: neg w24, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x4b] neg w22, w21, lsr #0 neg w20, w19, lsr #1 @@ -742,17 +742,17 @@ _func: neg x29, x30 neg x30, xzr neg xzr, x0 -// CHECK-ARM64: neg x29, x30 // encoding: [0xfd,0x03,0x1e,0xcb] -// CHECK-ARM64: neg x30, xzr // encoding: [0xfe,0x03,0x1f,0xcb] -// CHECK-ARM64: neg xzr, x0 // encoding: [0xff,0x03,0x00,0xcb] +// CHECK: neg x29, x30 // encoding: [0xfd,0x03,0x1e,0xcb] +// CHECK: neg x30, xzr // encoding: [0xfe,0x03,0x1f,0xcb] +// CHECK: neg xzr, x0 // encoding: [0xff,0x03,0x00,0xcb] neg x28, x27, lsl #0 neg x26, x25, lsl #29 neg x24, x23, lsl #31 -// CHECK-ARM64: neg x28, x27 // encoding: [0xfc,0x03,0x1b,0xcb] -// CHECK-ARM64: neg x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xcb] -// CHECK-ARM64: neg x24, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xcb] +// CHECK: neg x28, x27 // encoding: [0xfc,0x03,0x1b,0xcb] +// CHECK: neg x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xcb] +// CHECK: neg x24, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xcb] neg x22, x21, lsr #0 neg x20, x19, lsr #1 @@ -771,17 +771,17 @@ _func: negs w29, w30 negs w30, wzr negs wzr, w0 -// CHECK-ARM64: negs w29, w30 // encoding: [0xfd,0x03,0x1e,0x6b] -// CHECK-ARM64: negs w30, wzr // encoding: [0xfe,0x03,0x1f,0x6b] -// CHECK-ARM64: cmp wzr, w0 // encoding: [0xff,0x03,0x00,0x6b] +// CHECK: negs w29, w30 // encoding: [0xfd,0x03,0x1e,0x6b] +// CHECK: negs w30, wzr // encoding: [0xfe,0x03,0x1f,0x6b] +// CHECK: cmp wzr, w0 // encoding: [0xff,0x03,0x00,0x6b] negs w28, w27, lsl #0 negs w26, w25, lsl #29 negs w24, w23, lsl #31 -// CHECK-ARM64: negs w28, w27 // encoding: [0xfc,0x03,0x1b,0x6b] -// CHECK-ARM64: negs w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x6b] -// CHECK-ARM64: negs w24, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x6b] +// CHECK: negs w28, w27 // encoding: [0xfc,0x03,0x1b,0x6b] +// CHECK: negs w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x6b] +// CHECK: negs w24, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x6b] negs w22, w21, lsr #0 negs w20, w19, lsr #1 @@ -800,17 +800,17 @@ _func: negs x29, x30 negs x30, xzr negs xzr, x0 -// CHECK-ARM64: negs x29, x30 // encoding: [0xfd,0x03,0x1e,0xeb] -// CHECK-ARM64: negs x30, xzr // encoding: 
[0xfe,0x03,0x1f,0xeb] -// CHECK-ARM64: cmp xzr, x0 // encoding: [0xff,0x03,0x00,0xeb] +// CHECK: negs x29, x30 // encoding: [0xfd,0x03,0x1e,0xeb] +// CHECK: negs x30, xzr // encoding: [0xfe,0x03,0x1f,0xeb] +// CHECK: cmp xzr, x0 // encoding: [0xff,0x03,0x00,0xeb] negs x28, x27, lsl #0 negs x26, x25, lsl #29 negs x24, x23, lsl #31 -// CHECK-ARM64: negs x28, x27 // encoding: [0xfc,0x03,0x1b,0xeb] -// CHECK-ARM64: negs x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xeb] -// CHECK-ARM64: negs x24, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xeb] +// CHECK: negs x28, x27 // encoding: [0xfc,0x03,0x1b,0xeb] +// CHECK: negs x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xeb] +// CHECK: negs x24, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xeb] negs x22, x21, lsr #0 negs x20, x19, lsr #1 @@ -938,28 +938,28 @@ _func: sbfm wzr, wzr, #31, #31 sbfm w12, w9, #0, #0 -// CHECK-ARM64: sbfx x1, x2, #3, #2 // encoding: [0x41,0x10,0x43,0x93] -// CHECK-ARM64: asr x3, x4, #63 // encoding: [0x83,0xfc,0x7f,0x93] -// CHECK-ARM64: asr wzr, wzr, #31 // encoding: [0xff,0x7f,0x1f,0x13] -// CHECK-ARM64: sbfx w12, w9, #0, #1 // encoding: [0x2c,0x01,0x00,0x13] +// CHECK: sbfx x1, x2, #3, #2 // encoding: [0x41,0x10,0x43,0x93] +// CHECK: asr x3, x4, #63 // encoding: [0x83,0xfc,0x7f,0x93] +// CHECK: asr wzr, wzr, #31 // encoding: [0xff,0x7f,0x1f,0x13] +// CHECK: sbfx w12, w9, #0, #1 // encoding: [0x2c,0x01,0x00,0x13] ubfm x4, x5, #12, #10 ubfm xzr, x4, #0, #0 ubfm x4, xzr, #63, #5 ubfm x5, x6, #12, #63 -// CHECK-ARM64: ubfiz x4, x5, #52, #11 // encoding: [0xa4,0x28,0x4c,0xd3] -// CHECK-ARM64: ubfx xzr, x4, #0, #1 // encoding: [0x9f,0x00,0x40,0xd3] -// CHECK-ARM64: ubfiz x4, xzr, #1, #6 // encoding: [0xe4,0x17,0x7f,0xd3] -// CHECK-ARM64: lsr x5, x6, #12 // encoding: [0xc5,0xfc,0x4c,0xd3] +// CHECK: ubfiz x4, x5, #52, #11 // encoding: [0xa4,0x28,0x4c,0xd3] +// CHECK: ubfx xzr, x4, #0, #1 // encoding: [0x9f,0x00,0x40,0xd3] +// CHECK: ubfiz x4, xzr, #1, #6 // encoding: [0xe4,0x17,0x7f,0xd3] +// CHECK: lsr x5, x6, #12 // encoding: [0xc5,0xfc,0x4c,0xd3] bfm x4, x5, #12, #10 bfm xzr, x4, #0, #0 bfm x4, xzr, #63, #5 bfm x5, x6, #12, #63 -// CHECK-ARM64: bfi x4, x5, #52, #11 // encoding: [0xa4,0x28,0x4c,0xb3] -// CHECK-ARM64: bfxil xzr, x4, #0, #1 // encoding: [0x9f,0x00,0x40,0xb3] -// CHECK-ARM64: bfi x4, xzr, #1, #6 // encoding: [0xe4,0x17,0x7f,0xb3] -// CHECK-ARM64: bfxil x5, x6, #12, #52 // encoding: [0xc5,0xfc,0x4c,0xb3] +// CHECK: bfi x4, x5, #52, #11 // encoding: [0xa4,0x28,0x4c,0xb3] +// CHECK: bfxil xzr, x4, #0, #1 // encoding: [0x9f,0x00,0x40,0xb3] +// CHECK: bfi x4, xzr, #1, #6 // encoding: [0xe4,0x17,0x7f,0xb3] +// CHECK: bfxil x5, x6, #12, #52 // encoding: [0xc5,0xfc,0x4c,0xb3] sxtb w1, w2 sxtb xzr, w3 @@ -1018,9 +1018,9 @@ _func: sbfiz xzr, xzr, #10, #11 // CHECK: {{sbfiz|sbfx}} w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13] // CHECK: sbfiz x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0x93] -// CHECK-ARM64: asr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0x93] +// CHECK: asr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0x93] // CHECK: sbfiz x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0x93] -// CHECK-ARM64: asr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x13] +// CHECK: asr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x13] // CHECK: sbfiz w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x13] // CHECK: sbfiz w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x13] // CHECK: sbfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0x93] @@ -1034,12 +1034,12 @@ _func: sbfx w13, w14, #29, #3 sbfx xzr, xzr, #10, #11 // CHECK: sbfx w9, 
w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13] -// CHECK-ARM64: asr x2, x3, #63 // encoding: [0x62,0xfc,0x7f,0x93] -// CHECK-ARM64: asr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0x93] -// CHECK-ARM64: asr x9, x10, #5 // encoding: [0x49,0xfd,0x45,0x93] -// CHECK-ARM64: asr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x13] -// CHECK-ARM64: asr w11, w12, #31 // encoding: [0x8b,0x7d,0x1f,0x13] -// CHECK-ARM64: asr w13, w14, #29 // encoding: [0xcd,0x7d,0x1d,0x13] +// CHECK: asr x2, x3, #63 // encoding: [0x62,0xfc,0x7f,0x93] +// CHECK: asr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0x93] +// CHECK: asr x9, x10, #5 // encoding: [0x49,0xfd,0x45,0x93] +// CHECK: asr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x13] +// CHECK: asr w11, w12, #31 // encoding: [0x8b,0x7d,0x1f,0x13] +// CHECK: asr w13, w14, #29 // encoding: [0xcd,0x7d,0x1d,0x13] // CHECK: sbfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0x93] bfi w9, w10, #0, #1 @@ -1051,14 +1051,14 @@ _func: bfi w13, w14, #29, #3 bfi xzr, xzr, #10, #11 -// CHECK-ARM64: bfxil w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x33] -// CHECK-ARM64: bfi x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xb3] -// CHECK-ARM64: bfxil x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xb3] -// CHECK-ARM64: bfi x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0xb3] -// CHECK-ARM64: bfxil w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x33] -// CHECK-ARM64: bfi w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x33] -// CHECK-ARM64: bfi w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x33] -// CHECK-ARM64: bfi xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xb3] +// CHECK: bfxil w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x33] +// CHECK: bfi x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xb3] +// CHECK: bfxil x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xb3] +// CHECK: bfi x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0xb3] +// CHECK: bfxil w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x33] +// CHECK: bfi w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x33] +// CHECK: bfi w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x33] +// CHECK: bfi xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xb3] bfxil w9, w10, #0, #1 bfxil x2, x3, #63, #1 @@ -1086,14 +1086,14 @@ _func: ubfiz w13, w14, #29, #3 ubfiz xzr, xzr, #10, #11 -// CHECK-ARM64: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] -// CHECK-ARM64: lsl x2, x3, #63 // encoding: [0x62,0x00,0x41,0xd3] -// CHECK-ARM64: lsr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0xd3] -// CHECK-ARM64: lsl x9, x10, #5 // encoding: [0x49,0xe9,0x7b,0xd3] -// CHECK-ARM64: lsr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x53] -// CHECK-ARM64: lsl w11, w12, #31 // encoding: [0x8b,0x01,0x01,0x53] -// CHECK-ARM64: lsl w13, w14, #29 // encoding: [0xcd,0x09,0x03,0x53] -// CHECK-ARM64: ubfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xd3] +// CHECK: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] +// CHECK: lsl x2, x3, #63 // encoding: [0x62,0x00,0x41,0xd3] +// CHECK: lsr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0xd3] +// CHECK: lsl x9, x10, #5 // encoding: [0x49,0xe9,0x7b,0xd3] +// CHECK: lsr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x53] +// CHECK: lsl w11, w12, #31 // encoding: [0x8b,0x01,0x01,0x53] +// CHECK: lsl w13, w14, #29 // encoding: [0xcd,0x09,0x03,0x53] +// CHECK: ubfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xd3] ubfx w9, w10, #0, #1 ubfx x2, x3, #63, #1 @@ -1104,14 +1104,14 @@ _func: ubfx w13, w14, #29, #3 ubfx xzr, xzr, #10, #11 -// CHECK-ARM64: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] -// CHECK-ARM64: 
lsr x2, x3, #63 // encoding: [0x62,0xfc,0x7f,0xd3] -// CHECK-ARM64: lsr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0xd3] -// CHECK-ARM64: lsr x9, x10, #5 // encoding: [0x49,0xfd,0x45,0xd3] -// CHECK-ARM64: lsr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x53] -// CHECK-ARM64: lsr w11, w12, #31 // encoding: [0x8b,0x7d,0x1f,0x53] -// CHECK-ARM64: lsr w13, w14, #29 // encoding: [0xcd,0x7d,0x1d,0x53] -// CHECK-ARM64: ubfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0xd3] +// CHECK: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] +// CHECK: lsr x2, x3, #63 // encoding: [0x62,0xfc,0x7f,0xd3] +// CHECK: lsr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0xd3] +// CHECK: lsr x9, x10, #5 // encoding: [0x49,0xfd,0x45,0xd3] +// CHECK: lsr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x53] +// CHECK: lsr w11, w12, #31 // encoding: [0x8b,0x7d,0x1f,0x53] +// CHECK: lsr w13, w14, #29 // encoding: [0xcd,0x7d,0x1d,0x53] +// CHECK: ubfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0xd3] //------------------------------------------------------------------------------ // Compare & branch (immediate) //------------------------------------------------------------------------------ @@ -1120,22 +1120,22 @@ _func: cbz x5, lbl cbnz x2, lbl cbnz x26, lbl -// CHECK-ARM64: cbz w5, lbl // encoding: [0bAAA00101,A,A,0x34] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: cbz x5, lbl // encoding: [0bAAA00101,A,A,0xb4] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: cbnz x2, lbl // encoding: [0bAAA00010,A,A,0xb5] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: cbnz x26, lbl // encoding: [0bAAA11010,A,A,0xb5] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 +// CHECK: cbz w5, lbl // encoding: [0bAAA00101,A,A,0x34] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: cbz x5, lbl // encoding: [0bAAA00101,A,A,0xb4] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: cbnz x2, lbl // encoding: [0bAAA00010,A,A,0xb5] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: cbnz x26, lbl // encoding: [0bAAA11010,A,A,0xb5] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 cbz wzr, lbl cbnz xzr, lbl -// CHECK-ARM64: cbz wzr, lbl // encoding: [0bAAA11111,A,A,0x34] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: cbnz xzr, lbl // encoding: [0bAAA11111,A,A,0xb5] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 +// CHECK: cbz wzr, lbl // encoding: [0bAAA11111,A,A,0x34] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: cbnz xzr, lbl // encoding: [0bAAA11111,A,A,0xb5] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 cbz w5, #0 cbnz x3, #-4 @@ -1168,40 +1168,40 @@ _func: b.le lbl b.al lbl -// CHECK-ARM64: b.eq lbl // encoding: [0bAAA00000,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.ne lbl // encoding: [0bAAA00001,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.hs lbl // encoding: [0bAAA00010,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// 
CHECK-ARM64: b.hs lbl // encoding: [0bAAA00010,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.lo lbl // encoding: [0bAAA00011,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.lo lbl // encoding: [0bAAA00011,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.mi lbl // encoding: [0bAAA00100,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.pl lbl // encoding: [0bAAA00101,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.vs lbl // encoding: [0bAAA00110,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.vc lbl // encoding: [0bAAA00111,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.hi lbl // encoding: [0bAAA01000,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.ls lbl // encoding: [0bAAA01001,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.ge lbl // encoding: [0bAAA01010,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.lt lbl // encoding: [0bAAA01011,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.gt lbl // encoding: [0bAAA01100,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.le lbl // encoding: [0bAAA01101,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.al lbl // encoding: [0bAAA01110,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 +// CHECK: b.eq lbl // encoding: [0bAAA00000,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.ne lbl // encoding: [0bAAA00001,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.hs lbl // encoding: [0bAAA00010,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.hs lbl // encoding: [0bAAA00010,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.lo lbl // encoding: [0bAAA00011,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.lo lbl // encoding: [0bAAA00011,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.mi lbl // encoding: [0bAAA00100,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.pl lbl // encoding: [0bAAA00101,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.vs lbl // encoding: [0bAAA00110,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.vc lbl // encoding: [0bAAA00111,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.hi lbl // encoding: [0bAAA01000,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: 
fixup_aarch64_pcrel_branch19 +// CHECK: b.ls lbl // encoding: [0bAAA01001,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.ge lbl // encoding: [0bAAA01010,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.lt lbl // encoding: [0bAAA01011,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.gt lbl // encoding: [0bAAA01100,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.le lbl // encoding: [0bAAA01101,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.al lbl // encoding: [0bAAA01110,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 // ARM64 has these in a separate file beq lbl @@ -2186,23 +2186,23 @@ _func: ldr x29, there ldrsw xzr, everywhere -// CHECK-ARM64: ldr w3, here // encoding: [0bAAA00011,A,A,0x18] -// CHECK-ARM64: // fixup A - offset: 0, value: here, kind: fixup_arm64_ldr_pcrel_imm19 -// CHECK-ARM64: ldr x29, there // encoding: [0bAAA11101,A,A,0x58] -// CHECK-ARM64: // fixup A - offset: 0, value: there, kind: fixup_arm64_ldr_pcrel_imm19 -// CHECK-ARM64: ldrsw xzr, everywhere // encoding: [0bAAA11111,A,A,0x98] -// CHECK-ARM64: // fixup A - offset: 0, value: everywhere, kind: fixup_arm64_ldr_pcrel_imm19 +// CHECK: ldr w3, here // encoding: [0bAAA00011,A,A,0x18] +// CHECK: // fixup A - offset: 0, value: here, kind: fixup_aarch64_ldr_pcrel_imm19 +// CHECK: ldr x29, there // encoding: [0bAAA11101,A,A,0x58] +// CHECK: // fixup A - offset: 0, value: there, kind: fixup_aarch64_ldr_pcrel_imm19 +// CHECK: ldrsw xzr, everywhere // encoding: [0bAAA11111,A,A,0x98] +// CHECK: // fixup A - offset: 0, value: everywhere, kind: fixup_aarch64_ldr_pcrel_imm19 ldr s0, who_knows ldr d0, i_dont ldr q0, there_must_be_a_better_way -// CHECK-ARM64: ldr s0, who_knows // encoding: [0bAAA00000,A,A,0x1c] -// CHECK-ARM64: // fixup A - offset: 0, value: who_knows, kind: fixup_arm64_ldr_pcrel_imm19 -// CHECK-ARM64: ldr d0, i_dont // encoding: [0bAAA00000,A,A,0x5c] -// CHECK-ARM64: // fixup A - offset: 0, value: i_dont, kind: fixup_arm64_ldr_pcrel_imm19 -// CHECK-ARM64: ldr q0, there_must_be_a_better_way // encoding: [0bAAA00000,A,A,0x9c] -// CHECK-ARM64: // fixup A - offset: 0, value: there_must_be_a_better_way, kind: fixup_arm64_ldr_pcrel_imm19 +// CHECK: ldr s0, who_knows // encoding: [0bAAA00000,A,A,0x1c] +// CHECK: // fixup A - offset: 0, value: who_knows, kind: fixup_aarch64_ldr_pcrel_imm19 +// CHECK: ldr d0, i_dont // encoding: [0bAAA00000,A,A,0x5c] +// CHECK: // fixup A - offset: 0, value: i_dont, kind: fixup_aarch64_ldr_pcrel_imm19 +// CHECK: ldr q0, there_must_be_a_better_way // encoding: [0bAAA00000,A,A,0x9c] +// CHECK: // fixup A - offset: 0, value: there_must_be_a_better_way, kind: fixup_aarch64_ldr_pcrel_imm19 ldr w0, #1048572 ldr x10, #-1048576 @@ -2212,10 +2212,10 @@ _func: prfm pldl1strm, nowhere prfm #22, somewhere -// CHECK-ARM64: prfm pldl1strm, nowhere // encoding: [0bAAA00001,A,A,0xd8] -// CHECK-ARM64: // fixup A - offset: 0, value: nowhere, kind: fixup_arm64_ldr_pcrel_imm19 -// CHECK-ARM64: prfm #22, somewhere // encoding: [0bAAA10110,A,A,0xd8] -// CHECK-ARM64: // fixup A - offset: 0, value: somewhere, kind: fixup_arm64_ldr_pcrel_imm19 +// CHECK: prfm pldl1strm, nowhere // encoding: [0bAAA00001,A,A,0xd8] +// CHECK: // fixup A - offset: 0, value: nowhere, kind: 
fixup_aarch64_ldr_pcrel_imm19 +// CHECK: prfm #22, somewhere // encoding: [0bAAA10110,A,A,0xd8] +// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_aarch64_ldr_pcrel_imm19 //------------------------------------------------------------------------------ // Load/store exclusive @@ -2431,18 +2431,18 @@ _func: ldr x15, [x5, #:lo12:sym] ldr q3, [x2, #:lo12:sym] -// CHECK-ARM64: str x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b00AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_arm64_ldst_imm12_scale8 -// CHECK-ARM64: ldrb w15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b01AAAAAA,0x39] -// CHECK-ARM64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_arm64_ldst_imm12_scale1 -// CHECK-ARM64: ldrsh x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b10AAAAAA,0x79] -// CHECK-ARM64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_arm64_ldst_imm12_scale2 -// CHECK-ARM64: ldrsw x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b10AAAAAA,0xb9] -// CHECK-ARM64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_arm64_ldst_imm12_scale4 -// CHECK-ARM64: ldr x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b01AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_arm64_ldst_imm12_scale8 -// CHECK-ARM64: ldr q3, [x2, :lo12:sym] // encoding: [0x43,0bAAAAAA00,0b11AAAAAA,0x3d] -// CHECK-ARM64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_arm64_ldst_imm12_scale16 +// CHECK: str x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b00AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale8 +// CHECK: ldrb w15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b01AAAAAA,0x39] +// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale1 +// CHECK: ldrsh x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b10AAAAAA,0x79] +// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale2 +// CHECK: ldrsw x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b10AAAAAA,0xb9] +// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale4 +// CHECK: ldr x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b01AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale8 +// CHECK: ldr q3, [x2, :lo12:sym] // encoding: [0x43,0bAAAAAA00,0b11AAAAAA,0x3d] +// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale16 prfm pldl1keep, [sp, #8] prfm pldl1strm, [x3] @@ -3323,34 +3323,34 @@ _func: movz x2, #:abs_g0:sym movk w3, #:abs_g0_nc:sym -// CHECK-ARM64: movz x2, #:abs_g0:sym // encoding: [0bAAA00010,A,0b100AAAAA,0xd2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movk w3, #:abs_g0_nc:sym // encoding: [0bAAA00011,A,0b100AAAAA,0x72] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_arm64_movw +// CHECK: movz x2, #:abs_g0:sym // encoding: [0bAAA00010,A,0b100AAAAA,0xd2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_aarch64_movw +// CHECK: movk w3, #:abs_g0_nc:sym // encoding: [0bAAA00011,A,0b100AAAAA,0x72] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_aarch64_movw movz x4, #:abs_g1:sym movk w5, #:abs_g1_nc:sym -// CHECK-ARM64: movz x4, #:abs_g1:sym // encoding: [0bAAA00100,A,0b101AAAAA,0xd2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_arm64_movw 
-// CHECK-ARM64: movk w5, #:abs_g1_nc:sym // encoding: [0bAAA00101,A,0b101AAAAA,0x72] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_arm64_movw +// CHECK: movz x4, #:abs_g1:sym // encoding: [0bAAA00100,A,0b101AAAAA,0xd2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_aarch64_movw +// CHECK: movk w5, #:abs_g1_nc:sym // encoding: [0bAAA00101,A,0b101AAAAA,0x72] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_aarch64_movw movz x6, #:abs_g2:sym movk x7, #:abs_g2_nc:sym -// CHECK-ARM64: movz x6, #:abs_g2:sym // encoding: [0bAAA00110,A,0b110AAAAA,0xd2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movk x7, #:abs_g2_nc:sym // encoding: [0bAAA00111,A,0b110AAAAA,0xf2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_arm64_movw +// CHECK: movz x6, #:abs_g2:sym // encoding: [0bAAA00110,A,0b110AAAAA,0xd2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_aarch64_movw +// CHECK: movk x7, #:abs_g2_nc:sym // encoding: [0bAAA00111,A,0b110AAAAA,0xf2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_aarch64_movw movz x8, #:abs_g3:sym movk x9, #:abs_g3:sym -// CHECK-ARM64: movz x8, #:abs_g3:sym // encoding: [0bAAA01000,A,0b111AAAAA,0xd2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movk x9, #:abs_g3:sym // encoding: [0bAAA01001,A,0b111AAAAA,0xf2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_arm64_movw +// CHECK: movz x8, #:abs_g3:sym // encoding: [0bAAA01000,A,0b111AAAAA,0xd2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_aarch64_movw +// CHECK: movk x9, #:abs_g3:sym // encoding: [0bAAA01001,A,0b111AAAAA,0xf2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_aarch64_movw movn x30, #:abs_g0_s:sym @@ -3358,36 +3358,36 @@ _func: movn w10, #:abs_g0_s:sym movz w25, #:abs_g0_s:sym -// CHECK-ARM64: movn x30, #:abs_g0_s:sym // encoding: [0bAAA11110,A,0b100AAAAA,0x92] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movz x19, #:abs_g0_s:sym // encoding: [0bAAA10011,A,0b100AAAAA,0xd2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movn w10, #:abs_g0_s:sym // encoding: [0bAAA01010,A,0b100AAAAA,0x12] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movz w25, #:abs_g0_s:sym // encoding: [0bAAA11001,A,0b100AAAAA,0x52] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_arm64_movw +// CHECK: movn x30, #:abs_g0_s:sym // encoding: [0bAAA11110,A,0b100AAAAA,0x92] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw +// CHECK: movz x19, #:abs_g0_s:sym // encoding: [0bAAA10011,A,0b100AAAAA,0xd2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw +// CHECK: movn w10, #:abs_g0_s:sym // encoding: [0bAAA01010,A,0b100AAAAA,0x12] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw +// CHECK: movz w25, #:abs_g0_s:sym // encoding: [0bAAA11001,A,0b100AAAAA,0x52] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw movn x30, #:abs_g1_s:sym movz x19, #:abs_g1_s:sym movn w10, #:abs_g1_s:sym movz w25, #:abs_g1_s:sym -// CHECK-ARM64: movn 
x30, #:abs_g1_s:sym // encoding: [0bAAA11110,A,0b101AAAAA,0x92] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movz x19, #:abs_g1_s:sym // encoding: [0bAAA10011,A,0b101AAAAA,0xd2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movn w10, #:abs_g1_s:sym // encoding: [0bAAA01010,A,0b101AAAAA,0x12] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movz w25, #:abs_g1_s:sym // encoding: [0bAAA11001,A,0b101AAAAA,0x52] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_arm64_movw +// CHECK: movn x30, #:abs_g1_s:sym // encoding: [0bAAA11110,A,0b101AAAAA,0x92] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_aarch64_movw +// CHECK: movz x19, #:abs_g1_s:sym // encoding: [0bAAA10011,A,0b101AAAAA,0xd2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_aarch64_movw +// CHECK: movn w10, #:abs_g1_s:sym // encoding: [0bAAA01010,A,0b101AAAAA,0x12] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_aarch64_movw +// CHECK: movz w25, #:abs_g1_s:sym // encoding: [0bAAA11001,A,0b101AAAAA,0x52] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_aarch64_movw movn x30, #:abs_g2_s:sym movz x19, #:abs_g2_s:sym -// CHECK-ARM64: movn x30, #:abs_g2_s:sym // encoding: [0bAAA11110,A,0b110AAAAA,0x92] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movz x19, #:abs_g2_s:sym // encoding: [0bAAA10011,A,0b110AAAAA,0xd2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_arm64_movw +// CHECK: movn x30, #:abs_g2_s:sym // encoding: [0bAAA11110,A,0b110AAAAA,0x92] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_aarch64_movw +// CHECK: movz x19, #:abs_g2_s:sym // encoding: [0bAAA10011,A,0b110AAAAA,0xd2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_aarch64_movw //------------------------------------------------------------------------------ // PC-relative addressing @@ -3396,15 +3396,15 @@ _func: adr x2, loc adr xzr, loc -// CHECK-ARM64: adr x2, loc // encoding: [0x02'A',A,A,0x10'A'] -// CHECK-ARM64: // fixup A - offset: 0, value: loc, kind: fixup_arm64_pcrel_adr_imm21 -// CHECK-ARM64: adr xzr, loc // encoding: [0x1f'A',A,A,0x10'A'] -// CHECK-ARM64: // fixup A - offset: 0, value: loc, kind: fixup_arm64_pcrel_adr_imm21 +// CHECK: adr x2, loc // encoding: [0x02'A',A,A,0x10'A'] +// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_aarch64_pcrel_adr_imm21 +// CHECK: adr xzr, loc // encoding: [0x1f'A',A,A,0x10'A'] +// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_aarch64_pcrel_adr_imm21 adrp x29, loc -// CHECK-ARM64: adrp x29, loc // encoding: [0x1d'A',A,A,0x90'A'] -// CHECK-ARM64: // fixup A - offset: 0, value: loc, kind: fixup_arm64_pcrel_adrp_imm21 +// CHECK: adrp x29, loc // encoding: [0x1d'A',A,A,0x90'A'] +// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_aarch64_pcrel_adrp_imm21 adrp x30, #4096 adr x20, #0 adr x9, #-1 @@ -4782,24 +4782,24 @@ _func: tbz xzr, #63, elsewhere tbnz x5, #45, nowhere -// CHECK-ARM64: tbz w5, #0, somewhere // encoding: [0bAAA00101,A,0b00000AAA,0x36] -// CHECK-ARM64: // fixup A - offset: 0, value: somewhere, kind: fixup_arm64_pcrel_branch14 -// CHECK-ARM64: tbz xzr, #63, elsewhere // encoding: [0bAAA11111,A,0b11111AAA,0xb6] -// CHECK-ARM64: // 
fixup A - offset: 0, value: elsewhere, kind: fixup_arm64_pcrel_branch14 -// CHECK-ARM64: tbnz x5, #45, nowhere // encoding: [0bAAA00101,A,0b01101AAA,0xb7] -// CHECK-ARM64: // fixup A - offset: 0, value: nowhere, kind: fixup_arm64_pcrel_branch14 +// CHECK: tbz w5, #0, somewhere // encoding: [0bAAA00101,A,0b00000AAA,0x36] +// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_aarch64_pcrel_branch14 +// CHECK: tbz xzr, #63, elsewhere // encoding: [0bAAA11111,A,0b11111AAA,0xb6] +// CHECK: // fixup A - offset: 0, value: elsewhere, kind: fixup_aarch64_pcrel_branch14 +// CHECK: tbnz x5, #45, nowhere // encoding: [0bAAA00101,A,0b01101AAA,0xb7] +// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_aarch64_pcrel_branch14 tbnz w3, #2, there tbnz wzr, #31, nowhere tbz w5, #12, anywhere -// CHECK-ARM64: tbnz w3, #2, there // encoding: [0bAAA00011,A,0b00010AAA,0x37] -// CHECK-ARM64: // fixup A - offset: 0, value: there, kind: fixup_arm64_pcrel_branch14 -// CHECK-ARM64: tbnz wzr, #31, nowhere // encoding: [0bAAA11111,A,0b11111AAA,0x37] -// CHECK-ARM64: // fixup A - offset: 0, value: nowhere, kind: fixup_arm64_pcrel_branch14 -// CHECK-ARM64: tbz w5, #12, anywhere // encoding: [0bAAA00101,A,0b01100AAA,0x36] -// CHECK-ARM64: // fixup A - offset: 0, value: anywhere, kind: fixup_arm64_pcrel_branch14 +// CHECK: tbnz w3, #2, there // encoding: [0bAAA00011,A,0b00010AAA,0x37] +// CHECK: // fixup A - offset: 0, value: there, kind: fixup_aarch64_pcrel_branch14 +// CHECK: tbnz wzr, #31, nowhere // encoding: [0bAAA11111,A,0b11111AAA,0x37] +// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_aarch64_pcrel_branch14 +// CHECK: tbz w5, #12, anywhere // encoding: [0bAAA00101,A,0b01100AAA,0x36] +// CHECK: // fixup A - offset: 0, value: anywhere, kind: fixup_aarch64_pcrel_branch14 //------------------------------------------------------------------------------ // Unconditional branch (immediate) @@ -4808,10 +4808,10 @@ _func: b somewhere bl elsewhere -// CHECK-ARM64: b somewhere // encoding: [A,A,A,0b000101AA] -// CHECK-ARM64: // fixup A - offset: 0, value: somewhere, kind: fixup_arm64_pcrel_branch26 -// CHECK-ARM64: bl elsewhere // encoding: [A,A,A,0b100101AA] -// CHECK-ARM64: // fixup A - offset: 0, value: elsewhere, kind: fixup_arm64_pcrel_call26 +// CHECK: b somewhere // encoding: [A,A,A,0b000101AA] +// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_aarch64_pcrel_branch26 +// CHECK: bl elsewhere // encoding: [A,A,A,0b100101AA] +// CHECK: // fixup A - offset: 0, value: elsewhere, kind: fixup_aarch64_pcrel_call26 b #4 bl #0 diff --git a/test/MC/AArch64/basic-pic.s b/test/MC/AArch64/basic-pic.s index 6bb6aaa7de13..a10874dcca09 100644 --- a/test/MC/AArch64/basic-pic.s +++ b/test/MC/AArch64/basic-pic.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o -| llvm-objdump -r - | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o -| llvm-objdump -r - | FileCheck %s // CHECK: RELOCATION RECORDS FOR [.rela.text] diff --git a/test/MC/AArch64/elf-extern.s b/test/MC/AArch64/elf-extern.s index 3d84bde052ff..dfa3fb002ed5 100644 --- a/test/MC/AArch64/elf-extern.s +++ b/test/MC/AArch64/elf-extern.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc < %s -triple=arm64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s +// RUN: llvm-mc < %s -triple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s // External symbols are a different concept to global variables but should still // get relocations and so on when used. 
diff --git a/test/MC/AArch64/elf-objdump.s b/test/MC/AArch64/elf-objdump.s index b69926efbc2b..3b3aa65819d8 100644 --- a/test/MC/AArch64/elf-objdump.s +++ b/test/MC/AArch64/elf-objdump.s @@ -1,5 +1,5 @@ // 64 bit little endian -// RUN: llvm-mc -filetype=obj -triple arm64-none-linux-gnu %s -o - | llvm-objdump -d - +// RUN: llvm-mc -filetype=obj -triple aarch64-none-linux-gnu %s -o - | llvm-objdump -d - // We just want to see if llvm-objdump works at all. // CHECK: .text diff --git a/test/MC/AArch64/elf-reloc-addsubimm.s b/test/MC/AArch64/elf-reloc-addsubimm.s index cc5c3f7f25b0..e37991bfba1c 100644 --- a/test/MC/AArch64/elf-reloc-addsubimm.s +++ b/test/MC/AArch64/elf-reloc-addsubimm.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s add x2, x3, #:lo12:some_label diff --git a/test/MC/AArch64/elf-reloc-ldrlit.s b/test/MC/AArch64/elf-reloc-ldrlit.s index 3554ef3ae423..d4c3a4eb50d0 100644 --- a/test/MC/AArch64/elf-reloc-ldrlit.s +++ b/test/MC/AArch64/elf-reloc-ldrlit.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s ldr x0, some_label diff --git a/test/MC/AArch64/elf-reloc-ldstunsimm.s b/test/MC/AArch64/elf-reloc-ldstunsimm.s index 196f65fd2999..371e7e51f245 100644 --- a/test/MC/AArch64/elf-reloc-ldstunsimm.s +++ b/test/MC/AArch64/elf-reloc-ldstunsimm.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+fp-armv8 -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+fp-armv8 -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s ldrb w0, [sp, #:lo12:some_label] diff --git a/test/MC/AArch64/elf-reloc-movw.s b/test/MC/AArch64/elf-reloc-movw.s index dc7dbb0c156a..333159562c0f 100644 --- a/test/MC/AArch64/elf-reloc-movw.s +++ b/test/MC/AArch64/elf-reloc-movw.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s movz x0, #:abs_g0:some_label diff --git a/test/MC/AArch64/elf-reloc-pcreladdressing.s b/test/MC/AArch64/elf-reloc-pcreladdressing.s index 652011318c39..093891d931aa 100644 --- a/test/MC/AArch64/elf-reloc-pcreladdressing.s +++ b/test/MC/AArch64/elf-reloc-pcreladdressing.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s adr x2, some_label diff --git a/test/MC/AArch64/elf-reloc-tstb.s b/test/MC/AArch64/elf-reloc-tstb.s index 9cbe3a53fb7f..25c98163b584 100644 --- a/test/MC/AArch64/elf-reloc-tstb.s +++ b/test/MC/AArch64/elf-reloc-tstb.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s tbz x6, #45, somewhere diff --git a/test/MC/AArch64/elf-reloc-uncondbrimm.s b/test/MC/AArch64/elf-reloc-uncondbrimm.s index 8f3915afab79..9ac66bd876a7 100644 --- a/test/MC/AArch64/elf-reloc-uncondbrimm.s +++ b/test/MC/AArch64/elf-reloc-uncondbrimm.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu 
-filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s b somewhere diff --git a/test/MC/AArch64/gicv3-regs-diagnostics.s b/test/MC/AArch64/gicv3-regs-diagnostics.s index 6f4f5ee66c65..bc005b1d5304 100644 --- a/test/MC/AArch64/gicv3-regs-diagnostics.s +++ b/test/MC/AArch64/gicv3-regs-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple arm64-none-linux-gnu < %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s // Write-only mrs x10, icc_eoir1_el1 diff --git a/test/MC/AArch64/gicv3-regs.s b/test/MC/AArch64/gicv3-regs.s index b9eac1a56951..0f5742ee5435 100644 --- a/test/MC/AArch64/gicv3-regs.s +++ b/test/MC/AArch64/gicv3-regs.s @@ -1,4 +1,4 @@ - // RUN: llvm-mc -triple arm64-none-linux-gnu -show-encoding < %s | FileCheck %s + // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s mrs x8, icc_iar1_el1 mrs x26, icc_iar0_el1 diff --git a/test/MC/AArch64/inline-asm-modifiers.s b/test/MC/AArch64/inline-asm-modifiers.s index 33d5bf519f92..cf34a952e90c 100644 --- a/test/MC/AArch64/inline-asm-modifiers.s +++ b/test/MC/AArch64/inline-asm-modifiers.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj -mattr=+fp-armv8 < %s | llvm-objdump -r - | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj -mattr=+fp-armv8 < %s | llvm-objdump -r - | FileCheck %s .file "" .text diff --git a/test/MC/AArch64/jump-table.s b/test/MC/AArch64/jump-table.s index 439ecd90de34..578ebf4e6608 100644 --- a/test/MC/AArch64/jump-table.s +++ b/test/MC/AArch64/jump-table.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc < %s -triple=arm64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s +// RUN: llvm-mc < %s -triple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s .file "" .text diff --git a/test/MC/AArch64/lit.local.cfg b/test/MC/AArch64/lit.local.cfg index 17a6b7ab033d..1be70c04415b 100644 --- a/test/MC/AArch64/lit.local.cfg +++ b/test/MC/AArch64/lit.local.cfg @@ -1,3 +1,3 @@ targets = set(config.root.targets_to_build.split()) -if 'ARM64' not in targets: +if 'AArch64' not in targets: config.unsupported = True diff --git a/test/MC/AArch64/mapping-across-sections.s b/test/MC/AArch64/mapping-across-sections.s index 00b324cb8264..3d32c1dfb400 100644 --- a/test/MC/AArch64/mapping-across-sections.s +++ b/test/MC/AArch64/mapping-across-sections.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s .text add w0, w0, w0 diff --git a/test/MC/AArch64/mapping-within-section.s b/test/MC/AArch64/mapping-within-section.s index f515cb9a5c0b..c8bd804fa0e3 100644 --- a/test/MC/AArch64/mapping-within-section.s +++ b/test/MC/AArch64/mapping-within-section.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s .text // $x at 0x0000 diff --git a/test/MC/AArch64/neon-3vdiff.s b/test/MC/AArch64/neon-3vdiff.s index 3ffc38fc69c4..fc3215b4b671 100644 --- a/test/MC/AArch64/neon-3vdiff.s +++ b/test/MC/AArch64/neon-3vdiff.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+crypto -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc 
-triple=aarch64-none-linux-gnu -mattr=+crypto -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-aba-abd.s b/test/MC/AArch64/neon-aba-abd.s index e79648341468..178eb26f64c2 100644 --- a/test/MC/AArch64/neon-aba-abd.s +++ b/test/MC/AArch64/neon-aba-abd.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-add-pairwise.s b/test/MC/AArch64/neon-add-pairwise.s index 0b9e4d3146b8..df9938b07e52 100644 --- a/test/MC/AArch64/neon-add-pairwise.s +++ b/test/MC/AArch64/neon-add-pairwise.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-add-sub-instructions.s b/test/MC/AArch64/neon-add-sub-instructions.s index 7d11d70bb90c..68f169b3dd90 100644 --- a/test/MC/AArch64/neon-add-sub-instructions.s +++ b/test/MC/AArch64/neon-add-sub-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-bitwise-instructions.s b/test/MC/AArch64/neon-bitwise-instructions.s index ec192aa2d8af..79d0a9b70b54 100644 --- a/test/MC/AArch64/neon-bitwise-instructions.s +++ b/test/MC/AArch64/neon-bitwise-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-compare-instructions.s b/test/MC/AArch64/neon-compare-instructions.s index 4d3daf066ed3..19cfaf1f4d36 100644 --- a/test/MC/AArch64/neon-compare-instructions.s +++ b/test/MC/AArch64/neon-compare-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index 46ae311f5f8b..fa1f3caf5ad3 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -1,6 +1,5 @@ - -// RUN: not llvm-mc -triple arm64-none-linux-gnu -mattr=+neon < %s 2> %t -// RUN: FileCheck --check-prefix=CHECK-ERROR --check-prefix=CHECK-ARM64-ERROR < %t %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s //------------------------------------------------------------------------------ // Vector Integer Add/sub @@ -589,12 +588,12 @@ // CHECK-ERROR: fcmgt v0.2d, v31.2s, v16.2s // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: fcmgt v4.4s, v7.4s, v15.4h -// CHECK-ARM64-ERROR: ^ -// 
CHECK-ARM64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: fcmlt v29.2d, v5.2d, v2.16b -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmgt v4.4s, v7.4s, v15.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmlt v29.2d, v5.2d, v2.16b +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Vector Compare Mask Equal to Zero (Integer) @@ -684,12 +683,12 @@ // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 -// CHECK-ARM64-ERROR: fcmeq v0.8b, v1.4h, #1.0 -// CHECK-ARM64-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: fcmeq v0.8b, v1.4h, #1 -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: expected floating-point constant #0.0 +// CHECK-ERROR: fcmeq v0.8b, v1.4h, #1.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmeq v0.8b, v1.4h, #1 +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Vector Compare Mask Greater Than or Equal to Zero (Floating Point) @@ -709,12 +708,12 @@ // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 -// CHECK-ARM64-ERROR: fcmle v17.8h, v15.2d, #-1.0 -// CHECK-ARM64-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: fcmle v17.8h, v15.2d, #2 -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: expected floating-point constant #0.0 +// CHECK-ERROR: fcmle v17.8h, v15.2d, #-1.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmle v17.8h, v15.2d, #2 +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Vector Compare Mask Greater Than Zero (Floating Point) @@ -733,12 +732,12 @@ // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 -// CHECK-ARM64-ERROR: fcmlt v29.2d, v5.2d, #255.0 -// CHECK-ARM64-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: fcmlt v29.2d, v5.2d, #255 -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: expected floating-point constant #0.0 +// CHECK-ERROR: fcmlt v29.2d, v5.2d, #255.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmlt v29.2d, v5.2d, #255 +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Vector Compare Mask Less Than or Equal To Zero (Floating Point) @@ -757,12 +756,12 @@ // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 -// CHECK-ARM64-ERROR: fcmle v17.2d, v15.2d, #15.0 -// CHECK-ARM64-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: fcmle v17.2d, v15.2d, #15 -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: expected floating-point constant #0.0 +// CHECK-ERROR: fcmle v17.2d, v15.2d, #15.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmle v17.2d, v15.2d, #15 +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Vector Compare Mask Less Than Zero (Floating Point) @@ -781,12 +780,12 @@ // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 -// CHECK-ARM64-ERROR: fcmlt v29.2d, v5.2d, #16.0 -// CHECK-ARM64-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: fcmlt 
v29.2d, v5.2d, #2 -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: expected floating-point constant #0.0 +// CHECK-ERROR: fcmlt v29.2d, v5.2d, #16.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmlt v29.2d, v5.2d, #2 +// CHECK-ERROR: ^ /----------------------------------------------------------------------- // Vector Integer Halving Add (Signed) @@ -1300,9 +1299,9 @@ shl v0.2d, v1.2d, #64 -// CHECK-ARM64-ERROR: error: unexpected token in argument list -// CHECK-ARM64-ERROR: shl v0.4s, v15,2s, #3 -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: unexpected token in argument list +// CHECK-ERROR: shl v0.4s, v15,2s, #3 +// CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: shl v0.2d, v17.4s, #3 @@ -2633,9 +2632,9 @@ pmull2 v0.2d, v1.4s, v2.4s -// CHECK-ARM64-ERROR: error: unexpected token in argument list -// CHECK-ARM64-ERROR: pmull2 v0.4s, v1.8h v2.8h -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: unexpected token in argument list +// CHECK-ERROR: pmull2 v0.4s, v1.8h v2.8h +// CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: pmull2 v0.2d, v1.4s, v2.4s @@ -2959,19 +2958,19 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mla v0.2d, v1.2d, v16.d[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mla v0.2h, v1.2h, v2.h[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -2993,19 +2992,19 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mls v0.2d, v1.2d, v16.d[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mls v0.2h, v1.2h, v2.h[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3030,22 +3029,22 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmla v0.8h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2s, v1.2s, v2.s[4] // 
CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v3.4s, v8.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v3.4s, v8.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3064,22 +3063,22 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmls v0.8h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v3.4s, v8.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v3.4s, v8.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3099,7 +3098,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3108,16 +3107,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal v0.2s, v1.2s, v2.s[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3126,10 +3125,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: 
vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3149,7 +3148,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3158,16 +3157,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl v0.2s, v1.2s, v2.s[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3176,10 +3175,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3199,7 +3198,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3208,16 +3207,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal v0.2s, v1.2s, v2.s[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3226,10 +3225,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: 
vector lane must be an integer in range // CHECK-ERROR: umlal2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3249,7 +3248,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3258,16 +3257,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl v0.2s, v1.2s, v2.s[3] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3276,10 +3275,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3299,7 +3298,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3308,16 +3307,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal v0.2s, v1.2s, v2.s[3] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3326,10 +3325,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // 
CHECK-ERROR: sqdmlal2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3349,7 +3348,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3358,16 +3357,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl v0.2s, v1.2s, v2.s[3] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3376,10 +3375,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3393,28 +3392,28 @@ mul v0.4s, v1.4s, v22.s[4] mul v0.2d, v1.2d, v2.d[1] -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mul v0.4h, v1.4h, v16.h[8] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: invalid operand for instruction +// CHECK-ERROR: invalid operand for instruction // CHECK-ERROR: mul v0.8h, v1.8h, v16.h[8] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3432,22 +3431,22 @@ // 
CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmul v0.4h, v1.4h, v2.h[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3462,22 +3461,22 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmulx v0.4h, v1.4h, v2.h[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3497,7 +3496,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3506,16 +3505,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull v0.2s, v1.2s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull2 v0.4h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in 
range // CHECK-ERROR: smull2 v0.4s, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3524,10 +3523,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull2 v0.2s, v1.4s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull2 v0.2d, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3547,7 +3546,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3556,16 +3555,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull v0.2s, v1.2s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull2 v0.4h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull2 v0.4s, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3574,10 +3573,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull2 v0.2s, v1.4s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull2 v0.2d, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3597,7 +3596,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3606,16 +3605,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull v0.2s, v1.2s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull2 v0.4h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull2 v0.4s, v1.8h, 
v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3624,10 +3623,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull2 v0.2s, v1.4s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull2 v0.2d, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3641,28 +3640,28 @@ sqdmulh v0.4s, v1.4s, v22.s[4] sqdmulh v0.2d, v1.2d, v22.d[1] -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmulh v0.4h, v1.4h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmulh v0.8h, v1.8h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3679,28 +3678,28 @@ sqrdmulh v0.4s, v1.4s, v22.s[4] sqrdmulh v0.2d, v1.2d, v22.d[1] -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqrdmulh v0.4h, v1.4h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqrdmulh v0.8h, v1.8h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: 
invalid operand for instruction @@ -3918,13 +3917,13 @@ ld1 {v4}, [x0] ld1 {v32.16b}, [x0] ld1 {v15.8h}, [x32] -// CHECK-ARM64-ERROR: error: vector register expected +// CHECK-ERROR: error: vector register expected // CHECK-ERROR: ld1 {x3}, [x2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld1 {v4}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: vector register expected +// CHECK-ERROR: error: vector register expected // CHECK-ERROR: ld1 {v32.16b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3938,13 +3937,13 @@ ld1 {v1.8h-v1.8h}, [x0] ld1 {v15.8h-v17.4h}, [x15] ld1 {v0.8b-v2.8b, [x0] -// CHECK-ARM64-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: ld1 {v0.16b, v2.16b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: ld1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: unexpected token in argument list +// CHECK-ERROR: error: unexpected token in argument list // CHECK-ERROR: ld1 v0.8b, v1.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors @@ -3953,7 +3952,7 @@ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: ld1 {v1.8h-v1.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld1 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: '}' expected @@ -3965,15 +3964,15 @@ ld2 {v15.4h, v16.4h, v17.4h}, [x32] ld2 {v15.8h-v16.4h}, [x15] ld2 {v0.2d-v2.2d}, [x0] -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld2 {v15.8h, v16.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: ld2 {v0.8b, v2.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: ld2 {v15.4h, v16.4h, v17.4h}, [x32] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld2 {v15.8h-v16.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3985,16 +3984,16 @@ ld3 {v0.8b, v2.8b, v3.8b}, [x0] ld3 {v15.8h-v17.4h}, [x15] ld3 {v31.4s-v2.4s}, [sp] -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v15.8h, v16.8h, v17.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: ld3 {v0.8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4006,16 +4005,16 @@ ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] ld4 {v15.8h-v18.4h}, [x15] ld4 {v31.2s-v1.2s}, [x31] -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4 
{v15.8h, v16.8h, v17.4h, v18.8h}, [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: ld4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4 {v15.8h-v18.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4026,13 +4025,13 @@ st1 {v4}, [x0] st1 {v32.16b}, [x0] st1 {v15.8h}, [x32] -// CHECK-ARM64-ERROR: error: vector register expected +// CHECK-ERROR: error: vector register expected // CHECK-ERROR: st1 {x3}, [x2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st1 {v4}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: vector register expected +// CHECK-ERROR: error: vector register expected // CHECK-ERROR: st1 {v32.16b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4046,13 +4045,13 @@ st1 {v1.8h-v1.8h}, [x0] st1 {v15.8h-v17.4h}, [x15] st1 {v0.8b-v2.8b, [x0] -// CHECK-ARM64-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: st1 {v0.16b, v2.16b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: st1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: unexpected token in argument list +// CHECK-ERROR: error: unexpected token in argument list // CHECK-ERROR: st1 v0.8b, v1.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors @@ -4061,7 +4060,7 @@ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: st1 {v1.8h-v1.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st1 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: '}' expected @@ -4073,16 +4072,16 @@ st2 {v15.4h, v16.4h, v17.4h}, [x30] st2 {v15.8h-v16.4h}, [x15] st2 {v0.2d-v2.2d}, [x0] -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st2 {v15.8h, v16.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: st2 {v0.8b, v2.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st2 {v15.4h, v16.4h, v17.4h}, [x30] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st2 {v15.8h-v16.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4094,16 +4093,16 @@ st3 {v0.8b, v2.8b, v3.8b}, [x0] st3 {v15.8h-v17.4h}, [x15] st3 {v31.4s-v2.4s}, [sp] -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st3 {v15.8h, v16.8h, v17.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: registers must be 
sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: st3 {v0.8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st3 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4115,16 +4114,16 @@ st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] st4 {v15.8h-v18.4h}, [x15] st4 {v31.2s-v1.2s}, [x31] -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: st4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st4 {v15.8h-v18.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4141,7 +4140,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld1 {v0.16b}, [x0], #8 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: error: invalid vector kind qualifier // CHECK-ERROR: ld1 {v0.8h, v1.16h}, [x0], x1 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4157,7 +4156,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld3 {v5.2s, v6.2s, v7.2s}, [x1], #48 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1 // CHECK-ERROR: ^ @@ -4167,7 +4166,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st1 {v0.16b}, [x0], #8 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: error: invalid vector kind qualifier // CHECK-ERROR: st1 {v0.8h, v1.16h}, [x0], x1 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4183,7 +4182,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st3 {v5.2s, v6.2s, v7.2s}, [x1], #48 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1 // CHECK-ERROR: ^ @@ -4195,16 +4194,16 @@ ld2r {v31.4s, v0.2s}, [sp] ld3r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] ld4r {v31.2s, v0.2s, v1.2d, v2.2s}, [sp] -// CHECK-ARM64-ERROR: error: vector register expected +// CHECK-ERROR: error: vector register expected // CHECK-ERROR: ld1r {x1}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld2r {v31.4s, v0.2s}, [sp] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld3r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4r {v31.2s, v0.2s, v1.2d, v2.2s}, [sp] // CHECK-ERROR: ^ @@ -4216,16 +4215,16 @@ ld2 {v15.h, v16.h}[8], [x15] ld3 {v31.s, v0.s, v1.s}[-1], [sp] ld4 {v0.d, 
v1.d, v2.d, v3.d}[2], [x0] -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: ld1 {v0.b}[16], [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: ld2 {v15.h, v16.h}[8], [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: vector lane must be an integer in range +// CHECK-ERROR: error: vector lane must be an integer in range // CHECK-ERROR: ld3 {v31.s, v0.s, v1.s}[-1], [sp] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: ld4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] // CHECK-ERROR: ^ @@ -4233,16 +4232,16 @@ st2 {v31.s, v0.s}[3], [8] st3 {v15.h, v16.h, v17.h}[-1], [x15] st4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: st1 {v0.d}[16], [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st2 {v31.s, v0.s}[3], [8] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: vector lane must be an integer in range +// CHECK-ERROR: error: vector lane must be an integer in range // CHECK-ERROR: st3 {v15.h, v16.h, v17.h}[-1], [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: st4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] // CHECK-ERROR: ^ @@ -4281,7 +4280,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld2 {v15.h, v16.h}[0], [x15], #3 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v31.s, v0.s, v1.d}[0], [sp], x9 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4315,16 +4314,16 @@ ins v20.s[1], s30 ins v1.d[0], d7 -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v2.b[16], w1 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v7.h[8], w14 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v20.s[5], w30 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v1.d[2], x7 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4351,19 +4350,19 @@ smov x14, v6.d[1] smov x20, v9.d[0] -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR smov w1, v0.b[16] // CHECK-ERROR ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR smov w14, v6.h[8] // CHECK-ERROR ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR smov x1, v0.b[16] // CHECK-ERROR ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR smov x14, v6.h[8] // CHECK-ERROR ^ -// CHECK-ARM64-ERROR: vector lane 
must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR smov x20, v9.s[5] // CHECK-ERROR ^ // CHECK-ERROR error: invalid operand for instruction @@ -4390,16 +4389,16 @@ umov s20, v9.s[2] umov d7, v18.d[1] -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR umov w1, v0.b[16] // CHECK-ERROR ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR umov w14, v6.h[8] // CHECK-ERROR ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR umov w20, v9.s[5] // CHECK-ERROR ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR umov x7, v18.d[3] // CHECK-ERROR ^ // CHECK-ERROR error: invalid operand for instruction @@ -4815,7 +4814,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal s17, h27, s12 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal d19, s24, d12 // CHECK-ERROR: ^ @@ -4829,7 +4828,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl s14, h12, s25 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl d12, s23, d13 // CHECK-ERROR: ^ @@ -4843,7 +4842,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull s12, h22, s12 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull d15, s22, d12 // CHECK-ERROR: ^ @@ -6885,7 +6884,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmul h0, h1, v1.s[0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul s2, s29, v10.s[4] // CHECK-ERROR: ^ @@ -6904,7 +6903,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmulx h0, h1, v1.d[0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx d2, d29, v10.d[3] // CHECK-ERROR: ^ @@ -6923,7 +6922,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmla d30, s11, v1.d[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla s16, s22, v16.s[5] // CHECK-ERROR: ^ @@ -6942,7 +6941,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmls h7, h17, v26.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: vector lane must be an integer in range [0, 1] +// CHECK-ERROR: error: vector lane must be an integer in range [0, 1] // CHECK-ERROR: fmls d16, d22, v16.d[-1] // CHECK-ERROR: ^ @@ -6964,7 +6963,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal s8, s9, v14.s[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal d4, s5, v1.s[5] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ 
-6989,7 +6988,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl d1, h1, v13.s[0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl d1, s1, v13.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -7016,7 +7015,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull s1, s1, v4.s[0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull s12, h17, v9.h[9] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -7041,7 +7040,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmulh s25, s26, v27.h[3] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh s25, s26, v27.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -7066,7 +7065,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqrdmulh s5, h6, v7.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh h31, h30, v14.h[9] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -7098,16 +7097,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: dup d0, v17.s[3] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup d0, v17.d[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup s0, v1.s[7] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup h0, v31.h[16] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup b1, v3.b[16] // CHECK-ERROR: ^ diff --git a/test/MC/AArch64/neon-facge-facgt.s b/test/MC/AArch64/neon-facge-facgt.s index 799b85ff42f5..212eda2f2092 100644 --- a/test/MC/AArch64/neon-facge-facgt.s +++ b/test/MC/AArch64/neon-facge-facgt.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-frsqrt-frecp.s b/test/MC/AArch64/neon-frsqrt-frecp.s index 56bc47154a06..79fe5da5e76f 100644 --- a/test/MC/AArch64/neon-frsqrt-frecp.s +++ b/test/MC/AArch64/neon-frsqrt-frecp.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-halving-add-sub.s b/test/MC/AArch64/neon-halving-add-sub.s index 19b56ced3e6a..555f1b83b4f3 100644 --- a/test/MC/AArch64/neon-halving-add-sub.s +++ b/test/MC/AArch64/neon-halving-add-sub.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple 
arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-max-min-pairwise.s b/test/MC/AArch64/neon-max-min-pairwise.s index e48f97535865..8d2dadb1997f 100644 --- a/test/MC/AArch64/neon-max-min-pairwise.s +++ b/test/MC/AArch64/neon-max-min-pairwise.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-max-min.s b/test/MC/AArch64/neon-max-min.s index 8cc4ac86e650..6d1efde5077f 100644 --- a/test/MC/AArch64/neon-max-min.s +++ b/test/MC/AArch64/neon-max-min.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-mla-mls-instructions.s b/test/MC/AArch64/neon-mla-mls-instructions.s index 5c8b7d8788a4..3072e6f1200d 100644 --- a/test/MC/AArch64/neon-mla-mls-instructions.s +++ b/test/MC/AArch64/neon-mla-mls-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-mov.s b/test/MC/AArch64/neon-mov.s index 6231ffe49c51..567a5ecc5412 100644 --- a/test/MC/AArch64/neon-mov.s +++ b/test/MC/AArch64/neon-mov.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-mul-div-instructions.s b/test/MC/AArch64/neon-mul-div-instructions.s index 2601d50f1319..1fe6d2b819ce 100644 --- a/test/MC/AArch64/neon-mul-div-instructions.s +++ b/test/MC/AArch64/neon-mul-div-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-rounding-halving-add.s b/test/MC/AArch64/neon-rounding-halving-add.s index 55c9f921da75..47ac21268020 100644 --- a/test/MC/AArch64/neon-rounding-halving-add.s +++ b/test/MC/AArch64/neon-rounding-halving-add.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-rounding-shift.s b/test/MC/AArch64/neon-rounding-shift.s index 38924e7c4bd9..e70f766f2b62 100644 --- a/test/MC/AArch64/neon-rounding-shift.s +++ b/test/MC/AArch64/neon-rounding-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | 
FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-saturating-add-sub.s b/test/MC/AArch64/neon-saturating-add-sub.s index d39997901f7b..4a7ed1094262 100644 --- a/test/MC/AArch64/neon-saturating-add-sub.s +++ b/test/MC/AArch64/neon-saturating-add-sub.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-saturating-rounding-shift.s b/test/MC/AArch64/neon-saturating-rounding-shift.s index 702b9d2c60e7..9215c1cabefd 100644 --- a/test/MC/AArch64/neon-saturating-rounding-shift.s +++ b/test/MC/AArch64/neon-saturating-rounding-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-saturating-shift.s b/test/MC/AArch64/neon-saturating-shift.s index d03172b1788e..9ae393a040b6 100644 --- a/test/MC/AArch64/neon-saturating-shift.s +++ b/test/MC/AArch64/neon-saturating-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-abs.s b/test/MC/AArch64/neon-scalar-abs.s index 897c93506e1b..d08756c0c10c 100644 --- a/test/MC/AArch64/neon-scalar-abs.s +++ b/test/MC/AArch64/neon-scalar-abs.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-add-sub.s b/test/MC/AArch64/neon-scalar-add-sub.s index 955c30716b4e..0a3eba732122 100644 --- a/test/MC/AArch64/neon-scalar-add-sub.s +++ b/test/MC/AArch64/neon-scalar-add-sub.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Scalar Integer Add diff --git a/test/MC/AArch64/neon-scalar-by-elem-mla.s b/test/MC/AArch64/neon-scalar-by-elem-mla.s index d4f3682dc2bc..fec9d12d8b8d 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-mla.s +++ b/test/MC/AArch64/neon-scalar-by-elem-mla.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Floating Point fused multiply-add (scalar, by element) diff --git a/test/MC/AArch64/neon-scalar-by-elem-mul.s b/test/MC/AArch64/neon-scalar-by-elem-mul.s index d22aa9b15b29..8b8a3f57a9ca 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-mul.s +++ 
b/test/MC/AArch64/neon-scalar-by-elem-mul.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Floating Point multiply (scalar, by element) diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s index dadb8db99368..e3d7e0514f9f 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s +++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //----------------------------------------------------------------------------- // Signed saturating doubling multiply-add long (scalar, by element) diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s index 90eeb5e64c01..8a8405ef282e 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s +++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //----------------------------------------------------------------------------- // Signed saturating doubling multiply long (scalar, by element) diff --git a/test/MC/AArch64/neon-scalar-compare.s b/test/MC/AArch64/neon-scalar-compare.s index 16ba92e07974..28de46a7733a 100644 --- a/test/MC/AArch64/neon-scalar-compare.s +++ b/test/MC/AArch64/neon-scalar-compare.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-cvt.s b/test/MC/AArch64/neon-scalar-cvt.s index 047495276fb4..97416daf0801 100644 --- a/test/MC/AArch64/neon-scalar-cvt.s +++ b/test/MC/AArch64/neon-scalar-cvt.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-dup.s b/test/MC/AArch64/neon-scalar-dup.s index ba4f3c2ad797..db11ea2aa086 100644 --- a/test/MC/AArch64/neon-scalar-dup.s +++ b/test/MC/AArch64/neon-scalar-dup.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Duplicate element (scalar) diff --git a/test/MC/AArch64/neon-scalar-extract-narrow.s b/test/MC/AArch64/neon-scalar-extract-narrow.s index e6167930d1ca..e25224e386f0 100644 --- a/test/MC/AArch64/neon-scalar-extract-narrow.s +++ b/test/MC/AArch64/neon-scalar-extract-narrow.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu 
-mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-fp-compare.s b/test/MC/AArch64/neon-scalar-fp-compare.s index cb9e7a7a66e0..b798b3410670 100644 --- a/test/MC/AArch64/neon-scalar-fp-compare.s +++ b/test/MC/AArch64/neon-scalar-fp-compare.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-mul.s b/test/MC/AArch64/neon-scalar-mul.s index 21be537cbb7c..e33bdad91a94 100644 --- a/test/MC/AArch64/neon-scalar-mul.s +++ b/test/MC/AArch64/neon-scalar-mul.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-neg.s b/test/MC/AArch64/neon-scalar-neg.s index e902c2307a1d..8e5d61dd2459 100644 --- a/test/MC/AArch64/neon-scalar-neg.s +++ b/test/MC/AArch64/neon-scalar-neg.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-recip.s b/test/MC/AArch64/neon-scalar-recip.s index dde26b557be5..7a886f3b4a73 100644 --- a/test/MC/AArch64/neon-scalar-recip.s +++ b/test/MC/AArch64/neon-scalar-recip.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-reduce-pairwise.s b/test/MC/AArch64/neon-scalar-reduce-pairwise.s index cb7564ac68d1..403a940ec2f2 100644 --- a/test/MC/AArch64/neon-scalar-reduce-pairwise.s +++ b/test/MC/AArch64/neon-scalar-reduce-pairwise.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //---------------------------------------------------------------------- // Scalar Reduce Add Pairwise (Integer) diff --git a/test/MC/AArch64/neon-scalar-rounding-shift.s b/test/MC/AArch64/neon-scalar-rounding-shift.s index 2594c2f2ac54..6113e09af388 100644 --- a/test/MC/AArch64/neon-scalar-rounding-shift.s +++ b/test/MC/AArch64/neon-scalar-rounding-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-saturating-add-sub.s b/test/MC/AArch64/neon-scalar-saturating-add-sub.s index d5cd838a92bb..0bf243495999 100644 --- a/test/MC/AArch64/neon-scalar-saturating-add-sub.s +++ b/test/MC/AArch64/neon-scalar-saturating-add-sub.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | 
FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Scalar Integer Saturating Add (Signed) diff --git a/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s index 83bd59f50c84..b09a58923445 100644 --- a/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s +++ b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Scalar Integer Saturating Rounding Shift Lef (Signed) diff --git a/test/MC/AArch64/neon-scalar-saturating-shift.s b/test/MC/AArch64/neon-scalar-saturating-shift.s index 679f1f4052c9..b53c9f072f35 100644 --- a/test/MC/AArch64/neon-scalar-saturating-shift.s +++ b/test/MC/AArch64/neon-scalar-saturating-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Scalar Integer Saturating Shift Lef (Signed) diff --git a/test/MC/AArch64/neon-scalar-shift-imm.s b/test/MC/AArch64/neon-scalar-shift-imm.s index 47a8dec212b1..96cb815eafa8 100644 --- a/test/MC/AArch64/neon-scalar-shift-imm.s +++ b/test/MC/AArch64/neon-scalar-shift-imm.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-shift.s b/test/MC/AArch64/neon-scalar-shift.s index 98aa51a63da2..366840a93159 100644 --- a/test/MC/AArch64/neon-scalar-shift.s +++ b/test/MC/AArch64/neon-scalar-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Scalar Integer Shift Lef (Signed) diff --git a/test/MC/AArch64/neon-shift-left-long.s b/test/MC/AArch64/neon-shift-left-long.s index 87204683104e..97604587424e 100644 --- a/test/MC/AArch64/neon-shift-left-long.s +++ b/test/MC/AArch64/neon-shift-left-long.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-shift.s b/test/MC/AArch64/neon-shift.s index dcff992a7824..614e6de16222 100644 --- a/test/MC/AArch64/neon-shift.s +++ b/test/MC/AArch64/neon-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-copy.s b/test/MC/AArch64/neon-simd-copy.s index 
917f7cb524ed..4837a4cb9ee8 100644 --- a/test/MC/AArch64/neon-simd-copy.s +++ b/test/MC/AArch64/neon-simd-copy.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-shift.s b/test/MC/AArch64/neon-simd-shift.s index 1c1ad7489d59..a16432324efc 100644 --- a/test/MC/AArch64/neon-simd-shift.s +++ b/test/MC/AArch64/neon-simd-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-sxtl.s b/test/MC/AArch64/neon-sxtl.s index 363796ee3341..0fe26cb5e8e5 100644 --- a/test/MC/AArch64/neon-sxtl.s +++ b/test/MC/AArch64/neon-sxtl.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-uxtl.s b/test/MC/AArch64/neon-uxtl.s index 46c56625c0f7..685b6362bcb1 100644 --- a/test/MC/AArch64/neon-uxtl.s +++ b/test/MC/AArch64/neon-uxtl.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/noneon-diagnostics.s b/test/MC/AArch64/noneon-diagnostics.s index 470a74d5b317..60a5fd208af9 100644 --- a/test/MC/AArch64/noneon-diagnostics.s +++ b/test/MC/AArch64/noneon-diagnostics.s @@ -1,48 +1,29 @@ -// RUN: not llvm-mc -triple arm64-none-linux-gnu -mattr=-neon < %s 2> %t -// RUN: FileCheck --check-prefix=CHECK-ARM64-ERROR < %t %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=-neon < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s fmla v3.4s, v12.4s, v17.4s fmla v1.2d, v30.2d, v20.2d fmla v9.2s, v9.2s, v0.2s -// CHECK-ERROR: error: instruction requires a CPU feature not currently enabled +// CHECK-ERROR: error: instruction requires: neon // CHECK-ERROR-NEXT: fmla v3.4s, v12.4s, v17.4s // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled +// CHECK-ERROR-NEXT: error: instruction requires: neon // CHECK-ERROR-NEXT: fmla v1.2d, v30.2d, v20.2d // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled +// CHECK-ERROR-NEXT: error: instruction requires: neon // CHECK-ERROR-NEXT: fmla v9.2s, v9.2s, v0.2s // CHECK-ERROR-NEXT: ^ -// CHECK-ARM64-ERROR: error: instruction requires: neon -// CHECK-ARM64-ERROR-NEXT: fmla v3.4s, v12.4s, v17.4s -// CHECK-ARM64-ERROR-NEXT: ^ -// CHECK-ARM64-ERROR-NEXT: error: instruction requires: neon -// CHECK-ARM64-ERROR-NEXT: fmla v1.2d, v30.2d, v20.2d -// CHECK-ARM64-ERROR-NEXT: ^ -// CHECK-ARM64-ERROR-NEXT: error: instruction requires: neon -// CHECK-ARM64-ERROR-NEXT: fmla v9.2s, v9.2s, v0.2s -// CHECK-ARM64-ERROR-NEXT: ^ - fmls v3.4s, v12.4s, v17.4s fmls v1.2d, v30.2d, v20.2d fmls v9.2s, v9.2s, v0.2s -// CHECK-ERROR: error: instruction requires a 
CPU feature not currently enabled + +// CHECK-ERROR: error: instruction requires: neon // CHECK-ERROR-NEXT: fmls v3.4s, v12.4s, v17.4s // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled +// CHECK-ERROR-NEXT: error: instruction requires: neon // CHECK-ERROR-NEXT: fmls v1.2d, v30.2d, v20.2d // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled +// CHECK-ERROR-NEXT: error: instruction requires: neon // CHECK-ERROR-NEXT: fmls v9.2s, v9.2s, v0.2s // CHECK-ERROR-NEXT: ^ - -// CHECK-ARM64-ERROR: error: instruction requires: neon -// CHECK-ARM64-ERROR-NEXT: fmls v3.4s, v12.4s, v17.4s -// CHECK-ARM64-ERROR-NEXT: ^ -// CHECK-ARM64-ERROR-NEXT: error: instruction requires: neon -// CHECK-ARM64-ERROR-NEXT: fmls v1.2d, v30.2d, v20.2d -// CHECK-ARM64-ERROR-NEXT: ^ -// CHECK-ARM64-ERROR-NEXT: error: instruction requires: neon -// CHECK-ARM64-ERROR-NEXT: fmls v9.2s, v9.2s, v0.2s -// CHECK-ARM64-ERROR-NEXT: ^ diff --git a/test/MC/AArch64/optional-hash.s b/test/MC/AArch64/optional-hash.s index 7ae1aa490476..3922b5be34a1 100644 --- a/test/MC/AArch64/optional-hash.s +++ b/test/MC/AArch64/optional-hash.s @@ -1,5 +1,5 @@ // PR18929 -// RUN: llvm-mc < %s -triple=arm64-linux-gnueabi -mattr=+fp-armv8,+neon -filetype=obj -o - \ +// RUN: llvm-mc < %s -triple=aarch64-linux-gnueabi -mattr=+fp-armv8,+neon -filetype=obj -o - \ // RUN: | llvm-objdump --disassemble -arch=arm64 -mattr=+fp-armv8,+neon - | FileCheck %s .text diff --git a/test/MC/AArch64/tls-relocs.s b/test/MC/AArch64/tls-relocs.s index ae7b20cefd56..ebf02167a8f3 100644 --- a/test/MC/AArch64/tls-relocs.s +++ b/test/MC/AArch64/tls-relocs.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -show-encoding < %s | FileCheck %s --check-prefix=CHECK-ARM64 -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s -o - | \ // RUN: llvm-readobj -r -t | FileCheck --check-prefix=CHECK-ELF %s // TLS local-dynamic forms @@ -8,14 +8,14 @@ movz x3, #:dtprel_g2:var movn x4, #:dtprel_g2:var -// CHECK-ARM64: movz x1, #:dtprel_g2:var // encoding: [0bAAA00001,A,0b110AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn x2, #:dtprel_g2:var // encoding: [0bAAA00010,A,0b110AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw -// CHECK-ARM64: movz x3, #:dtprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn x4, #:dtprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw +// CHECK: movz x1, #:dtprel_g2:var // encoding: [0bAAA00001,A,0b110AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw +// CHECK: movn x2, #:dtprel_g2:var // encoding: [0bAAA00010,A,0b110AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw +// CHECK: movz x3, #:dtprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw +// CHECK: movn x4, #:dtprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g2:var, kind: 
fixup_aarch64_movw // CHECK-ELF: Relocations [ // CHECK-ELF-NEXT: Section (2) .rela.text { @@ -30,14 +30,14 @@ movz w7, #:dtprel_g1:var movn w8, #:dtprel_g1:var -// CHECK-ARM64: movz x5, #:dtprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn x6, #:dtprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw -// CHECK-ARM64: movz w7, #:dtprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn w8, #:dtprel_g1:var // encoding: [0bAAA01000,A,0b101AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw +// CHECK: movz x5, #:dtprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw +// CHECK: movn x6, #:dtprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw +// CHECK: movz w7, #:dtprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw +// CHECK: movn w8, #:dtprel_g1:var // encoding: [0bAAA01000,A,0b101AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x10 R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]] // CHECK-ELF-NEXT: 0x14 R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]] @@ -48,10 +48,10 @@ movk x9, #:dtprel_g1_nc:var movk w10, #:dtprel_g1_nc:var -// CHECK-ARM64: movk x9, #:dtprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_arm64_movw -// CHECK-ARM64: movk w10, #:dtprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_arm64_movw +// CHECK: movk x9, #:dtprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] +// CHECK: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_aarch64_movw +// CHECK: movk w10, #:dtprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72] +// CHECK: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x20 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]] // CHECK-ELF-NEXT: 0x24 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]] @@ -62,14 +62,14 @@ movz w13, #:dtprel_g0:var movn w14, #:dtprel_g0:var -// CHECK-ARM64: movz x11, #:dtprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn x12, #:dtprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw -// CHECK-ARM64: movz w13, #:dtprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn w14, #:dtprel_g0:var // encoding: [0bAAA01110,A,0b100AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw +// CHECK: movz x11, #:dtprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw +// CHECK: movn x12, #:dtprel_g0:var // encoding: 
[0bAAA01100,A,0b100AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw +// CHECK: movz w13, #:dtprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw +// CHECK: movn w14, #:dtprel_g0:var // encoding: [0bAAA01110,A,0b100AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x28 R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]] // CHECK-ELF-NEXT: 0x2C R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]] @@ -80,10 +80,10 @@ movk x15, #:dtprel_g0_nc:var movk w16, #:dtprel_g0_nc:var -// CHECK-ARM64: movk x15, #:dtprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_arm64_movw -// CHECK-ARM64: movk w16, #:dtprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK: movk x15, #:dtprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] +// CHECK: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw +// CHECK: movk w16, #:dtprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72] +// CHECK: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x38 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]] // CHECK-ELF-NEXT: 0x3C R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]] @@ -92,10 +92,10 @@ add x17, x18, #:dtprel_hi12:var, lsl #12 add w19, w20, #:dtprel_hi12:var, lsl #12 -// CHECK-ARM64: add x17, x18, :dtprel_hi12:var, lsl #12 // encoding: [0x51,0bAAAAAA10,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_arm64_add_imm12 -// CHECK-ARM64: add w19, w20, :dtprel_hi12:var, lsl #12 // encoding: [0x93,0bAAAAAA10,0b00AAAAAA,0x11] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_arm64_add_imm12 +// CHECK: add x17, x18, :dtprel_hi12:var, lsl #12 // encoding: [0x51,0bAAAAAA10,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_aarch64_add_imm12 +// CHECK: add w19, w20, :dtprel_hi12:var, lsl #12 // encoding: [0x93,0bAAAAAA10,0b00AAAAAA,0x11] +// CHECK: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: 0x40 R_AARCH64_TLSLD_ADD_DTPREL_HI12 [[VARSYM]] // CHECK-ELF-NEXT: 0x44 R_AARCH64_TLSLD_ADD_DTPREL_HI12 [[VARSYM]] @@ -104,10 +104,10 @@ add x21, x22, #:dtprel_lo12:var add w23, w24, #:dtprel_lo12:var -// CHECK-ARM64: add x21, x22, :dtprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_add_imm12 -// CHECK-ARM64: add w23, w24, :dtprel_lo12:var // encoding: [0x17,0bAAAAAA11,0b00AAAAAA,0x11] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_add_imm12 +// CHECK: add x21, x22, :dtprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_add_imm12 +// CHECK: add w23, w24, :dtprel_lo12:var // encoding: [0x17,0bAAAAAA11,0b00AAAAAA,0x11] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: 0x48 R_AARCH64_TLSLD_ADD_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0x4C R_AARCH64_TLSLD_ADD_DTPREL_LO12 [[VARSYM]] @@ -116,10 +116,10 @@ add x25, x26, #:dtprel_lo12_nc:var add w27, w28, #:dtprel_lo12_nc:var -// 
CHECK-ARM64: add x25, x26, :dtprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_add_imm12 -// CHECK-ARM64: add w27, w28, :dtprel_lo12_nc:var // encoding: [0x9b,0bAAAAAA11,0b00AAAAAA,0x11] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_add_imm12 +// CHECK: add x25, x26, :dtprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_add_imm12 +// CHECK: add w27, w28, :dtprel_lo12_nc:var // encoding: [0x9b,0bAAAAAA11,0b00AAAAAA,0x11] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: 0x50 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]] // CHECK-ELF-NEXT: 0x54 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]] @@ -128,10 +128,10 @@ ldrb w29, [x30, #:dtprel_lo12:var] ldrsb x29, [x28, #:dtprel_lo12_nc:var] -// CHECK-ARM64: ldrb w29, [x30, :dtprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale1 -// CHECK-ARM64: ldrsb x29, [x28, :dtprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale1 +// CHECK: ldrb w29, [x30, :dtprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1 +// CHECK: ldrsb x29, [x28, :dtprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1 // CHECK-ELF-NEXT: 0x58 R_AARCH64_TLSLD_LDST8_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0x5C R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC [[VARSYM]] @@ -140,10 +140,10 @@ strh w27, [x26, #:dtprel_lo12:var] ldrsh x25, [x24, #:dtprel_lo12_nc:var] -// CHECK-ARM64: strh w27, [x26, :dtprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale2 -// CHECK-ARM64: ldrsh x25, [x24, :dtprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale2 +// CHECK: strh w27, [x26, :dtprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2 +// CHECK: ldrsh x25, [x24, :dtprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2 // CHECK-ELF-NEXT: 0x60 R_AARCH64_TLSLD_LDST16_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0x64 R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC [[VARSYM]] @@ -152,10 +152,10 @@ ldr w23, [x22, #:dtprel_lo12:var] ldrsw x21, [x20, #:dtprel_lo12_nc:var] -// CHECK-ARM64: ldr w23, [x22, :dtprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale4 -// CHECK-ARM64: ldrsw x21, [x20, :dtprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale4 +// CHECK: ldr w23, [x22, :dtprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] 
+// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4 +// CHECK: ldrsw x21, [x20, :dtprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4 // CHECK-ELF-NEXT: 0x68 R_AARCH64_TLSLD_LDST32_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0x6C R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC [[VARSYM]] @@ -164,10 +164,10 @@ ldr x19, [x18, #:dtprel_lo12:var] str x17, [x16, #:dtprel_lo12_nc:var] -// CHECK-ARM64: ldr x19, [x18, :dtprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 -// CHECK-ARM64: str x17, [x16, :dtprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK: ldr x19, [x18, :dtprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 +// CHECK: str x17, [x16, :dtprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK-ELF-NEXT: 0x70 R_AARCH64_TLSLD_LDST64_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0x74 R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC [[VARSYM]] @@ -177,10 +177,10 @@ movz x15, #:gottprel_g1:var movz w14, #:gottprel_g1:var -// CHECK-ARM64: movz x15, #:gottprel_g1:var // encoding: [0bAAA01111,A,0b101AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_arm64_movw -// CHECK-ARM64: movz w14, #:gottprel_g1:var // encoding: [0bAAA01110,A,0b101AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_arm64_movw +// CHECK: movz x15, #:gottprel_g1:var // encoding: [0bAAA01111,A,0b101AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_aarch64_movw +// CHECK: movz w14, #:gottprel_g1:var // encoding: [0bAAA01110,A,0b101AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x78 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 [[VARSYM]] // CHECK-ELF-NEXT: 0x7C R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 [[VARSYM]] @@ -189,10 +189,10 @@ movk x13, #:gottprel_g0_nc:var movk w12, #:gottprel_g0_nc:var -// CHECK-ARM64: movk x13, #:gottprel_g0_nc:var // encoding: [0bAAA01101,A,0b100AAAAA,0xf2] -// CHECK-ARM64: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_arm64_movw -// CHECK-ARM64: movk w12, #:gottprel_g0_nc:var // encoding: [0bAAA01100,A,0b100AAAAA,0x72] -// CHECK-ARM64: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK: movk x13, #:gottprel_g0_nc:var // encoding: [0bAAA01101,A,0b100AAAAA,0xf2] +// CHECK: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_aarch64_movw +// CHECK: movk w12, #:gottprel_g0_nc:var // encoding: [0bAAA01100,A,0b100AAAAA,0x72] +// CHECK: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x80 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC [[VARSYM]] // CHECK-ELF-NEXT: 0x84 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC [[VARSYM]] @@ -202,12 +202,12 @@ ldr x10, [x0, #:gottprel_lo12:var] ldr x9, :gottprel:var -// CHECK-ARM64: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A'] -// CHECK-ARM64: // fixup A - offset: 0, value: :gottprel:var, kind: 
fixup_arm64_pcrel_adrp_imm21 -// CHECK-ARM64: ldr x10, [x0, :gottprel_lo12:var] // encoding: [0x0a,0bAAAAAA00,0b01AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 -// CHECK-ARM64: ldr x9, :gottprel:var // encoding: [0bAAA01001,A,A,0x58] -// CHECK-ARM64: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_arm64_ldr_pcrel_imm19 +// CHECK: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A'] +// CHECK: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_pcrel_adrp_imm21 +// CHECK: ldr x10, [x0, :gottprel_lo12:var] // encoding: [0x0a,0bAAAAAA00,0b01AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 +// CHECK: ldr x9, :gottprel:var // encoding: [0bAAA01001,A,A,0x58] +// CHECK: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_ldr_pcrel_imm19 // CHECK-ELF-NEXT: 0x88 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 [[VARSYM]] // CHECK-ELF-NEXT: 0x8C R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC [[VARSYM]] @@ -218,10 +218,10 @@ movz x3, #:tprel_g2:var movn x4, #:tprel_g2:var -// CHECK-ARM64: movz x3, #:tprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn x4, #:tprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_arm64_movw +// CHECK: movz x3, #:tprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_aarch64_movw +// CHECK: movn x4, #:tprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x94 R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]] // CHECK-ELF-NEXT: 0x98 R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]] @@ -232,14 +232,14 @@ movz w7, #:tprel_g1:var movn w8, #:tprel_g1:var -// CHECK-ARM64: movz x5, #:tprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn x6, #:tprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw -// CHECK-ARM64: movz w7, #:tprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn w8, #:tprel_g1:var // encoding: [0bAAA01000,A,0b101AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw +// CHECK: movz x5, #:tprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw +// CHECK: movn x6, #:tprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw +// CHECK: movz w7, #:tprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw +// CHECK: movn w8, #:tprel_g1:var // encoding: [0bAAA01000,A,0b101AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x9C R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]] // CHECK-ELF-NEXT: 0xA0 R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]] @@ -250,10 +250,10 @@ movk x9, #:tprel_g1_nc:var movk w10, #:tprel_g1_nc:var -// CHECK-ARM64: movk 
x9, #:tprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_arm64_movw -// CHECK-ARM64: movk w10, #:tprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_arm64_movw +// CHECK: movk x9, #:tprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] +// CHECK: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_aarch64_movw +// CHECK: movk w10, #:tprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72] +// CHECK: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0xAC R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]] // CHECK-ELF-NEXT: 0xB0 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]] @@ -264,14 +264,14 @@ movz w13, #:tprel_g0:var movn w14, #:tprel_g0:var -// CHECK-ARM64: movz x11, #:tprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn x12, #:tprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw -// CHECK-ARM64: movz w13, #:tprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn w14, #:tprel_g0:var // encoding: [0bAAA01110,A,0b100AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw +// CHECK: movz x11, #:tprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw +// CHECK: movn x12, #:tprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw +// CHECK: movz w13, #:tprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw +// CHECK: movn w14, #:tprel_g0:var // encoding: [0bAAA01110,A,0b100AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0xB4 R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]] // CHECK-ELF-NEXT: 0xB8 R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]] @@ -282,10 +282,10 @@ movk x15, #:tprel_g0_nc:var movk w16, #:tprel_g0_nc:var -// CHECK-ARM64: movk x15, #:tprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_arm64_movw -// CHECK-ARM64: movk w16, #:tprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK: movk x15, #:tprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] +// CHECK: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw +// CHECK: movk w16, #:tprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72] +// CHECK: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0xC4 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]] // CHECK-ELF-NEXT: 0xC8 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]] @@ -294,10 +294,10 @@ add x17, x18, #:tprel_hi12:var, lsl #12 add w19, w20, #:tprel_hi12:var, lsl #12 -// CHECK-ARM64: add x17, x18, :tprel_hi12:var, lsl #12 // encoding: [0x51,0bAAAAAA10,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_arm64_add_imm12 -// 
CHECK-ARM64: add w19, w20, :tprel_hi12:var, lsl #12 // encoding: [0x93,0bAAAAAA10,0b00AAAAAA,0x11] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_arm64_add_imm12 +// CHECK: add x17, x18, :tprel_hi12:var, lsl #12 // encoding: [0x51,0bAAAAAA10,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_aarch64_add_imm12 +// CHECK: add w19, w20, :tprel_hi12:var, lsl #12 // encoding: [0x93,0bAAAAAA10,0b00AAAAAA,0x11] +// CHECK: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: 0xCC R_AARCH64_TLSLE_ADD_TPREL_HI12 [[VARSYM]] // CHECK-ELF-NEXT: 0xD0 R_AARCH64_TLSLE_ADD_TPREL_HI12 [[VARSYM]] @@ -306,10 +306,10 @@ add x21, x22, #:tprel_lo12:var add w23, w24, #:tprel_lo12:var -// CHECK-ARM64: add x21, x22, :tprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_add_imm12 -// CHECK-ARM64: add w23, w24, :tprel_lo12:var // encoding: [0x17,0bAAAAAA11,0b00AAAAAA,0x11] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_add_imm12 +// CHECK: add x21, x22, :tprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_add_imm12 +// CHECK: add w23, w24, :tprel_lo12:var // encoding: [0x17,0bAAAAAA11,0b00AAAAAA,0x11] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: 0xD4 R_AARCH64_TLSLE_ADD_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0xD8 R_AARCH64_TLSLE_ADD_TPREL_LO12 [[VARSYM]] @@ -318,10 +318,10 @@ add x25, x26, #:tprel_lo12_nc:var add w27, w28, #:tprel_lo12_nc:var -// CHECK-ARM64: add x25, x26, :tprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_add_imm12 -// CHECK-ARM64: add w27, w28, :tprel_lo12_nc:var // encoding: [0x9b,0bAAAAAA11,0b00AAAAAA,0x11] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_add_imm12 +// CHECK: add x25, x26, :tprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_add_imm12 +// CHECK: add w27, w28, :tprel_lo12_nc:var // encoding: [0x9b,0bAAAAAA11,0b00AAAAAA,0x11] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: 0xDC R_AARCH64_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]] // CHECK-ELF-NEXT: 0xE0 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]] @@ -330,10 +330,10 @@ ldrb w29, [x30, #:tprel_lo12:var] ldrsb x29, [x28, #:tprel_lo12_nc:var] -// CHECK-ARM64: ldrb w29, [x30, :tprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale1 -// CHECK-ARM64: ldrsb x29, [x28, :tprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale1 +// CHECK: ldrb w29, [x30, :tprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1 +// CHECK: ldrsb x29, [x28, :tprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1 // CHECK-ELF-NEXT: 0xE4 
R_AARCH64_TLSLE_LDST8_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0xE8 R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC [[VARSYM]] @@ -342,10 +342,10 @@ strh w27, [x26, #:tprel_lo12:var] ldrsh x25, [x24, #:tprel_lo12_nc:var] -// CHECK-ARM64: strh w27, [x26, :tprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale2 -// CHECK-ARM64: ldrsh x25, [x24, :tprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale2 +// CHECK: strh w27, [x26, :tprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2 +// CHECK: ldrsh x25, [x24, :tprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2 // CHECK-ELF-NEXT: 0xEC R_AARCH64_TLSLE_LDST16_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0xF0 R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC [[VARSYM]] @@ -354,10 +354,10 @@ ldr w23, [x22, #:tprel_lo12:var] ldrsw x21, [x20, #:tprel_lo12_nc:var] -// CHECK-ARM64: ldr w23, [x22, :tprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale4 -// CHECK-ARM64: ldrsw x21, [x20, :tprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale4 +// CHECK: ldr w23, [x22, :tprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4 +// CHECK: ldrsw x21, [x20, :tprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4 // CHECK-ELF-NEXT: 0xF4 R_AARCH64_TLSLE_LDST32_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0xF8 R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC [[VARSYM]] @@ -365,10 +365,10 @@ ldr x19, [x18, #:tprel_lo12:var] str x17, [x16, #:tprel_lo12_nc:var] -// CHECK-ARM64: ldr x19, [x18, :tprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 -// CHECK-ARM64: str x17, [x16, :tprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK: ldr x19, [x18, :tprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 +// CHECK: str x17, [x16, :tprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK-ELF-NEXT: 0xFC R_AARCH64_TLSLE_LDST64_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0x100 R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC [[VARSYM]] @@ -381,15 +381,15 @@ blr x3 -// CHECK-ARM64: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A'] -// CHECK-ARM64: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_arm64_pcrel_adrp_imm21 -// CHECK-ARM64: ldr x7, [x6, :tlsdesc_lo12:var] // encoding: [0xc7,0bAAAAAA00,0b01AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: 
fixup_arm64_ldst_imm12_scale8 -// CHECK-ARM64: add x5, x4, :tlsdesc_lo12:var // encoding: [0x85,0bAAAAAA00,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_arm64_add_imm12 -// CHECK-ARM64: .tlsdesccall var // encoding: [] -// CHECK-ARM64: // fixup A - offset: 0, value: var, kind: fixup_arm64_tlsdesc_call -// CHECK-ARM64: blr x3 // encoding: [0x60,0x00,0x3f,0xd6] +// CHECK: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A'] +// CHECK: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_aarch64_pcrel_adrp_imm21 +// CHECK: ldr x7, [x6, :tlsdesc_lo12:var] // encoding: [0xc7,0bAAAAAA00,0b01AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 +// CHECK: add x5, x4, :tlsdesc_lo12:var // encoding: [0x85,0bAAAAAA00,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_add_imm12 +// CHECK: .tlsdesccall var // encoding: [] +// CHECK: // fixup A - offset: 0, value: var, kind: fixup_aarch64_tlsdesc_call +// CHECK: blr x3 // encoding: [0x60,0x00,0x3f,0xd6] // CHECK-ELF-NEXT: 0x104 R_AARCH64_TLSDESC_ADR_PAGE [[VARSYM]] // CHECK-ELF-NEXT: 0x108 R_AARCH64_TLSDESC_LD64_LO12_NC [[VARSYM]] diff --git a/test/MC/AArch64/trace-regs-diagnostics.s b/test/MC/AArch64/trace-regs-diagnostics.s index fa57817dd38a..41331e7703c8 100644 --- a/test/MC/AArch64/trace-regs-diagnostics.s +++ b/test/MC/AArch64/trace-regs-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple arm64-none-linux-gnu < %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s // Write-only mrs x12, trcoslar mrs x10, trclar diff --git a/test/MC/AArch64/trace-regs.s b/test/MC/AArch64/trace-regs.s index be25f08947b6..92f16cd54f31 100644 --- a/test/MC/AArch64/trace-regs.s +++ b/test/MC/AArch64/trace-regs.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s mrs x8, trcstatr mrs x9, trcidr8 diff --git a/test/MC/ARM64/lit.local.cfg b/test/MC/ARM64/lit.local.cfg deleted file mode 100644 index 4d6d8826b531..000000000000 --- a/test/MC/ARM64/lit.local.cfg +++ /dev/null @@ -1,6 +0,0 @@ -config.suffixes = ['.ll', '.s'] - -targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: - config.unsupported = True - diff --git a/test/MC/Disassembler/ARM64/advsimd.txt b/test/MC/Disassembler/AArch64/arm64-advsimd.txt similarity index 100% rename from test/MC/Disassembler/ARM64/advsimd.txt rename to test/MC/Disassembler/AArch64/arm64-advsimd.txt diff --git a/test/MC/Disassembler/ARM64/arithmetic.txt b/test/MC/Disassembler/AArch64/arm64-arithmetic.txt similarity index 100% rename from test/MC/Disassembler/ARM64/arithmetic.txt rename to test/MC/Disassembler/AArch64/arm64-arithmetic.txt diff --git a/test/MC/Disassembler/ARM64/basic-a64-undefined.txt b/test/MC/Disassembler/AArch64/arm64-basic-a64-undefined.txt similarity index 100% rename from test/MC/Disassembler/ARM64/basic-a64-undefined.txt rename to test/MC/Disassembler/AArch64/arm64-basic-a64-undefined.txt diff --git a/test/MC/Disassembler/ARM64/bitfield.txt b/test/MC/Disassembler/AArch64/arm64-bitfield.txt similarity index 100% rename from test/MC/Disassembler/ARM64/bitfield.txt rename to test/MC/Disassembler/AArch64/arm64-bitfield.txt diff --git a/test/MC/Disassembler/ARM64/branch.txt b/test/MC/Disassembler/AArch64/arm64-branch.txt similarity index 100% rename from 
test/MC/Disassembler/ARM64/branch.txt rename to test/MC/Disassembler/AArch64/arm64-branch.txt diff --git a/test/MC/Disassembler/ARM64/canonical-form.txt b/test/MC/Disassembler/AArch64/arm64-canonical-form.txt similarity index 100% rename from test/MC/Disassembler/ARM64/canonical-form.txt rename to test/MC/Disassembler/AArch64/arm64-canonical-form.txt diff --git a/test/MC/Disassembler/ARM64/crc32.txt b/test/MC/Disassembler/AArch64/arm64-crc32.txt similarity index 100% rename from test/MC/Disassembler/ARM64/crc32.txt rename to test/MC/Disassembler/AArch64/arm64-crc32.txt diff --git a/test/MC/Disassembler/ARM64/crypto.txt b/test/MC/Disassembler/AArch64/arm64-crypto.txt similarity index 100% rename from test/MC/Disassembler/ARM64/crypto.txt rename to test/MC/Disassembler/AArch64/arm64-crypto.txt diff --git a/test/MC/Disassembler/ARM64/invalid-logical.txt b/test/MC/Disassembler/AArch64/arm64-invalid-logical.txt similarity index 100% rename from test/MC/Disassembler/ARM64/invalid-logical.txt rename to test/MC/Disassembler/AArch64/arm64-invalid-logical.txt diff --git a/test/MC/Disassembler/ARM64/logical.txt b/test/MC/Disassembler/AArch64/arm64-logical.txt similarity index 100% rename from test/MC/Disassembler/ARM64/logical.txt rename to test/MC/Disassembler/AArch64/arm64-logical.txt diff --git a/test/MC/Disassembler/ARM64/memory.txt b/test/MC/Disassembler/AArch64/arm64-memory.txt similarity index 100% rename from test/MC/Disassembler/ARM64/memory.txt rename to test/MC/Disassembler/AArch64/arm64-memory.txt diff --git a/test/MC/Disassembler/ARM64/non-apple-fmov.txt b/test/MC/Disassembler/AArch64/arm64-non-apple-fmov.txt similarity index 100% rename from test/MC/Disassembler/ARM64/non-apple-fmov.txt rename to test/MC/Disassembler/AArch64/arm64-non-apple-fmov.txt diff --git a/test/MC/Disassembler/ARM64/scalar-fp.txt b/test/MC/Disassembler/AArch64/arm64-scalar-fp.txt similarity index 100% rename from test/MC/Disassembler/ARM64/scalar-fp.txt rename to test/MC/Disassembler/AArch64/arm64-scalar-fp.txt diff --git a/test/MC/Disassembler/ARM64/system.txt b/test/MC/Disassembler/AArch64/arm64-system.txt similarity index 100% rename from test/MC/Disassembler/ARM64/system.txt rename to test/MC/Disassembler/AArch64/arm64-system.txt diff --git a/test/MC/Disassembler/AArch64/lit.local.cfg b/test/MC/Disassembler/AArch64/lit.local.cfg index 653697414894..2c423d139bb0 100644 --- a/test/MC/Disassembler/AArch64/lit.local.cfg +++ b/test/MC/Disassembler/AArch64/lit.local.cfg @@ -1,4 +1,4 @@ targets = set(config.root.targets_to_build.split()) -if 'ARM64' not in targets: +if 'AArch64' not in targets: config.unsupported = True diff --git a/test/MC/Disassembler/ARM64/lit.local.cfg b/test/MC/Disassembler/ARM64/lit.local.cfg deleted file mode 100644 index 46a946845e1c..000000000000 --- a/test/MC/Disassembler/ARM64/lit.local.cfg +++ /dev/null @@ -1,5 +0,0 @@ -config.suffixes = ['.txt'] - -targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: - config.unsupported = True diff --git a/test/MC/MachO/ARM64/darwin-ARM64-local-label-diff.s b/test/MC/MachO/AArch64/darwin-ARM64-local-label-diff.s similarity index 100% rename from test/MC/MachO/ARM64/darwin-ARM64-local-label-diff.s rename to test/MC/MachO/AArch64/darwin-ARM64-local-label-diff.s diff --git a/test/MC/MachO/ARM64/darwin-ARM64-reloc.s b/test/MC/MachO/AArch64/darwin-ARM64-reloc.s similarity index 100% rename from test/MC/MachO/ARM64/darwin-ARM64-reloc.s rename to test/MC/MachO/AArch64/darwin-ARM64-reloc.s diff --git 
a/test/Transforms/GlobalMerge/ARM64/lit.local.cfg b/test/MC/MachO/AArch64/lit.local.cfg similarity index 74% rename from test/Transforms/GlobalMerge/ARM64/lit.local.cfg rename to test/MC/MachO/AArch64/lit.local.cfg index a75a42b6f74c..9a66a00189ea 100644 --- a/test/Transforms/GlobalMerge/ARM64/lit.local.cfg +++ b/test/MC/MachO/AArch64/lit.local.cfg @@ -1,4 +1,4 @@ targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: +if not 'AArch64' in targets: config.unsupported = True diff --git a/test/Transforms/ConstantHoisting/ARM64/const-addr.ll b/test/Transforms/ConstantHoisting/AArch64/const-addr.ll similarity index 100% rename from test/Transforms/ConstantHoisting/ARM64/const-addr.ll rename to test/Transforms/ConstantHoisting/AArch64/const-addr.ll diff --git a/test/Transforms/ConstantHoisting/ARM64/large-immediate.ll b/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll similarity index 100% rename from test/Transforms/ConstantHoisting/ARM64/large-immediate.ll rename to test/Transforms/ConstantHoisting/AArch64/large-immediate.ll diff --git a/test/Transforms/ConstantHoisting/ARM64/lit.local.cfg b/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg similarity index 73% rename from test/Transforms/ConstantHoisting/ARM64/lit.local.cfg rename to test/Transforms/ConstantHoisting/AArch64/lit.local.cfg index 84ac9811f012..c42034979fcf 100644 --- a/test/Transforms/ConstantHoisting/ARM64/lit.local.cfg +++ b/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg @@ -1,3 +1,3 @@ targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: +if not 'AArch64' in targets: config.unsupported = True diff --git a/test/Transforms/GlobalMerge/ARM64/arm64.ll b/test/Transforms/GlobalMerge/AArch64/arm64.ll similarity index 100% rename from test/Transforms/GlobalMerge/ARM64/arm64.ll rename to test/Transforms/GlobalMerge/AArch64/arm64.ll diff --git a/test/DebugInfo/ARM64/lit.local.cfg b/test/Transforms/GlobalMerge/AArch64/lit.local.cfg similarity index 74% rename from test/DebugInfo/ARM64/lit.local.cfg rename to test/Transforms/GlobalMerge/AArch64/lit.local.cfg index a75a42b6f74c..9a66a00189ea 100644 --- a/test/DebugInfo/ARM64/lit.local.cfg +++ b/test/Transforms/GlobalMerge/AArch64/lit.local.cfg @@ -1,4 +1,4 @@ targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: +if not 'AArch64' in targets: config.unsupported = True diff --git a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll index 1883a8fc8e6e..39408a2d394c 100644 --- a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll +++ b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll @@ -68,7 +68,7 @@ declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind rea define <4 x i32> @mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp { entry: - %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind + %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind ret <4 x i32> %a ; CHECK: entry: ; CHECK-NEXT: ret <4 x i32> zeroinitializer @@ -76,7 +76,7 @@ entry: define <4 x i32> @mulByOneARM64(<4 x i16> %x) nounwind readnone ssp { entry: - %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> ) nounwind + %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> ) nounwind ret <4 x i32> %a ; CHECK: entry: ; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32> @@ 
-85,7 +85,7 @@ entry: define <4 x i32> @constantMulARM64() nounwind readnone ssp { entry: - %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind + %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind ret <4 x i32> %a ; CHECK: entry: ; CHECK-NEXT: ret <4 x i32> @@ -93,7 +93,7 @@ entry: define <4 x i32> @constantMulSARM64() nounwind readnone ssp { entry: - %b = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind + %b = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind ret <4 x i32> %b ; CHECK: entry: ; CHECK-NEXT: ret <4 x i32> @@ -101,7 +101,7 @@ entry: define <4 x i32> @constantMulUARM64() nounwind readnone ssp { entry: - %b = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> , <4 x i16> ) nounwind + %b = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> , <4 x i16> ) nounwind ret <4 x i32> %b ; CHECK: entry: ; CHECK-NEXT: ret <4 x i32> @@ -109,17 +109,17 @@ entry: define <4 x i32> @complex1ARM64(<4 x i16> %x) nounwind readnone ssp { entry: - %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> %x) nounwind + %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> %x) nounwind %b = add <4 x i32> zeroinitializer, %a ret <4 x i32> %b ; CHECK: entry: -; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> %x) [[NUW:#[0-9]+]] +; CHECK-NEXT: %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> %x) [[NUW:#[0-9]+]] ; CHECK-NEXT: ret <4 x i32> %a } define <4 x i32> @complex2ARM64(<4 x i32> %x) nounwind readnone ssp { entry: - %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind + %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind %b = add <4 x i32> %x, %a ret <4 x i32> %b ; CHECK: entry: @@ -127,8 +127,8 @@ entry: ; CHECK-NEXT: ret <4 x i32> %b } -declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone ; CHECK: attributes #0 = { nounwind readnone ssp } ; CHECK: attributes #1 = { nounwind readnone } diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg b/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg similarity index 78% rename from test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg rename to test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg index a49957999f0a..6642d2870680 100644 --- a/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg +++ b/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg @@ -1,5 +1,5 @@ config.suffixes = ['.ll'] targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: +if not 'AArch64' in targets: config.unsupported = True diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll b/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll similarity index 100% rename from test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll rename to test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll b/test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll similarity index 100% rename from 
test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll rename to test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll diff --git a/test/Transforms/LoopStrengthReduce/ARM64/req-regs.ll b/test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll similarity index 100% rename from test/Transforms/LoopStrengthReduce/ARM64/req-regs.ll rename to test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll diff --git a/test/Transforms/LoopVectorize/ARM64/arm64-unroll.ll b/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll similarity index 100% rename from test/Transforms/LoopVectorize/ARM64/arm64-unroll.ll rename to test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll diff --git a/test/Transforms/LoopVectorize/ARM64/gather-cost.ll b/test/Transforms/LoopVectorize/AArch64/gather-cost.ll similarity index 100% rename from test/Transforms/LoopVectorize/ARM64/gather-cost.ll rename to test/Transforms/LoopVectorize/AArch64/gather-cost.ll diff --git a/test/Transforms/LoopVectorize/ARM64/lit.local.cfg b/test/Transforms/LoopVectorize/ARM64/lit.local.cfg deleted file mode 100644 index f1d1f88cf399..000000000000 --- a/test/Transforms/LoopVectorize/ARM64/lit.local.cfg +++ /dev/null @@ -1,6 +0,0 @@ -config.suffixes = ['.ll'] - -targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: - config.unsupported = True - diff --git a/test/MC/MachO/ARM64/lit.local.cfg b/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg similarity index 73% rename from test/MC/MachO/ARM64/lit.local.cfg rename to test/Transforms/SLPVectorizer/AArch64/lit.local.cfg index a75a42b6f74c..c42034979fcf 100644 --- a/test/MC/MachO/ARM64/lit.local.cfg +++ b/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg @@ -1,4 +1,3 @@ targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: +if not 'AArch64' in targets: config.unsupported = True - diff --git a/test/Transforms/SLPVectorizer/ARM64/mismatched-intrinsics.ll b/test/Transforms/SLPVectorizer/AArch64/mismatched-intrinsics.ll similarity index 100% rename from test/Transforms/SLPVectorizer/ARM64/mismatched-intrinsics.ll rename to test/Transforms/SLPVectorizer/AArch64/mismatched-intrinsics.ll diff --git a/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg b/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg deleted file mode 100644 index 84ac9811f012..000000000000 --- a/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg +++ /dev/null @@ -1,3 +0,0 @@ -targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: - config.unsupported = True From 9d934aca341a87abdbc80098c7197ba180d01d01 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 24 May 2014 13:13:17 +0000 Subject: [PATCH 134/906] CodeGen: Make MachineBasicBlock::back skip to the beginning of the last bundle. This makes front/back symmetric with begin/end, avoiding some confusion. Added instr_front/instr_back for the old behavior, corresponding to instr_begin/instr_end. Audited all three in-tree users of back(), all of them look like they don't want to look inside bundles. Fixes an assertion (PR19815) when generating debug info on mips, where a delay slot was bundled at the end of a branch. 
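For illustration, a minimal sketch of the new semantics (the helper below is hypothetical and not part of this patch): back() now mirrors *--end(), so on a block whose final instructions are bundled it returns the bundle header, while instr_back() keeps the old Insts.back() behavior and refers to the last MI inside that bundle.

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"

    // Hypothetical helper: true only when the block ends in a bundle, i.e. when
    // the bundle header returned by back() is not the same MI as instr_back().
    static bool endsInBundle(const llvm::MachineBasicBlock &MBB) {
      if (MBB.empty())
        return false;
      return &MBB.back() != &MBB.instr_back();
    }

Callers that, like the MachineVerifier changes below, only care about the barrier/terminator properties of the last bundle can now use back() directly and drop the explicit getBundleStart() calls.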
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209580 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/MachineBasicBlock.h | 13 ++-- lib/CodeGen/MachineVerifier.cpp | 17 +++--- test/DebugInfo/Mips/delay-slot.ll | 75 ++++++++++++++++++++++++ test/DebugInfo/Mips/lit.local.cfg | 3 + 4 files changed, 95 insertions(+), 13 deletions(-) create mode 100644 test/DebugInfo/Mips/delay-slot.ll create mode 100644 test/DebugInfo/Mips/lit.local.cfg diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 8709d86fe443..90bdeee46d26 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -219,10 +219,15 @@ class MachineBasicBlock : public ilist_node { unsigned size() const { return (unsigned)Insts.size(); } bool empty() const { return Insts.empty(); } - MachineInstr& front() { return Insts.front(); } - MachineInstr& back() { return Insts.back(); } - const MachineInstr& front() const { return Insts.front(); } - const MachineInstr& back() const { return Insts.back(); } + MachineInstr &instr_front() { return Insts.front(); } + MachineInstr &instr_back() { return Insts.back(); } + const MachineInstr &instr_front() const { return Insts.front(); } + const MachineInstr &instr_back() const { return Insts.back(); } + + MachineInstr &front() { return Insts.front(); } + MachineInstr &back() { return *--end(); } + const MachineInstr &front() const { return Insts.front(); } + const MachineInstr &back() const { return *--end(); } instr_iterator instr_begin() { return Insts.begin(); } const_instr_iterator instr_begin() const { return Insts.begin(); } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 96cf719184be..665290070273 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -33,7 +33,6 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/BasicBlock.h" @@ -578,8 +577,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } - if (!MBB->empty() && getBundleStart(&MBB->back())->isBarrier() && - !TII->isPredicated(getBundleStart(&MBB->back()))) { + if (!MBB->empty() && (&MBB->back())->isBarrier() && + !TII->isPredicated((&MBB->back()))) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); } @@ -599,10 +598,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via unconditional branch but doesn't contain " "any instructions!", MBB); - } else if (!getBundleStart(&MBB->back())->isBarrier()) { + } else if (!(&MBB->back())->isBarrier()) { report("MBB exits via unconditional branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!getBundleStart(&MBB->back())->isTerminator()) { + } else if (!(&MBB->back())->isTerminator()) { report("MBB exits via unconditional branch but the branch isn't a " "terminator instruction!", MBB); } @@ -630,10 +629,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/fall-through but doesn't " "contain any instructions!", MBB); - } 
else if (getBundleStart(&MBB->back())->isBarrier()) { + } else if ((&MBB->back())->isBarrier()) { report("MBB exits via conditional branch/fall-through but ends with a " "barrier instruction!", MBB); - } else if (!getBundleStart(&MBB->back())->isTerminator()) { + } else if (!(&MBB->back())->isTerminator()) { report("MBB exits via conditional branch/fall-through but the branch " "isn't a terminator instruction!", MBB); } @@ -658,10 +657,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/branch but doesn't " "contain any instructions!", MBB); - } else if (!getBundleStart(&MBB->back())->isBarrier()) { + } else if (!MBB->back().isBarrier()) { report("MBB exits via conditional branch/branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!getBundleStart(&MBB->back())->isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/branch but the branch " "isn't a terminator instruction!", MBB); } diff --git a/test/DebugInfo/Mips/delay-slot.ll b/test/DebugInfo/Mips/delay-slot.ll new file mode 100644 index 000000000000..9bce4ba6c9d8 --- /dev/null +++ b/test/DebugInfo/Mips/delay-slot.ll @@ -0,0 +1,75 @@ +; RUN: llc -filetype=obj -O0 < %s -mtriple mips-unknown-linux-gnu | llvm-dwarfdump - | FileCheck %s +; PR19815 + +; Generated using clang -target mips-linux-gnu -g test.c -S -o - -flto|opt -sroa -S +; test.c: +; +; int foo(int x) { +; if (x) +; return 0; +; return 1; +; } + +; CHECK: Address Line Column File ISA Discriminator Flags +; CHECK: ------------------ ------ ------ ------ --- ------------- ------------- +; CHECK: 0x0000000000000000 1 0 1 0 0 is_stmt +; CHECK: 0x0000000000000000 1 0 1 0 0 is_stmt prologue_end +; CHECK: 0x0000000000000008 2 0 1 0 0 is_stmt +; CHECK: 0x0000000000000020 3 0 1 0 0 is_stmt +; CHECK: 0x0000000000000030 4 0 1 0 0 is_stmt +; CHECK: 0x0000000000000040 5 0 1 0 0 is_stmt +; CHECK: 0x0000000000000050 5 0 1 0 0 is_stmt end_sequence + +target datalayout = "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64" +target triple = "mips--linux-gnu" + +; Function Attrs: nounwind +define i32 @foo(i32 %x) #0 { +entry: + call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !12), !dbg !13 + %tobool = icmp ne i32 %x, 0, !dbg !14 + br i1 %tobool, label %if.then, label %if.end, !dbg !14 + +if.then: ; preds = %entry + br label %return, !dbg !16 + +if.end: ; preds = %entry + br label %return, !dbg !17 + +return: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.end ] + ret i32 %retval.0, !dbg !18 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #1 + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10} +!llvm.ident = !{!11} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/test.c] [DW_LANG_C99] +!1 = metadata !{metadata !"test.c", metadata !"/tmp"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, 
i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/test.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8, metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!11 = metadata !{metadata !"clang version 3.5.0"} +!12 = metadata !{i32 786689, metadata !4, metadata !"x", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1] +!13 = metadata !{i32 1, i32 0, metadata !4, null} +!14 = metadata !{i32 2, i32 0, metadata !15, null} +!15 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/test.c] +!16 = metadata !{i32 3, i32 0, metadata !15, null} +!17 = metadata !{i32 4, i32 0, metadata !4, null} +!18 = metadata !{i32 5, i32 0, metadata !4, null} diff --git a/test/DebugInfo/Mips/lit.local.cfg b/test/DebugInfo/Mips/lit.local.cfg new file mode 100644 index 000000000000..88262fb1d323 --- /dev/null +++ b/test/DebugInfo/Mips/lit.local.cfg @@ -0,0 +1,3 @@ +targets = set(config.root.targets_to_build.split()) +if not 'Mips' in targets: + config.unsupported = True From c734d38c7cfc98f50ffd4408bc4fba1d7d743c3b Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 24 May 2014 13:31:10 +0000 Subject: [PATCH 135/906] MachineVerifier: Clean up some syntactic weirdness left behind by find&replace. No functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209581 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineVerifier.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 665290070273..8515b0f456d8 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -577,8 +577,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } - if (!MBB->empty() && (&MBB->back())->isBarrier() && - !TII->isPredicated((&MBB->back()))) { + if (!MBB->empty() && MBB->back().isBarrier() && + !TII->isPredicated(&MBB->back())) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); } @@ -598,10 +598,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via unconditional branch but doesn't contain " "any instructions!", MBB); - } else if (!(&MBB->back())->isBarrier()) { + } else if (!MBB->back().isBarrier()) { report("MBB exits via unconditional branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!(&MBB->back())->isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via unconditional branch but the branch isn't a " "terminator instruction!", MBB); } @@ -629,10 +629,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/fall-through but doesn't " "contain any instructions!", MBB); - } else if ((&MBB->back())->isBarrier()) { + } else if (MBB->back().isBarrier()) { report("MBB exits via conditional branch/fall-through but ends with a " "barrier instruction!", MBB); - } else if (!(&MBB->back())->isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/fall-through but the branch " "isn't a terminator instruction!", MBB); } From 0fc41252ba5efc2ee5ed52bcbd9bd29561714220 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Sat, 24 May 2014 19:45:41 +0000 Subject: [PATCH 136/906] AArch64: disable FastISel for large code model. The code emitted is what would be expected for the small model, so it shouldn't be used when objects can be the full 64-bits away. This fixes MCJIT tests on Linux. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209585 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64FastISel.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 58178b1a48bb..8cc0f8a27355 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -247,6 +247,11 @@ unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) { if (const GlobalAlias *GA = dyn_cast(GV)) TLSGV = GA->getAliasee(); + // MachO still uses GOT for large code-model accesses, but ELF requires + // movz/movk sequences, which FastISel doesn't handle yet. 
+ if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO()) + return 0; + if (const GlobalVariable *GVar = dyn_cast(TLSGV)) if (GVar->isThreadLocal()) return 0; From a054d531d727d8408ad0b718e6d23898f5678c06 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Sat, 24 May 2014 19:54:28 +0000 Subject: [PATCH 137/906] llvm-readobj: remove some dead code git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209586 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-readobj/COFFDumper.cpp | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp index 26ac181a0be1..188e9f891339 100644 --- a/tools/llvm-readobj/COFFDumper.cpp +++ b/tools/llvm-readobj/COFFDumper.cpp @@ -80,12 +80,6 @@ class COFFDumper : public ObjDumper { void cacheRelocations(); - error_code getSectionContents( - const std::vector &Rels, - uint64_t Offset, - ArrayRef &Contents, - uint64_t &Addr); - error_code getSection( const std::vector &Rels, uint64_t Offset, @@ -96,7 +90,6 @@ class COFFDumper : public ObjDumper { const llvm::object::COFFObjectFile *Obj; RelocMapTy RelocMap; - std::vector EmptyRelocs; }; } // namespace @@ -467,27 +460,6 @@ static std::string formatSymbol(const std::vector &Rels, return Str.str(); } -// Given a vector of relocations for a section and an offset into this section -// the function resolves the symbol used for the relocation at the offset and -// returns the section content and the address inside the content pointed to -// by the symbol. -error_code COFFDumper::getSectionContents( - const std::vector &Rels, uint64_t Offset, - ArrayRef &Contents, uint64_t &Addr) { - - SymbolRef Sym; - const coff_section *Section; - - if (error_code EC = resolveSymbol(Rels, Offset, Sym)) - return EC; - if (error_code EC = resolveSectionAndAddress(Obj, Sym, Section, Addr)) - return EC; - if (error_code EC = Obj->getSectionContents(Section, Contents)) - return EC; - - return object_error::success; -} - error_code COFFDumper::getSection( const std::vector &Rels, uint64_t Offset, const coff_section **SectionPtr, uint64_t *AddrPtr) { From 11d505b678fc1f6229184b2709c44d2bc51ee51a Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Sat, 24 May 2014 20:04:21 +0000 Subject: [PATCH 138/906] build: sort llvm-readobj sources Sort the source files. NFC. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209587 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-readobj/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/llvm-readobj/CMakeLists.txt b/tools/llvm-readobj/CMakeLists.txt index 036185d48171..deef7a1629e5 100644 --- a/tools/llvm-readobj/CMakeLists.txt +++ b/tools/llvm-readobj/CMakeLists.txt @@ -5,12 +5,12 @@ set(LLVM_LINK_COMPONENTS ) add_llvm_tool(llvm-readobj - llvm-readobj.cpp - ObjDumper.cpp + ARMAttributeParser.cpp COFFDumper.cpp ELFDumper.cpp - MachODumper.cpp Error.cpp + llvm-readobj.cpp + MachODumper.cpp + ObjDumper.cpp StreamWriter.cpp - ARMAttributeParser.cpp ) From 68b0d1d2b47f1be8eec2ce57c8119906c354ccd8 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Sat, 24 May 2014 20:19:40 +0000 Subject: [PATCH 139/906] Fix some misplaced spaces around 'override' git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209589 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/LegacyPassManagers.h | 2 +- include/llvm/TableGen/Record.h | 18 +++++++++--------- lib/IR/IRPrintingPasses.cpp | 2 +- lib/Target/Sparc/SparcTargetMachine.h | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/llvm/IR/LegacyPassManagers.h b/include/llvm/IR/LegacyPassManagers.h index f4d29675ce02..f6065a4e21a6 100644 --- a/include/llvm/IR/LegacyPassManagers.h +++ b/include/llvm/IR/LegacyPassManagers.h @@ -441,7 +441,7 @@ class FPPassManager : public ModulePass, public PMDataManager { Pass *getAsPass() override { return this; } /// Pass Manager itself does not invalidate any analysis info. - void getAnalysisUsage(AnalysisUsage &Info) const override{ + void getAnalysisUsage(AnalysisUsage &Info) const override { Info.setPreservesAll(); } diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h index ed1c4c90f82a..36464d75d5ab 100644 --- a/include/llvm/TableGen/Record.h +++ b/include/llvm/TableGen/Record.h @@ -198,16 +198,16 @@ class BitsRecTy : public RecTy { Init *convertValue(VarBitInit *VB) override { return nullptr; } Init *convertValue( DefInit *DI) override { return nullptr; } Init *convertValue( DagInit *DI) override { return nullptr; } - Init *convertValue( UnOpInit *UI) override { return RecTy::convertValue(UI);} + Init *convertValue( UnOpInit *UI) override { return RecTy::convertValue(UI);} Init *convertValue( BinOpInit *UI) override { return RecTy::convertValue(UI);} - Init *convertValue( TernOpInit *UI) override {return RecTy::convertValue(UI);} + Init *convertValue(TernOpInit *UI) override { return RecTy::convertValue(UI);} Init *convertValue( TypedInit *TI) override; - Init *convertValue( VarInit *VI) override{ return RecTy::convertValue(VI);} - Init *convertValue( FieldInit *FI) override{ return RecTy::convertValue(FI);} + Init *convertValue( VarInit *VI) override { return RecTy::convertValue(VI);} + Init *convertValue( FieldInit *FI) override { return RecTy::convertValue(FI);} std::string getAsString() const override; - bool typeIsConvertibleTo(const RecTy *RHS) const override{ + bool typeIsConvertibleTo(const RecTy *RHS) const override { return RHS->baseClassOf(this); } bool baseClassOf(const RecTy*) const override; @@ -313,16 +313,16 @@ class ListRecTy : public RecTy { Init *convertValue(VarBitInit *VB) override { return nullptr; } Init *convertValue( DefInit *DI) override { return nullptr; } Init *convertValue( DagInit *DI) override { return nullptr; } - Init *convertValue( UnOpInit *UI) override { return RecTy::convertValue(UI);} + Init 
*convertValue( UnOpInit *UI) override { return RecTy::convertValue(UI);} Init *convertValue( BinOpInit *UI) override { return RecTy::convertValue(UI);} - Init *convertValue( TernOpInit *UI) override{ return RecTy::convertValue(UI);} + Init *convertValue(TernOpInit *UI) override { return RecTy::convertValue(UI);} Init *convertValue( TypedInit *TI) override; Init *convertValue( VarInit *VI) override { return RecTy::convertValue(VI);} Init *convertValue( FieldInit *FI) override { return RecTy::convertValue(FI);} std::string getAsString() const override; - bool typeIsConvertibleTo(const RecTy *RHS) const override{ + bool typeIsConvertibleTo(const RecTy *RHS) const override { return RHS->baseClassOf(this); } @@ -360,7 +360,7 @@ class DagRecTy : public RecTy { std::string getAsString() const override { return "dag"; } - bool typeIsConvertibleTo(const RecTy *RHS) const override{ + bool typeIsConvertibleTo(const RecTy *RHS) const override { return RHS->baseClassOf(this); } }; diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp index 099c27cfa5c0..c8a17479d8a0 100644 --- a/lib/IR/IRPrintingPasses.cpp +++ b/lib/IR/IRPrintingPasses.cpp @@ -94,7 +94,7 @@ class PrintBasicBlockPass : public BasicBlockPass { return false; } - void getAnalysisUsage(AnalysisUsage &AU) const override{ + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } }; diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index b759e9ae75ed..7d043388e8cf 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -44,7 +44,7 @@ class SparcTargetMachine : public LLVMTargetMachine { const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; } - const SparcSubtarget *getSubtargetImpl() const override{ return &Subtarget; } + const SparcSubtarget *getSubtargetImpl() const override { return &Subtarget; } const SparcRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } From 62692b442aa19ad7f110faa55577f8c1af237175 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Sun, 25 May 2014 10:27:02 +0000 Subject: [PATCH 140/906] Add an extension point for peephole optimizers. This extension point allows adding passes that perform peephole optimizations similar to the instruction combiner. These passes will be inserted after each instance of the instruction combiner pass. Differential Revision: http://reviews.llvm.org/D3905 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209595 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/IPO/PassManagerBuilder.h | 7 ++++++- lib/Transforms/IPO/PassManagerBuilder.cpp | 9 +++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h index bfd58f11756d..4c6f3aa1c454 100644 --- a/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -86,7 +86,12 @@ class PassManagerBuilder { /// EP_EnabledOnOptLevel0 - This extension point allows adding passes that /// should not be disabled by O0 optimization level. The passes will be /// inserted after the inlining pass. - EP_EnabledOnOptLevel0 + EP_EnabledOnOptLevel0, + + /// EP_Peephole - This extension point allows adding passes that perform + /// peephole optimizations similar to the instruction combiner. These passes + /// will be inserted after each instance of the instruction combiner pass. 
+ EP_Peephole, }; /// The Optimization Level - Specify the basic optimization level. diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 10c51ab553a7..38e1b8e16667 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -157,6 +157,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createDeadArgEliminationPass()); // Dead argument elimination MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE + addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE } @@ -183,6 +184,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Combine silly seq's + addExtensionsToPM(EP_Peephole, MPM); if (!DisableTailCalls) MPM.add(createTailCallEliminationPass()); // Eliminate tail calls @@ -208,6 +210,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); MPM.add(createDeadStoreEliminationPass()); // Delete dead stores @@ -222,6 +225,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { if (BBVectorize) { MPM.add(createBBVectorizePass()); MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); if (OptLevel > 1 && UseGVNAfterVectorization) MPM.add(createGVNPass()); // Remove redundancies else @@ -235,6 +239,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Clean up after everything. + addExtensionsToPM(EP_Peephole, MPM); // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC // pass manager that we are specifically trying to avoid. To prevent this @@ -247,6 +252,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // as function calls, so that we can only pass them when the vectorizer // changed the code. MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); if (!DisableUnrollLoops) @@ -299,6 +305,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, // function pointers. When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); // Inline small functions if (RunInliner) @@ -317,6 +324,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, // The IPO passes may leave cruft around. Clean up after them. PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); // Break up allocas @@ -346,6 +354,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, // Cleanup and simplify the code after the scalar optimizations. 
PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); From fba226fc522a618368c852e424c554fa8af9f0c2 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Sun, 25 May 2014 12:43:13 +0000 Subject: [PATCH 141/906] Make these CHECKs a bit more strict. The " at the end of the line makes sure we matched the entire directive. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209599 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/dllexport-x86_64.ll | 56 +++++++++++------------ test/CodeGen/X86/dllexport.ll | 68 ++++++++++++++-------------- 2 files changed, 62 insertions(+), 62 deletions(-) diff --git a/test/CodeGen/X86/dllexport-x86_64.ll b/test/CodeGen/X86/dllexport-x86_64.ll index 8bd882efeeb4..bd099b042236 100644 --- a/test/CodeGen/X86/dllexport-x86_64.ll +++ b/test/CodeGen/X86/dllexport-x86_64.ll @@ -74,31 +74,31 @@ define weak_odr dllexport void @weak1() { ; CHECK: .section .drectve -; WIN32: /EXPORT:Var1,DATA -; WIN32: /EXPORT:Var2,DATA -; WIN32: /EXPORT:Var3,DATA -; WIN32: /EXPORT:WeakVar1,DATA -; WIN32: /EXPORT:WeakVar2,DATA -; WIN32: /EXPORT:f1 -; WIN32: /EXPORT:f2 -; WIN32: /EXPORT:lnk1 -; WIN32: /EXPORT:lnk2 -; WIN32: /EXPORT:weak1 -; WIN32: /EXPORT:alias -; WIN32: /EXPORT:alias2 -; WIN32: /EXPORT:alias3 -; WIN32: /EXPORT:weak_alias -; MINGW: -export:Var1,data -; MINGW: -export:Var2,data -; MINGW: -export:Var3,data -; MINGW: -export:WeakVar1,data -; MINGW: -export:WeakVar2,data -; MINGW: -export:f1 -; MINGW: -export:f2 -; MINGW: -export:lnk1 -; MINGW: -export:lnk2 -; MINGW: -export:weak1 -; MINGW: -export:alias -; MINGW: -export:alias2 -; MINGW: -export:alias3 -; MINGW: -export:weak_alias +; WIN32: /EXPORT:Var1,DATA" +; WIN32: /EXPORT:Var2,DATA" +; WIN32: /EXPORT:Var3,DATA" +; WIN32: /EXPORT:WeakVar1,DATA" +; WIN32: /EXPORT:WeakVar2,DATA" +; WIN32: /EXPORT:f1" +; WIN32: /EXPORT:f2" +; WIN32: /EXPORT:lnk1" +; WIN32: /EXPORT:lnk2" +; WIN32: /EXPORT:weak1" +; WIN32: /EXPORT:alias" +; WIN32: /EXPORT:alias2" +; WIN32: /EXPORT:alias3" +; WIN32: /EXPORT:weak_alias" +; MINGW: -export:Var1,data" +; MINGW: -export:Var2,data" +; MINGW: -export:Var3,data" +; MINGW: -export:WeakVar1,data" +; MINGW: -export:WeakVar2,data" +; MINGW: -export:f1" +; MINGW: -export:f2" +; MINGW: -export:lnk1" +; MINGW: -export:lnk2" +; MINGW: -export:weak1" +; MINGW: -export:alias" +; MINGW: -export:alias2" +; MINGW: -export:alias3" +; MINGW: -export:weak_alias" diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll index b85df83e3e3a..0805fe2efab9 100644 --- a/test/CodeGen/X86/dllexport.ll +++ b/test/CodeGen/X86/dllexport.ll @@ -93,38 +93,38 @@ define weak_odr dllexport void @weak1() { ; CHECK: .section .drectve -; CHECK-CL: /EXPORT:_Var1,DATA -; CHECK-CL: /EXPORT:_Var2,DATA -; CHECK-CL: /EXPORT:_Var3,DATA -; CHECK-CL: /EXPORT:_WeakVar1,DATA -; CHECK-CL: /EXPORT:_WeakVar2,DATA -; CHECK-CL: /EXPORT:_f1 -; CHECK-CL: /EXPORT:_f2 -; CHECK-CL: /EXPORT:_stdfun@0 -; CHECK-CL: /EXPORT:@fastfun@0 -; CHECK-CL: /EXPORT:_thisfun -; CHECK-CL: /EXPORT:_lnk1 -; CHECK-CL: /EXPORT:_lnk2 -; CHECK-CL: /EXPORT:_weak1 -; CHECK-CL: /EXPORT:_alias -; CHECK-CL: /EXPORT:_alias2 -; CHECK-CL: /EXPORT:_alias3 -; CHECK-CL: /EXPORT:_weak_alias -; CHECK-GCC: -export:Var1,data -; CHECK-GCC: -export:Var2,data -; CHECK-GCC: -export:Var3,data -; CHECK-GCC: -export:WeakVar1,data -; CHECK-GCC: -export:WeakVar2,data -; CHECK-GCC: -export:f1 -; CHECK-GCC: -export:f2 -; CHECK-GCC: -export:stdfun@0 -; CHECK-GCC: -export:@fastfun@0 -; CHECK-GCC: -export:thisfun -; 
CHECK-GCC: -export:lnk1 -; CHECK-GCC: -export:lnk2 -; CHECK-GCC: -export:weak1 -; CHECK-GCC: -export:alias -; CHECK-GCC: -export:alias2 -; CHECK-GCC: -export:alias3 -; CHECK-GCC: -export:weak_alias +; CHECK-CL: /EXPORT:_Var1,DATA" +; CHECK-CL: /EXPORT:_Var2,DATA" +; CHECK-CL: /EXPORT:_Var3,DATA" +; CHECK-CL: /EXPORT:_WeakVar1,DATA" +; CHECK-CL: /EXPORT:_WeakVar2,DATA" +; CHECK-CL: /EXPORT:_f1" +; CHECK-CL: /EXPORT:_f2" +; CHECK-CL: /EXPORT:_stdfun@0" +; CHECK-CL: /EXPORT:@fastfun@0" +; CHECK-CL: /EXPORT:_thisfun" +; CHECK-CL: /EXPORT:_lnk1" +; CHECK-CL: /EXPORT:_lnk2" +; CHECK-CL: /EXPORT:_weak1" +; CHECK-CL: /EXPORT:_alias" +; CHECK-CL: /EXPORT:_alias2" +; CHECK-CL: /EXPORT:_alias3" +; CHECK-CL: /EXPORT:_weak_alias" +; CHECK-GCC: -export:Var1,data" +; CHECK-GCC: -export:Var2,data" +; CHECK-GCC: -export:Var3,data" +; CHECK-GCC: -export:WeakVar1,data" +; CHECK-GCC: -export:WeakVar2,data" +; CHECK-GCC: -export:f1" +; CHECK-GCC: -export:f2" +; CHECK-GCC: -export:stdfun@0" +; CHECK-GCC: -export:@fastfun@0" +; CHECK-GCC: -export:thisfun" +; CHECK-GCC: -export:lnk1" +; CHECK-GCC: -export:lnk2" +; CHECK-GCC: -export:weak1" +; CHECK-GCC: -export:alias" +; CHECK-GCC: -export:alias2" +; CHECK-GCC: -export:alias3" +; CHECK-GCC: -export:weak_alias" From c385367909ab343dcc0a1a63267adb4786f9ebe5 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Sun, 25 May 2014 12:49:07 +0000 Subject: [PATCH 142/906] Emit data or code export directives based on the type. Currently we look at the Aliasee to decide what type of export directive to use. It seems better to use the type of the alias directly. This is similar to how we handle the alias having the same address but other attributes (linkage, visibility) from the aliasee. With this patch it is now possible to do things like target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc" @foo = global [6 x i8] c"\B8*\00\00\00\C3", section ".text", align 16 @f = dllexport alias i32 (), [6 x i8]* @foo !llvm.module.flags = !{!0} !0 = metadata !{i32 6, metadata !"Linker Options", metadata !1} !1 = metadata !{metadata !2, metadata !3} !2 = metadata !{metadata !"/DEFAULTLIB:libcmt.lib"} !3 = metadata !{metadata !"/DEFAULTLIB:oldnames.lib"} git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209600 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86AsmPrinter.cpp | 10 +++------- test/CodeGen/X86/dllexport-x86_64.ll | 4 ++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 68e136bd776c..1dca5689adee 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -670,16 +670,12 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { DLLExportedGlobals.push_back(getSymbol(&Global)); for (const auto &Alias : M.aliases()) { - const GlobalValue *GV = &Alias; - if (!GV->hasDLLExportStorageClass()) + if (!Alias.hasDLLExportStorageClass()) continue; - while (const GlobalAlias *A = dyn_cast(GV)) - GV = A->getAliasee(); - - if (isa(GV)) + if (Alias.getType()->getElementType()->isFunctionTy()) DLLExportedFns.push_back(getSymbol(&Alias)); - else if (isa(GV)) + else DLLExportedGlobals.push_back(getSymbol(&Alias)); } diff --git a/test/CodeGen/X86/dllexport-x86_64.ll b/test/CodeGen/X86/dllexport-x86_64.ll index bd099b042236..1347664c71f1 100644 --- a/test/CodeGen/X86/dllexport-x86_64.ll +++ b/test/CodeGen/X86/dllexport-x86_64.ll @@ -72,6 +72,8 @@ define weak_odr dllexport void @weak1() { ; CHECK: weak_alias = f1 @weak_alias = 
dllexport alias weak_odr void()* @f1 +@blob = global [6 x i8] c"\B8*\00\00\00\C3", section ".text", align 16 +@blob_alias = dllexport alias i32 (), [6 x i8]* @blob ; CHECK: .section .drectve ; WIN32: /EXPORT:Var1,DATA" @@ -88,6 +90,7 @@ define weak_odr dllexport void @weak1() { ; WIN32: /EXPORT:alias2" ; WIN32: /EXPORT:alias3" ; WIN32: /EXPORT:weak_alias" +; WIN32: /EXPORT:blob_alias" ; MINGW: -export:Var1,data" ; MINGW: -export:Var2,data" ; MINGW: -export:Var3,data" @@ -102,3 +105,4 @@ define weak_odr dllexport void @weak1() { ; MINGW: -export:alias2" ; MINGW: -export:alias3" ; MINGW: -export:weak_alias" +; MINGW: -export:blob_alias" From 7e79a350b58a1e40d090e5ec3dc5cb339358ed5a Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 25 May 2014 15:38:52 +0000 Subject: [PATCH 143/906] Streamline test case by avoiding a temporary file and piping llc output straight to llvm-dwarfdump We still do temporary files in many cases, just updating this particular one because I was debugging it and made this change while doing so. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209601 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/DebugInfo/X86/concrete_out_of_line.ll | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll index ad9d3b66696c..a8bf7ca5f9ee 100644 --- a/test/DebugInfo/X86/concrete_out_of_line.ll +++ b/test/DebugInfo/X86/concrete_out_of_line.ll @@ -1,5 +1,4 @@ -; RUN: llc -mtriple=x86_64-linux %s -o %t -filetype=obj -; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s +; RUN: llc -mtriple=x86_64-linux < %s -filetype=obj | llvm-dwarfdump -debug-dump=info - | FileCheck %s ; test that we add DW_AT_inline even when we only have concrete out of line ; instances. From cec37248b2fb801d43718c810faa1736a1c95b60 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 25 May 2014 18:11:35 +0000 Subject: [PATCH 144/906] DebugInfo: Fix inlining with #file directives a little harder Seems my previous fix was insufficient - we were still not adding the inlined function to the abstract scope list. Which meant it wasn't flagged as inline, didn't have nested lexical scopes in the abstract definition, and didn't have abstract variables - so the inlined variable didn't reference an abstract variable, instead being described completely inline. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209602 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LexicalScopes.cpp | 10 +++++----- test/DebugInfo/inline-scopes.ll | 3 +++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index d965968fb498..d12c234bf3b2 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -210,21 +210,21 @@ LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { DIDescriptor Scope(N); if (Scope.isLexicalBlockFile()) Scope = DILexicalBlockFile(Scope).getScope(); - auto I = AbstractScopeMap.find(N); + auto I = AbstractScopeMap.find(Scope); if (I != AbstractScopeMap.end()) return &I->second; LexicalScope *Parent = nullptr; if (Scope.isLexicalBlock()) { - DILexicalBlock DB(N); + DILexicalBlock DB(Scope); DIDescriptor ParentDesc = DB.getContext(); Parent = getOrCreateAbstractScope(ParentDesc); } I = AbstractScopeMap.emplace(std::piecewise_construct, - std::forward_as_tuple(N), - std::forward_as_tuple(Parent, DIDescriptor(N), + std::forward_as_tuple(Scope), + std::forward_as_tuple(Parent, Scope, nullptr, true)).first; - if (DIDescriptor(N).isSubprogram()) + if (Scope.isSubprogram()) AbstractScopesList.push_back(&I->second); return &I->second; } diff --git a/test/DebugInfo/inline-scopes.ll b/test/DebugInfo/inline-scopes.ll index 310b0404f876..36c073516c56 100644 --- a/test/DebugInfo/inline-scopes.ll +++ b/test/DebugInfo/inline-scopes.ll @@ -32,6 +32,9 @@ ; Ensure that file changes don't interfere with creating inlined subroutines. ; (see the line directive inside 'f2' in thesource) ; CHECK: DW_TAG_inlined_subroutine +; CHECK: DW_TAG_variable +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_abstract_origin ; Function Attrs: uwtable define i32 @main() #0 { From 12d17f4375df43c6d3ca16dfb7bfe2accbc9d4d4 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Sun, 25 May 2014 20:26:33 +0000 Subject: [PATCH 145/906] tools: use references rather than out pointers in COFFDumper Switch to use references for parameters that are guaranteed to be non-null. Simplifies the code a slight bit in preparation for another change. 
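In outline, the difference is plain C++ (stand-in types below, not the actual
llvm-readobj ones): an out-parameter that every caller must supply reads more
directly as a reference, since the signature itself rules out null.

#include <cstdint>
#include <iostream>

struct Section { uint64_t Base; };

// Pointer style: the signature admits null, so both sides end up guarding a
// case that never actually occurs.
static bool getSectionPtr(const Section &S, uint64_t Off,
                          const Section **SecOut, uint64_t *AddrOut) {
  if (SecOut)
    *SecOut = &S;
  if (AddrOut)
    *AddrOut = S.Base + Off;
  return true;
}

// Reference style: "always present" is stated in the signature.
static bool getSectionRef(const Section &S, uint64_t Off,
                          const Section *&SecOut, uint64_t &AddrOut) {
  SecOut = &S;
  AddrOut = S.Base + Off;
  return true;
}

int main() {
  Section S{0x1000};
  const Section *Sec = nullptr;
  uint64_t Addr = 0;
  getSectionPtr(S, 8, &Sec, &Addr);
  getSectionRef(S, 8, Sec, Addr);
  std::cout << std::hex << Addr << "\n"; // prints 1008
  return 0;
}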
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209603 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-readobj/COFFDumper.cpp | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp index 188e9f891339..aec41c7d2ab2 100644 --- a/tools/llvm-readobj/COFFDumper.cpp +++ b/tools/llvm-readobj/COFFDumper.cpp @@ -80,11 +80,8 @@ class COFFDumper : public ObjDumper { void cacheRelocations(); - error_code getSection( - const std::vector &Rels, - uint64_t Offset, - const coff_section **Section, - uint64_t *AddrPtr); + error_code getSection(const std::vector &Rels, uint64_t Offset, + const coff_section *&Section, uint64_t &AddrPtr); typedef DenseMap > RelocMapTy; @@ -460,24 +457,17 @@ static std::string formatSymbol(const std::vector &Rels, return Str.str(); } -error_code COFFDumper::getSection( - const std::vector &Rels, uint64_t Offset, - const coff_section **SectionPtr, uint64_t *AddrPtr) { - +error_code COFFDumper::getSection(const std::vector &Rels, + uint64_t Offset, + const coff_section *&SectionPtr, + uint64_t &AddrPtr) { SymbolRef Sym; if (error_code EC = resolveSymbol(Rels, Offset, Sym)) return EC; - const coff_section *Section; - uint64_t Addr; - if (error_code EC = resolveSectionAndAddress(Obj, Sym, Section, Addr)) + if (error_code EC = resolveSectionAndAddress(Obj, Sym, SectionPtr, AddrPtr)) return EC; - if (SectionPtr) - *SectionPtr = Section; - if (AddrPtr) - *AddrPtr = Addr; - return object_error::success; } @@ -1063,7 +1053,7 @@ void COFFDumper::printRuntimeFunction( const coff_section* XData = nullptr; uint64_t UnwindInfoOffset = 0; - if (error(getSection(Rels, OffsetInSection + 8, &XData, &UnwindInfoOffset))) + if (error(getSection(Rels, OffsetInSection + 8, XData, UnwindInfoOffset))) return; ArrayRef XContents; From ec944ceaf5eeb4b16bf1798dc958c0d90090e52f Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Sun, 25 May 2014 20:26:37 +0000 Subject: [PATCH 146/906] tools: refactor COFFDumper symbol resolution logic Make the use of the cache more transparent to the users. There is no reason that the cached entries really need to be passed along. The overhead for doing so is minimal: a single extra parameter. This requires that some standalone functions be brought into the COFFDumper class so that they may access the cache. 
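Sketched with simplified, invented types (not the real object-file classes),
the shape of the refactor is: callers hand over a section and an offset, and
the dumper consults its own relocation cache rather than a relocation vector
fetched by the caller.

#include <cstdint>
#include <map>
#include <vector>

struct Section {};
struct Reloc { uint64_t Offset; int SymbolId; };

class Dumper {
  // Filled once while scanning the object file.
  std::map<const Section *, std::vector<Reloc>> RelocMap;

public:
  void addReloc(const Section *S, Reloc R) { RelocMap[S].push_back(R); }

  // Callers pass only (Section, Offset); the cache lookup stays internal.
  bool resolveSymbol(const Section *S, uint64_t Offset, int &SymbolId) {
    for (const Reloc &R : RelocMap[S])
      if (R.Offset == Offset) {
        SymbolId = R.SymbolId;
        return true;
      }
    return false;
  }
};

int main() {
  Dumper D;
  Section Text;
  D.addReloc(&Text, {8, 42});
  int Sym = 0;
  return D.resolveSymbol(&Text, 8, Sym) && Sym == 42 ? 0 : 1;
}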
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209604 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-readobj/COFFDumper.cpp | 130 ++++++++++++++++-------------- 1 file changed, 69 insertions(+), 61 deletions(-) diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp index aec41c7d2ab2..9f00fe1984d5 100644 --- a/tools/llvm-readobj/COFFDumper.cpp +++ b/tools/llvm-readobj/COFFDumper.cpp @@ -64,15 +64,12 @@ class COFFDumper : public ObjDumper { void printBaseOfDataField(const pe32_header *Hdr); void printBaseOfDataField(const pe32plus_header *Hdr); - void printRuntimeFunction( - const RuntimeFunction& RTF, - uint64_t OffsetInSection, - const std::vector &Rels); + void printRuntimeFunction(const RuntimeFunction& RTF, + const coff_section *Section, + uint64_t SectionOffset); - void printUnwindInfo( - const Win64EH::UnwindInfo& UI, - uint64_t OffsetInSection, - const std::vector &Rels); + void printUnwindInfo(const Win64EH::UnwindInfo& UI, + const coff_section *Section, uint64_t SectionOffset); void printUnwindCode(const Win64EH::UnwindInfo &UI, ArrayRef UCs); @@ -80,8 +77,16 @@ class COFFDumper : public ObjDumper { void cacheRelocations(); - error_code getSection(const std::vector &Rels, uint64_t Offset, - const coff_section *&Section, uint64_t &AddrPtr); + error_code resolveRelocation(const coff_section *Section, uint64_t Offset, + const coff_section *&ReesolvedSection, + uint64_t &ResolvedAddress); + + error_code resolveSymbol(const coff_section *Section, uint64_t Offset, + SymbolRef &Sym); + error_code resolveSymbolName(const coff_section *Section, uint64_t Offset, + StringRef &Name); + std::string formatSymbol(const coff_section *Section, uint64_t Offset, + uint32_t Disp); typedef DenseMap > RelocMapTy; @@ -182,32 +187,33 @@ static error_code resolveSectionAndAddress(const COFFObjectFile *Obj, return object_error::success; } -// Given a vector of relocations for a section and an offset into this section -// the function returns the symbol used for the relocation at the offset. -static error_code resolveSymbol(const std::vector &Rels, - uint64_t Offset, SymbolRef &Sym) { - for (const auto &Relocation : Rels) { - uint64_t Ofs; - if (error_code EC = Relocation.getOffset(Ofs)) +// Given a a section and an offset into this section the function returns the +// symbol used for the relocation at the offset. +error_code COFFDumper::resolveSymbol(const coff_section *Section, + uint64_t Offset, SymbolRef &Sym) { + const auto &Relocations = RelocMap[Section]; + for (const auto &Relocation : Relocations) { + uint64_t RelocationOffset; + if (error_code EC = Relocation.getOffset(RelocationOffset)) return EC; - if (Ofs == Offset) { + if (RelocationOffset == Offset) { Sym = *Relocation.getSymbol(); return readobj_error::success; } } - return readobj_error::unknown_symbol; } -// Given a vector of relocations for a section and an offset into this section -// the function returns the name of the symbol used for the relocation at the -// offset. -static error_code resolveSymbolName(const std::vector &Rels, - uint64_t Offset, StringRef &Name) { - SymbolRef Sym; - if (error_code EC = resolveSymbol(Rels, Offset, Sym)) return EC; - if (error_code EC = Sym.getName(Name)) return EC; +// Given a section and an offset into this section the function returns the name +// of the symbol used for the relocation at the offset. 
+error_code COFFDumper::resolveSymbolName(const coff_section *Section, + uint64_t Offset, StringRef &Name) { + SymbolRef Symbol; + if (error_code EC = resolveSymbol(Section, Offset, Symbol)) + return EC; + if (error_code EC = Symbol.getName(Name)) + return EC; return object_error::success; } @@ -436,13 +442,13 @@ static error_code getSymbolAuxData(const COFFObjectFile *Obj, return readobj_error::success; } -static std::string formatSymbol(const std::vector &Rels, - uint64_t Offset, uint32_t Disp) { +std::string COFFDumper::formatSymbol(const coff_section *Section, + uint64_t Offset, uint32_t Disp) { std::string Buffer; raw_string_ostream Str(Buffer); StringRef Sym; - if (resolveSymbolName(Rels, Offset, Sym)) { + if (resolveSymbolName(Section, Offset, Sym)) { Str << format(" (0x%" PRIX64 ")", Offset); return Str.str(); } @@ -457,15 +463,16 @@ static std::string formatSymbol(const std::vector &Rels, return Str.str(); } -error_code COFFDumper::getSection(const std::vector &Rels, - uint64_t Offset, - const coff_section *&SectionPtr, - uint64_t &AddrPtr) { +error_code COFFDumper::resolveRelocation(const coff_section *Section, + uint64_t Offset, + const coff_section *&ResolvedSection, + uint64_t &ResolvedAddress) { SymbolRef Sym; - if (error_code EC = resolveSymbol(Rels, Offset, Sym)) + if (error_code EC = resolveSymbol(Section, Offset, Sym)) return EC; - if (error_code EC = resolveSectionAndAddress(Obj, Sym, SectionPtr, AddrPtr)) + if (error_code EC = resolveSectionAndAddress(Obj, Sym, ResolvedSection, + ResolvedAddress)) return EC; return object_error::success; @@ -638,8 +645,8 @@ void COFFDumper::printCodeViewLineTables(const SectionRef &Section) { } StringRef FunctionName; - if (error(resolveSymbolName(RelocMap[Obj->getCOFFSection(Section)], - Offset, FunctionName))) + if (error(resolveSymbolName(Obj->getCOFFSection(Section), Offset, + FunctionName))) return; W.printString("FunctionName", FunctionName); if (FunctionLineTables.count(FunctionName) != 0) { @@ -1033,27 +1040,27 @@ void COFFDumper::printX64UnwindInfo() { const uint64_t OffsetInSection = std::distance(RFs.begin(), I) * sizeof(RuntimeFunction); - printRuntimeFunction(*I, OffsetInSection, RelocMap[PData]); + printRuntimeFunction(*I, PData, OffsetInSection); } } } -void COFFDumper::printRuntimeFunction( - const RuntimeFunction& RTF, - uint64_t OffsetInSection, - const std::vector &Rels) { +void COFFDumper::printRuntimeFunction(const RuntimeFunction& RTF, + const coff_section *Section, + uint64_t SectionOffset) { DictScope D(W, "RuntimeFunction"); W.printString("StartAddress", - formatSymbol(Rels, OffsetInSection + 0, RTF.StartAddress)); + formatSymbol(Section, SectionOffset + 0, RTF.StartAddress)); W.printString("EndAddress", - formatSymbol(Rels, OffsetInSection + 4, RTF.EndAddress)); + formatSymbol(Section, SectionOffset + 4, RTF.EndAddress)); W.printString("UnwindInfoAddress", - formatSymbol(Rels, OffsetInSection + 8, RTF.UnwindInfoOffset)); + formatSymbol(Section, SectionOffset + 8, RTF.UnwindInfoOffset)); const coff_section* XData = nullptr; uint64_t UnwindInfoOffset = 0; - if (error(getSection(Rels, OffsetInSection + 8, XData, UnwindInfoOffset))) + if (error(getSectionFromRelocation(Section, SectionOffset + 8, + XData, UnwindInfoOffset))) return; ArrayRef XContents; @@ -1068,13 +1075,12 @@ void COFFDumper::printRuntimeFunction( reinterpret_cast( XContents.data() + UnwindInfoOffset); - printUnwindInfo(*UI, UnwindInfoOffset, RelocMap[XData]); + printUnwindInfo(*UI, XData, UnwindInfoOffset); } -void COFFDumper::printUnwindInfo( - 
const Win64EH::UnwindInfo& UI, - uint64_t OffsetInSection, - const std::vector &Rels) { +void COFFDumper::printUnwindInfo(const Win64EH::UnwindInfo& UI, + const coff_section *Section, + uint64_t SectionOffset) { DictScope D(W, "UnwindInfo"); W.printNumber("Version", UI.getVersion()); W.printFlags("Flags", UI.getFlags(), makeArrayRef(UnwindFlags)); @@ -1103,20 +1109,22 @@ void COFFDumper::printUnwindInfo( } } - uint64_t LSDAOffset = OffsetInSection + getOffsetOfLSDA(UI); + uint64_t LSDAOffset = SectionOffset + getOffsetOfLSDA(UI); if (UI.getFlags() & (UNW_ExceptionHandler | UNW_TerminateHandler)) { - W.printString("Handler", formatSymbol(Rels, LSDAOffset, - UI.getLanguageSpecificHandlerOffset())); + W.printString("Handler", + formatSymbol(Section, LSDAOffset, + UI.getLanguageSpecificHandlerOffset())); } else if (UI.getFlags() & UNW_ChainInfo) { const RuntimeFunction *Chained = UI.getChainedFunctionEntry(); if (Chained) { DictScope D(W, "Chained"); - W.printString("StartAddress", formatSymbol(Rels, LSDAOffset + 0, - Chained->StartAddress)); - W.printString("EndAddress", formatSymbol(Rels, LSDAOffset + 4, - Chained->EndAddress)); - W.printString("UnwindInfoAddress", formatSymbol(Rels, LSDAOffset + 8, - Chained->UnwindInfoOffset)); + W.printString("StartAddress", formatSymbol(Section, LSDAOffset + 0, + Chained->StartAddress)); + W.printString("EndAddress", formatSymbol(Section, LSDAOffset + 4, + Chained->EndAddress)); + W.printString("UnwindInfoAddress", + formatSymbol(Section, LSDAOffset + 8, + Chained->UnwindInfoOffset)); } } } From 53046f3c5f5a747bfcb4e5700ed5e3b5b2506afe Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Sun, 25 May 2014 20:26:40 +0000 Subject: [PATCH 147/906] tools: inline simple single-use function This inlines the single use function in preparation for splitting the Win64EH printing out of the COFFDumper into its own entity. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209605 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-readobj/COFFDumper.cpp | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp index 9f00fe1984d5..1a360f5bed01 100644 --- a/tools/llvm-readobj/COFFDumper.cpp +++ b/tools/llvm-readobj/COFFDumper.cpp @@ -171,22 +171,6 @@ static unsigned getNumUsedSlots(const UnwindCode &UnwindCode) { } } -// Given a symbol sym this functions returns the address and section of it. -static error_code resolveSectionAndAddress(const COFFObjectFile *Obj, - const SymbolRef &Sym, - const coff_section *&ResolvedSection, - uint64_t &ResolvedAddr) { - if (error_code EC = Sym.getAddress(ResolvedAddr)) - return EC; - - section_iterator iter(Obj->section_begin()); - if (error_code EC = Sym.getSection(iter)) - return EC; - - ResolvedSection = Obj->getCOFFSection(*iter); - return object_error::success; -} - // Given a a section and an offset into this section the function returns the // symbol used for the relocation at the offset. 
error_code COFFDumper::resolveSymbol(const coff_section *Section, @@ -471,10 +455,14 @@ error_code COFFDumper::resolveRelocation(const coff_section *Section, if (error_code EC = resolveSymbol(Section, Offset, Sym)) return EC; - if (error_code EC = resolveSectionAndAddress(Obj, Sym, ResolvedSection, - ResolvedAddress)) + if (error_code EC = Sym.getAddress(ResolvedAddr)) + return EC; + + section_iterator SI(Obj->section_begin()); + if (error_code EC = Sym.getSection(SI)) return EC; + ResolvedSection = Obj->getCOFFSection(*SI); return object_error::success; } From b6a667f0335027a8ed673d29af11fc6653781c11 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Sun, 25 May 2014 20:26:45 +0000 Subject: [PATCH 148/906] tools: split out Win64EHDumper from COFFDumper Move the implementation of the Win64 EH printer from the COFFDumper into its own class. This is in preparation for adding support to print ARM EH information. The only real change here is in printUnwindInfo where we now lambda lift the implicit this parameter for the resolveFunction. Also setup the printing to handle ARM. This now has set the stage to introduce ARM EH printing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209606 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-readobj/CMakeLists.txt | 1 + tools/llvm-readobj/COFFDumper.cpp | 344 ++------------------------- tools/llvm-readobj/Win64EHDumper.cpp | 327 +++++++++++++++++++++++++ tools/llvm-readobj/Win64EHDumper.h | 62 +++++ 4 files changed, 406 insertions(+), 328 deletions(-) create mode 100644 tools/llvm-readobj/Win64EHDumper.cpp create mode 100644 tools/llvm-readobj/Win64EHDumper.h diff --git a/tools/llvm-readobj/CMakeLists.txt b/tools/llvm-readobj/CMakeLists.txt index deef7a1629e5..b057dcdc121f 100644 --- a/tools/llvm-readobj/CMakeLists.txt +++ b/tools/llvm-readobj/CMakeLists.txt @@ -13,4 +13,5 @@ add_llvm_tool(llvm-readobj MachODumper.cpp ObjDumper.cpp StreamWriter.cpp + Win64EHDumper.cpp ) diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp index 1a360f5bed01..7b9595f84c5b 100644 --- a/tools/llvm-readobj/COFFDumper.cpp +++ b/tools/llvm-readobj/COFFDumper.cpp @@ -16,6 +16,7 @@ #include "Error.h" #include "ObjDumper.h" #include "StreamWriter.h" +#include "Win64EHDumper.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/Object/COFF.h" @@ -58,35 +59,19 @@ class COFFDumper : public ObjDumper { void printSymbol(const SymbolRef &Sym); void printRelocation(const SectionRef &Section, const RelocationRef &Reloc); void printDataDirectory(uint32_t Index, const std::string &FieldName); - void printX64UnwindInfo(); template void printPEHeader(const PEHeader *Hdr); void printBaseOfDataField(const pe32_header *Hdr); void printBaseOfDataField(const pe32plus_header *Hdr); - void printRuntimeFunction(const RuntimeFunction& RTF, - const coff_section *Section, - uint64_t SectionOffset); - - void printUnwindInfo(const Win64EH::UnwindInfo& UI, - const coff_section *Section, uint64_t SectionOffset); - - void printUnwindCode(const Win64EH::UnwindInfo &UI, ArrayRef UCs); - void printCodeViewLineTables(const SectionRef &Section); void cacheRelocations(); - error_code resolveRelocation(const coff_section *Section, uint64_t Offset, - const coff_section *&ReesolvedSection, - uint64_t &ResolvedAddress); - error_code resolveSymbol(const coff_section *Section, uint64_t Offset, SymbolRef &Sym); error_code resolveSymbolName(const coff_section *Section, uint64_t Offset, StringRef &Name); - std::string formatSymbol(const 
coff_section *Section, uint64_t Offset, - uint32_t Disp); typedef DenseMap > RelocMapTy; @@ -111,66 +96,6 @@ error_code createCOFFDumper(const object::ObjectFile *Obj, StreamWriter &Writer, } // namespace llvm - -// Returns the name of the unwind code. -static StringRef getUnwindCodeTypeName(uint8_t Code) { - switch(Code) { - default: llvm_unreachable("Invalid unwind code"); - case UOP_PushNonVol: return "PUSH_NONVOL"; - case UOP_AllocLarge: return "ALLOC_LARGE"; - case UOP_AllocSmall: return "ALLOC_SMALL"; - case UOP_SetFPReg: return "SET_FPREG"; - case UOP_SaveNonVol: return "SAVE_NONVOL"; - case UOP_SaveNonVolBig: return "SAVE_NONVOL_FAR"; - case UOP_SaveXMM128: return "SAVE_XMM128"; - case UOP_SaveXMM128Big: return "SAVE_XMM128_FAR"; - case UOP_PushMachFrame: return "PUSH_MACHFRAME"; - } -} - -// Returns the name of a referenced register. -static StringRef getUnwindRegisterName(uint8_t Reg) { - switch(Reg) { - default: llvm_unreachable("Invalid register"); - case 0: return "RAX"; - case 1: return "RCX"; - case 2: return "RDX"; - case 3: return "RBX"; - case 4: return "RSP"; - case 5: return "RBP"; - case 6: return "RSI"; - case 7: return "RDI"; - case 8: return "R8"; - case 9: return "R9"; - case 10: return "R10"; - case 11: return "R11"; - case 12: return "R12"; - case 13: return "R13"; - case 14: return "R14"; - case 15: return "R15"; - } -} - -// Calculates the number of array slots required for the unwind code. -static unsigned getNumUsedSlots(const UnwindCode &UnwindCode) { - switch (UnwindCode.getUnwindOp()) { - default: llvm_unreachable("Invalid unwind code"); - case UOP_PushNonVol: - case UOP_AllocSmall: - case UOP_SetFPReg: - case UOP_PushMachFrame: - return 1; - case UOP_SaveNonVol: - case UOP_SaveXMM128: - return 2; - case UOP_SaveNonVolBig: - case UOP_SaveXMM128Big: - return 3; - case UOP_AllocLarge: - return (UnwindCode.getOpInfo() == 0) ? 2 : 3; - } -} - // Given a a section and an offset into this section the function returns the // symbol used for the relocation at the offset. 
error_code COFFDumper::resolveSymbol(const coff_section *Section, @@ -381,43 +306,6 @@ WeakExternalCharacteristics[] = { { "Alias" , COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS } }; -static const EnumEntry UnwindFlags[] = { - { "ExceptionHandler", Win64EH::UNW_ExceptionHandler }, - { "TerminateHandler", Win64EH::UNW_TerminateHandler }, - { "ChainInfo" , Win64EH::UNW_ChainInfo } -}; - -static const EnumEntry UnwindOpInfo[] = { - { "RAX", 0 }, - { "RCX", 1 }, - { "RDX", 2 }, - { "RBX", 3 }, - { "RSP", 4 }, - { "RBP", 5 }, - { "RSI", 6 }, - { "RDI", 7 }, - { "R8", 8 }, - { "R9", 9 }, - { "R10", 10 }, - { "R11", 11 }, - { "R12", 12 }, - { "R13", 13 }, - { "R14", 14 }, - { "R15", 15 } -}; - -static uint64_t getOffsetOfLSDA(const Win64EH::UnwindInfo& UI) { - return static_cast(UI.getLanguageSpecificData()) - - reinterpret_cast(&UI); -} - -static uint32_t getLargeSlotValue(ArrayRef UCs) { - if (UCs.size() < 3) - return 0; - - return UCs[1].FrameOffset + (static_cast(UCs[2].FrameOffset) << 16); -} - template static error_code getSymbolAuxData(const COFFObjectFile *Obj, const coff_symbol *Symbol, const T* &Aux) { @@ -426,46 +314,6 @@ static error_code getSymbolAuxData(const COFFObjectFile *Obj, return readobj_error::success; } -std::string COFFDumper::formatSymbol(const coff_section *Section, - uint64_t Offset, uint32_t Disp) { - std::string Buffer; - raw_string_ostream Str(Buffer); - - StringRef Sym; - if (resolveSymbolName(Section, Offset, Sym)) { - Str << format(" (0x%" PRIX64 ")", Offset); - return Str.str(); - } - - Str << Sym; - if (Disp > 0) { - Str << format(" +0x%X (0x%" PRIX64 ")", Disp, Offset); - } else { - Str << format(" (0x%" PRIX64 ")", Offset); - } - - return Str.str(); -} - -error_code COFFDumper::resolveRelocation(const coff_section *Section, - uint64_t Offset, - const coff_section *&ResolvedSection, - uint64_t &ResolvedAddress) { - SymbolRef Sym; - if (error_code EC = resolveSymbol(Section, Offset, Sym)) - return EC; - - if (error_code EC = Sym.getAddress(ResolvedAddr)) - return EC; - - section_iterator SI(Obj->section_begin()); - if (error_code EC = Sym.getSection(SI)) - return EC; - - ResolvedSection = Obj->getCOFFSection(*SI); - return object_error::success; -} - void COFFDumper::cacheRelocations() { for (const SectionRef &S : Obj->sections()) { const coff_section *Section = Obj->getCOFFSection(S); @@ -997,182 +845,22 @@ void COFFDumper::printUnwindInfo() { return; ListScope D(W, "UnwindInformation"); - if (Header->Machine != COFF::IMAGE_FILE_MACHINE_AMD64) { - W.startLine() << "Unsupported image machine type " - "(currently only AMD64 is supported).\n"; - return; - } - - printX64UnwindInfo(); -} - -void COFFDumper::printX64UnwindInfo() { - for (const SectionRef &Section : Obj->sections()) { - StringRef Name; - if (error(Section.getName(Name))) - continue; - if (Name != ".pdata" && !Name.startswith(".pdata$")) - continue; - - const coff_section *PData = Obj->getCOFFSection(Section); - - ArrayRef Contents; - if (error(Obj->getSectionContents(PData, Contents)) || Contents.empty()) - continue; - - ArrayRef RFs( - reinterpret_cast(Contents.data()), - Contents.size() / sizeof(RuntimeFunction)); - - for (const RuntimeFunction *I = RFs.begin(), *E = RFs.end(); I < E; ++I) { - const uint64_t OffsetInSection = std::distance(RFs.begin(), I) - * sizeof(RuntimeFunction); - - printRuntimeFunction(*I, PData, OffsetInSection); - } - } -} - -void COFFDumper::printRuntimeFunction(const RuntimeFunction& RTF, - const coff_section *Section, - uint64_t SectionOffset) { - - DictScope D(W, 
"RuntimeFunction"); - W.printString("StartAddress", - formatSymbol(Section, SectionOffset + 0, RTF.StartAddress)); - W.printString("EndAddress", - formatSymbol(Section, SectionOffset + 4, RTF.EndAddress)); - W.printString("UnwindInfoAddress", - formatSymbol(Section, SectionOffset + 8, RTF.UnwindInfoOffset)); - - const coff_section* XData = nullptr; - uint64_t UnwindInfoOffset = 0; - if (error(getSectionFromRelocation(Section, SectionOffset + 8, - XData, UnwindInfoOffset))) - return; - - ArrayRef XContents; - if (error(Obj->getSectionContents(XData, XContents)) || XContents.empty()) - return; - - UnwindInfoOffset += RTF.UnwindInfoOffset; - if (UnwindInfoOffset > XContents.size()) - return; - - const Win64EH::UnwindInfo *UI = - reinterpret_cast( - XContents.data() + UnwindInfoOffset); - - printUnwindInfo(*UI, XData, UnwindInfoOffset); -} - -void COFFDumper::printUnwindInfo(const Win64EH::UnwindInfo& UI, - const coff_section *Section, - uint64_t SectionOffset) { - DictScope D(W, "UnwindInfo"); - W.printNumber("Version", UI.getVersion()); - W.printFlags("Flags", UI.getFlags(), makeArrayRef(UnwindFlags)); - W.printNumber("PrologSize", UI.PrologSize); - if (UI.getFrameRegister() != 0) { - W.printEnum("FrameRegister", UI.getFrameRegister(), - makeArrayRef(UnwindOpInfo)); - W.printHex("FrameOffset", UI.getFrameOffset()); - } else { - W.printString("FrameRegister", StringRef("-")); - W.printString("FrameOffset", StringRef("-")); - } - - W.printNumber("UnwindCodeCount", UI.NumCodes); - { - ListScope CodesD(W, "UnwindCodes"); - ArrayRef UCs(&UI.UnwindCodes[0], UI.NumCodes); - for (const UnwindCode *I = UCs.begin(), *E = UCs.end(); I < E; ++I) { - unsigned UsedSlots = getNumUsedSlots(*I); - if (UsedSlots > UCs.size()) { - errs() << "Corrupt unwind data"; - return; - } - printUnwindCode(UI, ArrayRef(I, E)); - I += UsedSlots - 1; - } - } - - uint64_t LSDAOffset = SectionOffset + getOffsetOfLSDA(UI); - if (UI.getFlags() & (UNW_ExceptionHandler | UNW_TerminateHandler)) { - W.printString("Handler", - formatSymbol(Section, LSDAOffset, - UI.getLanguageSpecificHandlerOffset())); - } else if (UI.getFlags() & UNW_ChainInfo) { - const RuntimeFunction *Chained = UI.getChainedFunctionEntry(); - if (Chained) { - DictScope D(W, "Chained"); - W.printString("StartAddress", formatSymbol(Section, LSDAOffset + 0, - Chained->StartAddress)); - W.printString("EndAddress", formatSymbol(Section, LSDAOffset + 4, - Chained->EndAddress)); - W.printString("UnwindInfoAddress", - formatSymbol(Section, LSDAOffset + 8, - Chained->UnwindInfoOffset)); - } - } -} - -// Prints one unwind code. Because an unwind code can occupy up to 3 slots in -// the unwind codes array, this function requires that the correct number of -// slots is provided. 
-void COFFDumper::printUnwindCode(const Win64EH::UnwindInfo& UI, - ArrayRef UCs) { - assert(UCs.size() >= getNumUsedSlots(UCs[0])); - - W.startLine() << format("0x%02X: ", unsigned(UCs[0].u.CodeOffset)) - << getUnwindCodeTypeName(UCs[0].getUnwindOp()); - - uint32_t AllocSize = 0; - - switch (UCs[0].getUnwindOp()) { - case UOP_PushNonVol: - outs() << " reg=" << getUnwindRegisterName(UCs[0].getOpInfo()); + switch (Header->Machine) { + case COFF::IMAGE_FILE_MACHINE_AMD64: { + Win64EH::Dumper Dumper(W); + Win64EH::Dumper::SymbolResolver Resolver = + [this](const object::coff_section *Section, uint64_t Offset, + SymbolRef &Symbol) -> error_code { + return this->resolveSymbol(Section, Offset, Symbol); + }; + Win64EH::Dumper::Context Ctx(*Obj, Resolver); + Dumper.printData(Ctx); break; - - case UOP_AllocLarge: - if (UCs[0].getOpInfo() == 0) { - AllocSize = UCs[1].FrameOffset * 8; - } else { - AllocSize = getLargeSlotValue(UCs); - } - outs() << " size=" << AllocSize; - break; - case UOP_AllocSmall: - outs() << " size=" << ((UCs[0].getOpInfo() + 1) * 8); - break; - case UOP_SetFPReg: - if (UI.getFrameRegister() == 0) { - outs() << " reg="; - } else { - outs() << " reg=" << getUnwindRegisterName(UI.getFrameRegister()) - << format(", offset=0x%X", UI.getFrameOffset() * 16); - } - break; - case UOP_SaveNonVol: - outs() << " reg=" << getUnwindRegisterName(UCs[0].getOpInfo()) - << format(", offset=0x%X", UCs[1].FrameOffset * 8); - break; - case UOP_SaveNonVolBig: - outs() << " reg=" << getUnwindRegisterName(UCs[0].getOpInfo()) - << format(", offset=0x%X", getLargeSlotValue(UCs)); - break; - case UOP_SaveXMM128: - outs() << " reg=XMM" << static_cast(UCs[0].getOpInfo()) - << format(", offset=0x%X", UCs[1].FrameOffset * 16); - break; - case UOP_SaveXMM128Big: - outs() << " reg=XMM" << static_cast(UCs[0].getOpInfo()) - << format(", offset=0x%X", getLargeSlotValue(UCs)); - break; - case UOP_PushMachFrame: - outs() << " errcode=" << (UCs[0].getOpInfo() == 0 ? "no" : "yes"); + } + default: + W.printEnum("unsupported Image Machine", Header->Machine, + makeArrayRef(ImageFileMachineType)); break; } - - outs() << "\n"; } + diff --git a/tools/llvm-readobj/Win64EHDumper.cpp b/tools/llvm-readobj/Win64EHDumper.cpp new file mode 100644 index 000000000000..449df001cc97 --- /dev/null +++ b/tools/llvm-readobj/Win64EHDumper.cpp @@ -0,0 +1,327 @@ +//===- Win64EHDumper.cpp - Win64 EH Printer ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Win64EHDumper.h" +#include "llvm-readobj.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::Win64EH; + +static const EnumEntry UnwindFlags[] = { + { "ExceptionHandler", UNW_ExceptionHandler }, + { "TerminateHandler", UNW_TerminateHandler }, + { "ChainInfo" , UNW_ChainInfo } +}; + +static const EnumEntry UnwindOpInfo[] = { + { "RAX", 0 }, + { "RCX", 1 }, + { "RDX", 2 }, + { "RBX", 3 }, + { "RSP", 4 }, + { "RBP", 5 }, + { "RSI", 6 }, + { "RDI", 7 }, + { "R8", 8 }, + { "R9", 9 }, + { "R10", 10 }, + { "R11", 11 }, + { "R12", 12 }, + { "R13", 13 }, + { "R14", 14 }, + { "R15", 15 } +}; + +static uint64_t getOffsetOfLSDA(const UnwindInfo& UI) { + return static_cast(UI.getLanguageSpecificData()) + - reinterpret_cast(&UI); +} + +static uint32_t getLargeSlotValue(ArrayRef UC) { + if (UC.size() < 3) + return 0; + return UC[1].FrameOffset + (static_cast(UC[2].FrameOffset) << 16); +} + +// Returns the name of the unwind code. +static StringRef getUnwindCodeTypeName(uint8_t Code) { + switch (Code) { + default: llvm_unreachable("Invalid unwind code"); + case UOP_PushNonVol: return "PUSH_NONVOL"; + case UOP_AllocLarge: return "ALLOC_LARGE"; + case UOP_AllocSmall: return "ALLOC_SMALL"; + case UOP_SetFPReg: return "SET_FPREG"; + case UOP_SaveNonVol: return "SAVE_NONVOL"; + case UOP_SaveNonVolBig: return "SAVE_NONVOL_FAR"; + case UOP_SaveXMM128: return "SAVE_XMM128"; + case UOP_SaveXMM128Big: return "SAVE_XMM128_FAR"; + case UOP_PushMachFrame: return "PUSH_MACHFRAME"; + } +} + +// Returns the name of a referenced register. +static StringRef getUnwindRegisterName(uint8_t Reg) { + switch (Reg) { + default: llvm_unreachable("Invalid register"); + case 0: return "RAX"; + case 1: return "RCX"; + case 2: return "RDX"; + case 3: return "RBX"; + case 4: return "RSP"; + case 5: return "RBP"; + case 6: return "RSI"; + case 7: return "RDI"; + case 8: return "R8"; + case 9: return "R9"; + case 10: return "R10"; + case 11: return "R11"; + case 12: return "R12"; + case 13: return "R13"; + case 14: return "R14"; + case 15: return "R15"; + } +} + +// Calculates the number of array slots required for the unwind code. +static unsigned getNumUsedSlots(const UnwindCode &UnwindCode) { + switch (UnwindCode.getUnwindOp()) { + default: llvm_unreachable("Invalid unwind code"); + case UOP_PushNonVol: + case UOP_AllocSmall: + case UOP_SetFPReg: + case UOP_PushMachFrame: + return 1; + case UOP_SaveNonVol: + case UOP_SaveXMM128: + return 2; + case UOP_SaveNonVolBig: + case UOP_SaveXMM128Big: + return 3; + case UOP_AllocLarge: + return (UnwindCode.getOpInfo() == 0) ? 
2 : 3; + } +} + +static std::string formatSymbol(const Dumper::Context &Ctx, + const coff_section *Section, uint64_t Offset, + uint32_t Displacement) { + std::string Buffer; + raw_string_ostream OS(Buffer); + + StringRef Name; + SymbolRef Symbol; + if (Ctx.ResolveSymbol(Section, Offset, Symbol) || Symbol.getName(Name)) { + OS << format(" (0x%" PRIX64 ")", Offset); + return OS.str(); + } + + OS << Name; + if (Displacement > 0) + OS << format(" +0x%X (0x%" PRIX64 ")", Displacement, Offset); + else + OS << format(" (0x%" PRIX64 ")", Offset); + return OS.str(); +} + +static error_code resolveRelocation(const Dumper::Context &Ctx, + const coff_section *Section, + uint64_t Offset, + const coff_section *&ResolvedSection, + uint64_t &ResolvedAddress) { + SymbolRef Symbol; + if (error_code EC = Ctx.ResolveSymbol(Section, Offset, Symbol)) + return EC; + + if (error_code EC = Symbol.getAddress(ResolvedAddress)) + return EC; + + section_iterator SI = Ctx.COFF.section_begin(); + if (error_code EC = Symbol.getSection(SI)) + return EC; + + ResolvedSection = Ctx.COFF.getCOFFSection(*SI); + return object_error::success; +} + +namespace llvm { +namespace Win64EH { +void Dumper::printRuntimeFunctionEntry(const Context &Ctx, + const coff_section *Section, + uint64_t Offset, + const RuntimeFunction &RF) { + SW.printString("StartAddress", + formatSymbol(Ctx, Section, Offset + 0, RF.StartAddress)); + SW.printString("EndAddress", + formatSymbol(Ctx, Section, Offset + 4, RF.EndAddress)); + SW.printString("UnwindInfoAddress", + formatSymbol(Ctx, Section, Offset + 8, RF.UnwindInfoOffset)); +} + +// Prints one unwind code. Because an unwind code can occupy up to 3 slots in +// the unwind codes array, this function requires that the correct number of +// slots is provided. +void Dumper::printUnwindCode(const UnwindInfo& UI, ArrayRef UC) { + assert(UC.size() >= getNumUsedSlots(UC[0])); + + SW.startLine() << format("0x%02X: ", unsigned(UC[0].u.CodeOffset)) + << getUnwindCodeTypeName(UC[0].getUnwindOp()); + + switch (UC[0].getUnwindOp()) { + case UOP_PushNonVol: + OS << " reg=" << getUnwindRegisterName(UC[0].getOpInfo()); + break; + + case UOP_AllocLarge: + OS << " size=" + << ((UC[0].getOpInfo() == 0) ? UC[1].FrameOffset * 8 + : getLargeSlotValue(UC)); + break; + + case UOP_AllocSmall: + OS << " size=" << (UC[0].getOpInfo() + 1) * 8; + break; + + case UOP_SetFPReg: + if (UI.getFrameRegister() == 0) + OS << " reg="; + else + OS << " reg=" << getUnwindRegisterName(UI.getFrameRegister()) + << format(", offset=0x%X", UI.getFrameOffset() * 16); + break; + + case UOP_SaveNonVol: + OS << " reg=" << getUnwindRegisterName(UC[0].getOpInfo()) + << format(", offset=0x%X", UC[1].FrameOffset * 8); + break; + + case UOP_SaveNonVolBig: + OS << " reg=" << getUnwindRegisterName(UC[0].getOpInfo()) + << format(", offset=0x%X", getLargeSlotValue(UC)); + break; + + case UOP_SaveXMM128: + OS << " reg=XMM" << static_cast(UC[0].getOpInfo()) + << format(", offset=0x%X", UC[1].FrameOffset * 16); + break; + + case UOP_SaveXMM128Big: + OS << " reg=XMM" << static_cast(UC[0].getOpInfo()) + << format(", offset=0x%X", getLargeSlotValue(UC)); + break; + + case UOP_PushMachFrame: + OS << " errcode=" << (UC[0].getOpInfo() == 0 ? 
"no" : "yes"); + break; + } + + OS << "\n"; +} + +void Dumper::printUnwindInfo(const Context &Ctx, const coff_section *Section, + off_t Offset, const UnwindInfo &UI) { + DictScope UIS(SW, "UnwindInfo"); + SW.printNumber("Version", UI.getVersion()); + SW.printFlags("Flags", UI.getFlags(), makeArrayRef(UnwindFlags)); + SW.printNumber("PrologSize", UI.PrologSize); + if (UI.getFrameRegister()) { + SW.printEnum("FrameRegister", UI.getFrameRegister(), + makeArrayRef(UnwindOpInfo)); + SW.printHex("FrameOffset", UI.getFrameOffset()); + } else { + SW.printString("FrameRegister", StringRef("-")); + SW.printString("FrameOffset", StringRef("-")); + } + + SW.printNumber("UnwindCodeCount", UI.NumCodes); + { + ListScope UCS(SW, "UnwindCodes"); + ArrayRef UC(&UI.UnwindCodes[0], UI.NumCodes); + for (const UnwindCode *UCI = UC.begin(), *UCE = UC.end(); UCI < UCE; ++UCI) { + unsigned UsedSlots = getNumUsedSlots(*UCI); + if (UsedSlots > UC.size()) { + errs() << "corrupt unwind data"; + return; + } + + printUnwindCode(UI, ArrayRef(UCI, UCE)); + UCI = UCI + UsedSlots - 1; + } + } + + uint64_t LSDAOffset = Offset + getOffsetOfLSDA(UI); + if (UI.getFlags() & (UNW_ExceptionHandler | UNW_TerminateHandler)) { + SW.printString("Handler", + formatSymbol(Ctx, Section, LSDAOffset, + UI.getLanguageSpecificHandlerOffset())); + } else if (UI.getFlags() & UNW_ChainInfo) { + if (const RuntimeFunction *Chained = UI.getChainedFunctionEntry()) { + DictScope CS(SW, "Chained"); + printRuntimeFunctionEntry(Ctx, Section, LSDAOffset, *Chained); + } + } +} + +void Dumper::printRuntimeFunction(const Context &Ctx, + const coff_section *Section, + uint64_t SectionOffset, + const RuntimeFunction &RF) { + DictScope RFS(SW, "RuntimeFunction"); + printRuntimeFunctionEntry(Ctx, Section, SectionOffset, RF); + + const coff_section *XData; + uint64_t Offset; + if (error(resolveRelocation(Ctx, Section, SectionOffset + 8, XData, Offset))) + return; + + ArrayRef Contents; + if (error(Ctx.COFF.getSectionContents(XData, Contents)) || Contents.empty()) + return; + + Offset = Offset + RF.UnwindInfoOffset; + if (Offset > Contents.size()) + return; + + const auto UI = reinterpret_cast(Contents.data() + Offset); + printUnwindInfo(Ctx, XData, Offset, *UI); +} + +void Dumper::printData(const Context &Ctx) { + for (const auto &Section : Ctx.COFF.sections()) { + StringRef Name; + if (error(Section.getName(Name))) + continue; + + if (Name != ".pdata" && !Name.startswith(".pdata$")) + continue; + + const coff_section *PData = Ctx.COFF.getCOFFSection(Section); + ArrayRef Contents; + if (error(Ctx.COFF.getSectionContents(PData, Contents)) || Contents.empty()) + continue; + + const RuntimeFunction *Entries = + reinterpret_cast(Contents.data()); + const size_t Count = Contents.size() / sizeof(RuntimeFunction); + ArrayRef RuntimeFunctions(Entries, Count); + + size_t Index = 0; + for (const auto &RF : RuntimeFunctions) { + printRuntimeFunction(Ctx, Ctx.COFF.getCOFFSection(Section), + Index * sizeof(RuntimeFunction), RF); + ++Index; + } + } +} +} +} + diff --git a/tools/llvm-readobj/Win64EHDumper.h b/tools/llvm-readobj/Win64EHDumper.h new file mode 100644 index 000000000000..d0c129c68b80 --- /dev/null +++ b/tools/llvm-readobj/Win64EHDumper.h @@ -0,0 +1,62 @@ +//===- Win64EHDumper.h - Win64 EH Printing ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_READOBJ_WIN64EHPRINTER_H +#define LLVM_TOOLS_READOBJ_WIN64EHPRINTER_H + +#include "StreamWriter.h" +#include "llvm/Support/Win64EH.h" + +#include + +namespace llvm { +namespace object { +class COFFObjectFile; +class SymbolRef; +struct coff_section; +} + +namespace Win64EH { +class Dumper { + StreamWriter &SW; + raw_ostream &OS; + +public: + typedef std::function SymbolResolver; + + struct Context { + const object::COFFObjectFile &COFF; + SymbolResolver ResolveSymbol; + + Context(const object::COFFObjectFile &COFF, SymbolResolver Resolver) + : COFF(COFF), ResolveSymbol(Resolver) {} + }; + +private: + void printRuntimeFunctionEntry(const Context &Ctx, + const object::coff_section *Section, + uint64_t SectionOffset, + const RuntimeFunction &RF); + void printUnwindCode(const UnwindInfo& UI, ArrayRef UC); + void printUnwindInfo(const Context &Ctx, const object::coff_section *Section, + off_t Offset, const UnwindInfo &UI); + void printRuntimeFunction(const Context &Ctx, + const object::coff_section *Section, + uint64_t SectionOffset, const RuntimeFunction &RF); + +public: + Dumper(StreamWriter &SW) : SW(SW), OS(SW.getOStream()) {} + + void printData(const Context &Ctx); +}; +} +} + +#endif From d29bdc72bbefc938a23b51ac4282548d6e36d317 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Sun, 25 May 2014 21:37:59 +0000 Subject: [PATCH 149/906] tools: avoid use of std::function Remove the use of the std::function and replace the capturing lambda with a non-capturing one, opting to pass the user data down to the context. This is needed as std::function is not yet available on all hosted platforms (it requires RTTI, which breaks on Windows). Thanks to Nico Rieck for pointing this out! 
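The replacement pattern is ordinary C++; a minimal standalone sketch (made-up
names, not the llvm-readobj types) looks like this: a capture-free lambda
converts to a plain function pointer, and the state it would otherwise have
captured travels in a separate void* slot.

#include <cstdint>
#include <iostream>

struct Context {
  // Plain function-pointer callback; no std::function involved.
  typedef int (*Resolver)(uint64_t Offset, void *UserData);
  Resolver Resolve;
  void *UserData;
};

struct Dumper {
  int Base = 100;
  int resolve(uint64_t Offset) { return Base + static_cast<int>(Offset); }
};

int main() {
  Dumper D;
  // A lambda with an empty capture list decays to Context::Resolver; the
  // object it operates on is passed explicitly through UserData.
  Context Ctx{[](uint64_t Offset, void *UserData) -> int {
                return static_cast<Dumper *>(UserData)->resolve(Offset);
              },
              &D};
  std::cout << Ctx.Resolve(8, Ctx.UserData) << "\n"; // prints 108
  return 0;
}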
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209607 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-readobj/COFFDumper.cpp | 9 +++++---- tools/llvm-readobj/Win64EHDumper.cpp | 5 +++-- tools/llvm-readobj/Win64EHDumper.h | 12 ++++++------ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp index 7b9595f84c5b..91f2a57dccfc 100644 --- a/tools/llvm-readobj/COFFDumper.cpp +++ b/tools/llvm-readobj/COFFDumper.cpp @@ -849,11 +849,12 @@ void COFFDumper::printUnwindInfo() { case COFF::IMAGE_FILE_MACHINE_AMD64: { Win64EH::Dumper Dumper(W); Win64EH::Dumper::SymbolResolver Resolver = - [this](const object::coff_section *Section, uint64_t Offset, - SymbolRef &Symbol) -> error_code { - return this->resolveSymbol(Section, Offset, Symbol); + [](const object::coff_section *Section, uint64_t Offset, + SymbolRef &Symbol, void *user_data) -> error_code { + COFFDumper *Dumper = reinterpret_cast(user_data); + return Dumper->resolveSymbol(Section, Offset, Symbol); }; - Win64EH::Dumper::Context Ctx(*Obj, Resolver); + Win64EH::Dumper::Context Ctx(*Obj, Resolver, this); Dumper.printData(Ctx); break; } diff --git a/tools/llvm-readobj/Win64EHDumper.cpp b/tools/llvm-readobj/Win64EHDumper.cpp index 449df001cc97..c64d362469bf 100644 --- a/tools/llvm-readobj/Win64EHDumper.cpp +++ b/tools/llvm-readobj/Win64EHDumper.cpp @@ -120,7 +120,8 @@ static std::string formatSymbol(const Dumper::Context &Ctx, StringRef Name; SymbolRef Symbol; - if (Ctx.ResolveSymbol(Section, Offset, Symbol) || Symbol.getName(Name)) { + if (Ctx.ResolveSymbol(Section, Offset, Symbol, Ctx.UserData) || + Symbol.getName(Name)) { OS << format(" (0x%" PRIX64 ")", Offset); return OS.str(); } @@ -139,7 +140,7 @@ static error_code resolveRelocation(const Dumper::Context &Ctx, const coff_section *&ResolvedSection, uint64_t &ResolvedAddress) { SymbolRef Symbol; - if (error_code EC = Ctx.ResolveSymbol(Section, Offset, Symbol)) + if (error_code EC = Ctx.ResolveSymbol(Section, Offset, Symbol, Ctx.UserData)) return EC; if (error_code EC = Symbol.getAddress(ResolvedAddress)) diff --git a/tools/llvm-readobj/Win64EHDumper.h b/tools/llvm-readobj/Win64EHDumper.h index d0c129c68b80..2eac81048b4f 100644 --- a/tools/llvm-readobj/Win64EHDumper.h +++ b/tools/llvm-readobj/Win64EHDumper.h @@ -13,8 +13,6 @@ #include "StreamWriter.h" #include "llvm/Support/Win64EH.h" -#include - namespace llvm { namespace object { class COFFObjectFile; @@ -28,15 +26,17 @@ class Dumper { raw_ostream &OS; public: - typedef std::function SymbolResolver; + typedef error_code (*SymbolResolver)(const object::coff_section *, uint64_t, + object::SymbolRef &, void *); struct Context { const object::COFFObjectFile &COFF; SymbolResolver ResolveSymbol; + void *UserData; - Context(const object::COFFObjectFile &COFF, SymbolResolver Resolver) - : COFF(COFF), ResolveSymbol(Resolver) {} + Context(const object::COFFObjectFile &COFF, SymbolResolver Resolver, + void *UserData) + : COFF(COFF), ResolveSymbol(Resolver), UserData(UserData) {} }; private: From a9b07422765f974ed05e1f21d937f7da5e18f4a1 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Mon, 26 May 2014 00:25:09 +0000 Subject: [PATCH 150/906] Trailing whitespace. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209608 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/Scalar.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 6aea643c42aa..b02e8f740149 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -162,7 +162,7 @@ Pass *createLoopRotatePass(); // LoopIdiom - This pass recognizes and replaces idioms in loops. // Pass *createLoopIdiomPass(); - + //===----------------------------------------------------------------------===// // // PromoteMemoryToRegister - This pass is used to promote memory references to @@ -201,7 +201,7 @@ FunctionPass *createReassociatePass(); // preds always go to some succ. // FunctionPass *createJumpThreadingPass(); - + //===----------------------------------------------------------------------===// // // CFGSimplification - Merge basic blocks, eliminate unreachable blocks, @@ -284,10 +284,10 @@ extern char &LCSSAID; // tree. // FunctionPass *createEarlyCSEPass(); - + //===----------------------------------------------------------------------===// // -// GVN - This pass performs global value numbering and redundant load +// GVN - This pass performs global value numbering and redundant load // elimination cotemporaneously. // FunctionPass *createGVNPass(bool NoLoads = false); @@ -305,7 +305,7 @@ FunctionPass *createMemCpyOptPass(); // can prove are dead. // Pass *createLoopDeletionPass(); - + //===----------------------------------------------------------------------===// // // ConstantHoisting - This pass prepares a function for expensive constants. @@ -318,7 +318,7 @@ FunctionPass *createConstantHoistingPass(); // FunctionPass *createInstructionNamerPass(); extern char &InstructionNamerID; - + //===----------------------------------------------------------------------===// // // Sink - Code Sinking From a1b1165f305a88515671c8b1cd40c06a65a95f74 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Mon, 26 May 2014 00:25:26 +0000 Subject: [PATCH 151/906] Reformat linefeeds. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209609 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/IPO.h | 3 --- include/llvm/Transforms/IPO/PassManagerBuilder.h | 3 +-- include/llvm/Transforms/ObjCARC.h | 1 - include/llvm/Transforms/Scalar.h | 2 -- 4 files changed, 1 insertion(+), 8 deletions(-) diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h index 334fb1cc4d3a..ce1a7d6a5230 100644 --- a/include/llvm/Transforms/IPO.h +++ b/include/llvm/Transforms/IPO.h @@ -58,21 +58,18 @@ ModulePass *createStripDeadDebugInfoPass(); /// ModulePass *createConstantMergePass(); - //===----------------------------------------------------------------------===// /// createGlobalOptimizerPass - This function returns a new pass that optimizes /// non-address taken internal globals. /// ModulePass *createGlobalOptimizerPass(); - //===----------------------------------------------------------------------===// /// createGlobalDCEPass - This transform is designed to eliminate unreachable /// internal globals (functions or global variables) /// ModulePass *createGlobalDCEPass(); - //===----------------------------------------------------------------------===// /// createGVExtractionPass - If deleteFn is true, this pass deletes /// the specified global values. 
Otherwise, it deletes as much of the module as diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h index 4c6f3aa1c454..023de0863e3b 100644 --- a/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -55,7 +55,6 @@ using legacy::FunctionPassManager; /// ... class PassManagerBuilder { public: - /// Extensions are passed the builder itself (so they can see how it is /// configured) as well as the pass manager to add stuff to. typedef void (*ExtensionFn)(const PassManagerBuilder &Builder, @@ -135,8 +134,8 @@ class PassManagerBuilder { private: void addExtensionsToPM(ExtensionPointTy ETy, PassManagerBase &PM) const; void addInitialAliasAnalysisPasses(PassManagerBase &PM) const; -public: +public: /// populateFunctionPassManager - This fills in the function pass manager, /// which is expected to be run on each function immediately as it is /// generated. The idea is to reduce the size of the IR in memory. diff --git a/include/llvm/Transforms/ObjCARC.h b/include/llvm/Transforms/ObjCARC.h index b3c19c077eab..1897adc2ffbf 100644 --- a/include/llvm/Transforms/ObjCARC.h +++ b/include/llvm/Transforms/ObjCARC.h @@ -46,4 +46,3 @@ Pass *createObjCARCOptPass(); } // End llvm namespace #endif - diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index b02e8f740149..68d61bdbdb7a 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -344,14 +344,12 @@ Pass *createCorrelatedValuePropagationPass(); FunctionPass *createInstructionSimplifierPass(); extern char &InstructionSimplifierID; - //===----------------------------------------------------------------------===// // // LowerExpectIntrinsics - Removes llvm.expect intrinsics and creates // "block_weights" metadata. FunctionPass *createLowerExpectIntrinsicPass(); - //===----------------------------------------------------------------------===// // // PartiallyInlineLibCalls - Tries to inline the fast path of library From 524b8836c218095ad370f105e89b24588a89266d Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 26 May 2014 04:08:51 +0000 Subject: [PATCH 152/906] Just check the entire string. Thanks to David Blaikie for the suggestion. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209610 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/dllexport-x86_64.ll | 60 ++++++++++++------------ test/CodeGen/X86/dllexport.ll | 68 ++++++++++++++-------------- 2 files changed, 64 insertions(+), 64 deletions(-) diff --git a/test/CodeGen/X86/dllexport-x86_64.ll b/test/CodeGen/X86/dllexport-x86_64.ll index 1347664c71f1..f4dec4ff591b 100644 --- a/test/CodeGen/X86/dllexport-x86_64.ll +++ b/test/CodeGen/X86/dllexport-x86_64.ll @@ -76,33 +76,33 @@ define weak_odr dllexport void @weak1() { @blob_alias = dllexport alias i32 (), [6 x i8]* @blob ; CHECK: .section .drectve -; WIN32: /EXPORT:Var1,DATA" -; WIN32: /EXPORT:Var2,DATA" -; WIN32: /EXPORT:Var3,DATA" -; WIN32: /EXPORT:WeakVar1,DATA" -; WIN32: /EXPORT:WeakVar2,DATA" -; WIN32: /EXPORT:f1" -; WIN32: /EXPORT:f2" -; WIN32: /EXPORT:lnk1" -; WIN32: /EXPORT:lnk2" -; WIN32: /EXPORT:weak1" -; WIN32: /EXPORT:alias" -; WIN32: /EXPORT:alias2" -; WIN32: /EXPORT:alias3" -; WIN32: /EXPORT:weak_alias" -; WIN32: /EXPORT:blob_alias" -; MINGW: -export:Var1,data" -; MINGW: -export:Var2,data" -; MINGW: -export:Var3,data" -; MINGW: -export:WeakVar1,data" -; MINGW: -export:WeakVar2,data" -; MINGW: -export:f1" -; MINGW: -export:f2" -; MINGW: -export:lnk1" -; MINGW: -export:lnk2" -; MINGW: -export:weak1" -; MINGW: -export:alias" -; MINGW: -export:alias2" -; MINGW: -export:alias3" -; MINGW: -export:weak_alias" -; MINGW: -export:blob_alias" +; WIN32: " /EXPORT:Var1,DATA" +; WIN32: " /EXPORT:Var2,DATA" +; WIN32: " /EXPORT:Var3,DATA" +; WIN32: " /EXPORT:WeakVar1,DATA" +; WIN32: " /EXPORT:WeakVar2,DATA" +; WIN32: " /EXPORT:f1" +; WIN32: " /EXPORT:f2" +; WIN32: " /EXPORT:lnk1" +; WIN32: " /EXPORT:lnk2" +; WIN32: " /EXPORT:weak1" +; WIN32: " /EXPORT:alias" +; WIN32: " /EXPORT:alias2" +; WIN32: " /EXPORT:alias3" +; WIN32: " /EXPORT:weak_alias" +; WIN32: " /EXPORT:blob_alias" +; MINGW: " -export:Var1,data" +; MINGW: " -export:Var2,data" +; MINGW: " -export:Var3,data" +; MINGW: " -export:WeakVar1,data" +; MINGW: " -export:WeakVar2,data" +; MINGW: " -export:f1" +; MINGW: " -export:f2" +; MINGW: " -export:lnk1" +; MINGW: " -export:lnk2" +; MINGW: " -export:weak1" +; MINGW: " -export:alias" +; MINGW: " -export:alias2" +; MINGW: " -export:alias3" +; MINGW: " -export:weak_alias" +; MINGW: " -export:blob_alias" diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll index 0805fe2efab9..e2c3f131ee06 100644 --- a/test/CodeGen/X86/dllexport.ll +++ b/test/CodeGen/X86/dllexport.ll @@ -93,38 +93,38 @@ define weak_odr dllexport void @weak1() { ; CHECK: .section .drectve -; CHECK-CL: /EXPORT:_Var1,DATA" -; CHECK-CL: /EXPORT:_Var2,DATA" -; CHECK-CL: /EXPORT:_Var3,DATA" -; CHECK-CL: /EXPORT:_WeakVar1,DATA" -; CHECK-CL: /EXPORT:_WeakVar2,DATA" -; CHECK-CL: /EXPORT:_f1" -; CHECK-CL: /EXPORT:_f2" -; CHECK-CL: /EXPORT:_stdfun@0" -; CHECK-CL: /EXPORT:@fastfun@0" -; CHECK-CL: /EXPORT:_thisfun" -; CHECK-CL: /EXPORT:_lnk1" -; CHECK-CL: /EXPORT:_lnk2" -; CHECK-CL: /EXPORT:_weak1" -; CHECK-CL: /EXPORT:_alias" -; CHECK-CL: /EXPORT:_alias2" -; CHECK-CL: /EXPORT:_alias3" -; CHECK-CL: /EXPORT:_weak_alias" -; CHECK-GCC: -export:Var1,data" -; CHECK-GCC: -export:Var2,data" -; CHECK-GCC: -export:Var3,data" -; CHECK-GCC: -export:WeakVar1,data" -; CHECK-GCC: -export:WeakVar2,data" -; CHECK-GCC: -export:f1" -; CHECK-GCC: -export:f2" -; CHECK-GCC: -export:stdfun@0" -; CHECK-GCC: -export:@fastfun@0" -; CHECK-GCC: -export:thisfun" -; CHECK-GCC: -export:lnk1" -; CHECK-GCC: -export:lnk2" -; CHECK-GCC: -export:weak1" -; 
CHECK-GCC: -export:alias" -; CHECK-GCC: -export:alias2" -; CHECK-GCC: -export:alias3" -; CHECK-GCC: -export:weak_alias" +; CHECK-CL: " /EXPORT:_Var1,DATA" +; CHECK-CL: " /EXPORT:_Var2,DATA" +; CHECK-CL: " /EXPORT:_Var3,DATA" +; CHECK-CL: " /EXPORT:_WeakVar1,DATA" +; CHECK-CL: " /EXPORT:_WeakVar2,DATA" +; CHECK-CL: " /EXPORT:_f1" +; CHECK-CL: " /EXPORT:_f2" +; CHECK-CL: " /EXPORT:_stdfun@0" +; CHECK-CL: " /EXPORT:@fastfun@0" +; CHECK-CL: " /EXPORT:_thisfun" +; CHECK-CL: " /EXPORT:_lnk1" +; CHECK-CL: " /EXPORT:_lnk2" +; CHECK-CL: " /EXPORT:_weak1" +; CHECK-CL: " /EXPORT:_alias" +; CHECK-CL: " /EXPORT:_alias2" +; CHECK-CL: " /EXPORT:_alias3" +; CHECK-CL: " /EXPORT:_weak_alias" +; CHECK-GCC: " -export:Var1,data" +; CHECK-GCC: " -export:Var2,data" +; CHECK-GCC: " -export:Var3,data" +; CHECK-GCC: " -export:WeakVar1,data" +; CHECK-GCC: " -export:WeakVar2,data" +; CHECK-GCC: " -export:f1" +; CHECK-GCC: " -export:f2" +; CHECK-GCC: " -export:stdfun@0" +; CHECK-GCC: " -export:@fastfun@0" +; CHECK-GCC: " -export:thisfun" +; CHECK-GCC: " -export:lnk1" +; CHECK-GCC: " -export:lnk2" +; CHECK-GCC: " -export:weak1" +; CHECK-GCC: " -export:alias" +; CHECK-GCC: " -export:alias2" +; CHECK-GCC: " -export:alias3" +; CHECK-GCC: " -export:weak_alias" From 041b4abc735b5a32e1ff2da376a87295d4d25410 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 26 May 2014 05:32:21 +0000 Subject: [PATCH 153/906] DwarfUnit: Remove some misleading no-op code introduced in r204162. Post commit review feedback from Manman called this out, but it looks like it slipped through the cracks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209611 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 8382990a6c38..8d8c6181e1dd 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1377,10 +1377,6 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) { DIScope Context = resolve(SP.getContext()); DIE *ContextDIE = getOrCreateContextDIE(Context); - // Unique declarations based on the ODR, where applicable. - SP = DISubprogram(DD->resolve(SP.getRef())); - assert(SP.Verify()); - if (DIE *SPDie = getDIE(SP)) return SPDie; From bcc96923e014dbd496ae29f67629c62f45ec653c Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 26 May 2014 06:44:52 +0000 Subject: [PATCH 154/906] DebugInfo: Test linkonce-odr functions under LTO. This was previously regressed/broken by r192749 (reverted due to this issue in r192938) and I was about to break it again by accident with some more invasive changes that deal with the subprogram lists. So to avoid that and further issues - here's a test. It's a pretty basic test - in both r192749 and my impending case, this test would crash, but checking the basics (that we put a subprogram in just one of the two CUs) seems like a good start. We still get this wrong in weird ways if the linkonce-odr function happens to not be identical in the metadata (because it's defined in two different files (hence the # line directives in this test), etc) even though it meets the language requirements (identical token stream) for such a thing. That results in two subprogram DIEs, but only one of them gets the parameter and high/low pc information, etc. 
We probably need to use the DIRef infrastructure to deduplicate functions as we do types to address this issue - or perhaps teach the BC linker to remove the duplicate entries in subprogram lists? git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209614 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/DebugInfo/cross-cu-linkonce.ll | 74 +++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 test/DebugInfo/cross-cu-linkonce.ll diff --git a/test/DebugInfo/cross-cu-linkonce.ll b/test/DebugInfo/cross-cu-linkonce.ll new file mode 100644 index 000000000000..16a50122dcf8 --- /dev/null +++ b/test/DebugInfo/cross-cu-linkonce.ll @@ -0,0 +1,74 @@ +; REQUIRES: object-emission + +; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +; Built from source: +; $ clang++ a.cpp b.cpp -g -c -emit-llvm +; $ llvm-link a.bc b.bc -o ab.bc +; $ opt -inline ab.bc -o ab-opt.bc +; $ cat a.cpp +; # 1 "func.h" +; inline int func(int i) { +; return i * 2; +; } +; int (*x)(int) = &func; +; $ cat b.cpp +; # 1 "func.h" +; inline int func(int i) { +; return i * 2; +; } +; int (*y)(int) = &func; + +; CHECK: DW_TAG_compile_unit +; CHECK: DW_TAG_subprogram +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}} "func" +; CHECK: DW_TAG_compile_unit +; CHECK-NOT: DW_TAG_subprogram + +@x = global i32 (i32)* @_Z4funci, align 8 +@y = global i32 (i32)* @_Z4funci, align 8 + +; Function Attrs: inlinehint nounwind uwtable +define linkonce_odr i32 @_Z4funci(i32 %i) #0 { + %1 = alloca i32, align 4 + store i32 %i, i32* %1, align 4 + call void @llvm.dbg.declare(metadata !{i32* %1}, metadata !20), !dbg !21 + %2 = load i32* %1, align 4, !dbg !22 + %3 = mul nsw i32 %2, 2, !dbg !22 + ret i32 %3, !dbg !22 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #1 + +attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0, !13} +!llvm.module.flags = !{!17, !18} +!llvm.ident = !{!19, !19} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !10, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/a.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"a.cpp", metadata !"/tmp/dbginfo"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"func", metadata !"func", metadata !"_Z4funci", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4funci, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func] +!5 = metadata !{metadata !"func.h", metadata !"/tmp/dbginfo"} +!6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [/tmp/dbginfo/func.h] +!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!8 = metadata !{metadata !9, metadata !9} +!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!10 = metadata !{metadata !11} +!11 = 
metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 4, metadata !12, i32 0, i32 1, i32 (i32)** @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def] +!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] +!13 = metadata !{i32 786449, metadata !14, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !15, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/b.cpp] [DW_LANG_C_plus_plus] +!14 = metadata !{metadata !"b.cpp", metadata !"/tmp/dbginfo"} +!15 = metadata !{metadata !16} +!16 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !6, i32 4, metadata !12, i32 0, i32 1, i32 (i32)** @y, null} ; [ DW_TAG_variable ] [y] [line 4] [def] +!17 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!19 = metadata !{metadata !"clang version 3.5.0 "} +!20 = metadata !{i32 786689, metadata !4, metadata !"i", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 1] +!21 = metadata !{i32 1, i32 0, metadata !4, null} +!22 = metadata !{i32 2, i32 0, metadata !4, null} From 866ed7f63f9edf9c20763a9abc115fb23372f42f Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Mon, 26 May 2014 08:58:51 +0000 Subject: [PATCH 155/906] Make the LoopRotate pass's maximum header size configurable both programmatically and via the command line, mirroring similar functionality in LoopUnroll. In situations where clients used custom unrolling thresholds, their intent could previously be foiled by LoopRotate having a hardcoded threshold. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209617 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/Scalar.h | 2 +- lib/Transforms/Scalar/LoopRotation.cpp | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 68d61bdbdb7a..cf1d65544075 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -155,7 +155,7 @@ Pass *createLoopRerollPass(); // // LoopRotate - This pass is a simple loop rotating pass. 
// -Pass *createLoopRotatePass(); +Pass *createLoopRotatePass(int MaxHeaderSize = -1); //===----------------------------------------------------------------------===// // diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 5c747e1ac518..2ce58314f8ef 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -32,7 +33,9 @@ using namespace llvm; #define DEBUG_TYPE "loop-rotate" -#define MAX_HEADER_SIZE 16 +static cl::opt +DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden, + cl::desc("The default maximum header size for automatic loop rotation")); STATISTIC(NumRotated, "Number of loops rotated"); namespace { @@ -40,8 +43,12 @@ namespace { class LoopRotate : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopRotate() : LoopPass(ID) { + LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) { initializeLoopRotatePass(*PassRegistry::getPassRegistry()); + if (SpecifiedMaxHeaderSize == -1) + MaxHeaderSize = DefaultRotationThreshold; + else + MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize); } // LCSSA form makes instruction renaming easier. @@ -62,6 +69,7 @@ namespace { bool rotateLoop(Loop *L, bool SimplifiedLatch); private: + unsigned MaxHeaderSize; LoopInfo *LI; const TargetTransformInfo *TTI; }; @@ -75,7 +83,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false) -Pass *llvm::createLoopRotatePass() { return new LoopRotate(); } +Pass *llvm::createLoopRotatePass(int MaxHeaderSize) { + return new LoopRotate(MaxHeaderSize); +} /// Rotate Loop L as many times as possible. Return true if /// the loop is rotated at least once. @@ -320,7 +330,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { << " instructions: "; L->dump()); return false; } - if (Metrics.NumInsts > MAX_HEADER_SIZE) + if (Metrics.NumInsts > MaxHeaderSize) return false; } From 47e3b437684acf6a025ba45e5b5da28712befe73 Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Mon, 26 May 2014 09:37:19 +0000 Subject: [PATCH 156/906] [AArch64] Add a regression test for the load store optimizer. We have a couple of regression tests for load/store pairing, but (to my knowledge) there are no regression tests for the load/store + add/sub folding. As a first step towards increased test coverage of this area, this commit adds a test for one instance of a load + add to pre-indexed load transformation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209618 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/AArch64/ldst-opt.ll | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 test/CodeGen/AArch64/ldst-opt.ll diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll new file mode 100644 index 000000000000..79c18cacb8dd --- /dev/null +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -0,0 +1,31 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s + +; This file contains tests for the AArch64 load/store optimizer. 
+ +%struct.A = type { %struct.B, %struct.C } +%struct.B = type { i8*, i8*, i8*, i8* } +%struct.C = type { i32, i32 } + +; Check the following transform: +; +; ldr w1, [x0, #32] +; ... +; add x0, x0, #32 +; -> +; ldr w1, [x0, #32]! + +define void @foo(%struct.A* %ptr) nounwind { +; CHECK-LABEL: foo +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.A* %ptr, i64 0, i32 1, i32 0 + %add = load i32* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.A* %ptr, i64 0, i32 1 + tail call void @bar(%struct.C* %c, i32 %add) + ret void +} + +declare void @bar(%struct.C*, i32) + From 852c0cc64fdd9f02ab87bafd9b3fb5fb9e439106 Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Mon, 26 May 2014 09:40:40 +0000 Subject: [PATCH 157/906] Remove accidentally committed whitespace. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209619 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/AArch64/ldst-opt.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index 79c18cacb8dd..b2855f38a639 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -13,7 +13,7 @@ ; add x0, x0, #32 ; -> ; ldr w1, [x0, #32]! - + define void @foo(%struct.A* %ptr) nounwind { ; CHECK-LABEL: foo ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #32]! @@ -23,7 +23,7 @@ entry: br label %bar bar: %c = getelementptr inbounds %struct.A* %ptr, i64 0, i32 1 - tail call void @bar(%struct.C* %c, i32 %add) + tail call void @bar(%struct.C* %c, i32 %add) ret void } From 29682f428e7865da85d974050a75baf4d76cc7e2 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 26 May 2014 11:25:33 +0000 Subject: [PATCH 158/906] AArch64: remove empty ARM64 directories from svn. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209621 91177308-0d34-0410-b5e6-96231b3b80d8 From 3d150e08aed6fc424236312c7a5f724dde2c7d48 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Mon, 26 May 2014 11:57:16 +0000 Subject: [PATCH 159/906] [asan] decrease asan-instrumentation-with-call-threshold from 10000 to 7000, see PR17409 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209623 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 25acd2818935..0617215a7c09 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -156,7 +156,7 @@ static cl::opt ClInstrumentationWithCallsThreshold( cl::desc("If the function being instrumented contains more than " "this number of memory accesses, use callbacks instead of " "inline checks (-1 means never use callbacks)."), - cl::Hidden, cl::init(10000)); + cl::Hidden, cl::init(7000)); static cl::opt ClMemoryAccessCallbackPrefix( "asan-memory-access-callback-prefix", cl::desc("Prefix for memory access callbacks"), cl::Hidden, From 3390e6c4a83b8df00a4e3d22972ba92fce4600ca Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Mon, 26 May 2014 12:15:51 +0000 Subject: [PATCH 160/906] [AArch64] Add more regression tests for the load/store optimization pass. Cover the following cases: ldr X, [x0, #32] ... add x0, x0, #32 -> ldr X, [x0, #32]! with X being either w1, x1, s0, d0 or q0. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209624 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/AArch64/ldst-opt.ll | 92 ++++++++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 11 deletions(-) diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index b2855f38a639..9eeb938cec71 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -2,30 +2,100 @@ ; This file contains tests for the AArch64 load/store optimizer. -%struct.A = type { %struct.B, %struct.C } -%struct.B = type { i8*, i8*, i8*, i8* } -%struct.C = type { i32, i32 } +%padding = type { i8*, i8*, i8*, i8* } +%s.word = type { i32, i32 } +%s.doubleword = type { i64, i32 } +%s.quadword = type { fp128, i32 } +%s.float = type { float, i32 } +%s.double = type { double, i32 } +%struct.word = type { %padding, %s.word } +%struct.doubleword = type { %padding, %s.doubleword } +%struct.quadword = type { %padding, %s.quadword } +%struct.float = type { %padding, %s.float } +%struct.double = type { %padding, %s.double } ; Check the following transform: ; -; ldr w1, [x0, #32] +; ldr X, [x0, #32] ; ... ; add x0, x0, #32 ; -> -; ldr w1, [x0, #32]! +; ldr X, [x0, #32]! +; +; with X being either w1, x1, s0, d0 or q0. + +declare void @bar_word(%s.word*, i32) -define void @foo(%struct.A* %ptr) nounwind { -; CHECK-LABEL: foo +define void @load-pre-indexed-word(%struct.word* %ptr) nounwind { +; CHECK-LABEL: load-pre-indexed-word ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #32]! entry: - %a = getelementptr inbounds %struct.A* %ptr, i64 0, i32 1, i32 0 + %a = getelementptr inbounds %struct.word* %ptr, i64 0, i32 1, i32 0 %add = load i32* %a, align 4 br label %bar bar: - %c = getelementptr inbounds %struct.A* %ptr, i64 0, i32 1 - tail call void @bar(%struct.C* %c, i32 %add) + %c = getelementptr inbounds %struct.word* %ptr, i64 0, i32 1 + tail call void @bar_word(%s.word* %c, i32 %add) ret void } -declare void @bar(%struct.C*, i32) +declare void @bar_doubleword(%s.doubleword*, i64) + +define void @load-pre-indexed-doubleword(%struct.doubleword* %ptr) nounwind { +; CHECK-LABEL: load-pre-indexed-doubleword +; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.doubleword* %ptr, i64 0, i32 1, i32 0 + %add = load i64* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.doubleword* %ptr, i64 0, i32 1 + tail call void @bar_doubleword(%s.doubleword* %c, i64 %add) + ret void +} + +declare void @bar_quadword(%s.quadword*, fp128) + +define void @load-pre-indexed-quadword(%struct.quadword* %ptr) nounwind { +; CHECK-LABEL: load-pre-indexed-quadword +; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.quadword* %ptr, i64 0, i32 1, i32 0 + %add = load fp128* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.quadword* %ptr, i64 0, i32 1 + tail call void @bar_quadword(%s.quadword* %c, fp128 %add) + ret void +} + +declare void @bar_float(%s.float*, float) + +define void @load-pre-indexed-float(%struct.float* %ptr) nounwind { +; CHECK-LABEL: load-pre-indexed-float +; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #32]! 
+entry: + %a = getelementptr inbounds %struct.float* %ptr, i64 0, i32 1, i32 0 + %add = load float* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.float* %ptr, i64 0, i32 1 + tail call void @bar_float(%s.float* %c, float %add) + ret void +} + +declare void @bar_double(%s.double*, double) + +define void @load-pre-indexed-double(%struct.double* %ptr) nounwind { +; CHECK-LABEL: load-pre-indexed-double +; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.double* %ptr, i64 0, i32 1, i32 0 + %add = load double* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.double* %ptr, i64 0, i32 1 + tail call void @bar_double(%s.double* %c, double %add) + ret void +} From 7aac65de51539ff21f5a4bfc8cb4d1dfda20cabe Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Mon, 26 May 2014 13:36:47 +0000 Subject: [PATCH 161/906] [AArch64] Add store + add folding regression tests for the load/store optimization pass. Add tests for the following transform: str X, [x0, #32] ... add x0, x0, #32 -> str X, [x0, #32]! with X being either w1, x1, s0, d0 or q0. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209627 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/AArch64/ldst-opt.ll | 68 +++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index 9eeb938cec71..103c23c737aa 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -16,11 +16,11 @@ ; Check the following transform: ; -; ldr X, [x0, #32] +; (ldr|str) X, [x0, #32] ; ... ; add x0, x0, #32 ; -> -; ldr X, [x0, #32]! +; (ldr|str) X, [x0, #32]! ; ; with X being either w1, x1, s0, d0 or q0. @@ -39,6 +39,19 @@ bar: ret void } +define void @store-pre-indexed-word(%struct.word* %ptr, i32 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-word +; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.word* %ptr, i64 0, i32 1, i32 0 + store i32 %val, i32* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.word* %ptr, i64 0, i32 1 + tail call void @bar_word(%s.word* %c, i32 %val) + ret void +} + declare void @bar_doubleword(%s.doubleword*, i64) define void @load-pre-indexed-doubleword(%struct.doubleword* %ptr) nounwind { @@ -54,6 +67,19 @@ bar: ret void } +define void @store-pre-indexed-doubleword(%struct.doubleword* %ptr, i64 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-doubleword +; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.doubleword* %ptr, i64 0, i32 1, i32 0 + store i64 %val, i64* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.doubleword* %ptr, i64 0, i32 1 + tail call void @bar_doubleword(%s.doubleword* %c, i64 %val) + ret void +} + declare void @bar_quadword(%s.quadword*, fp128) define void @load-pre-indexed-quadword(%struct.quadword* %ptr) nounwind { @@ -69,6 +95,19 @@ bar: ret void } +define void @store-pre-indexed-quadword(%struct.quadword* %ptr, fp128 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-quadword +; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]! 
+entry: + %a = getelementptr inbounds %struct.quadword* %ptr, i64 0, i32 1, i32 0 + store fp128 %val, fp128* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.quadword* %ptr, i64 0, i32 1 + tail call void @bar_quadword(%s.quadword* %c, fp128 %val) + ret void +} + declare void @bar_float(%s.float*, float) define void @load-pre-indexed-float(%struct.float* %ptr) nounwind { @@ -84,6 +123,19 @@ bar: ret void } +define void @store-pre-indexed-float(%struct.float* %ptr, float %val) nounwind { +; CHECK-LABEL: store-pre-indexed-float +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.float* %ptr, i64 0, i32 1, i32 0 + store float %val, float* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.float* %ptr, i64 0, i32 1 + tail call void @bar_float(%s.float* %c, float %val) + ret void +} + declare void @bar_double(%s.double*, double) define void @load-pre-indexed-double(%struct.double* %ptr) nounwind { @@ -99,3 +151,15 @@ bar: ret void } +define void @store-pre-indexed-double(%struct.double* %ptr, double %val) nounwind { +; CHECK-LABEL: store-pre-indexed-double +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.double* %ptr, i64 0, i32 1, i32 0 + store double %val, double* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.double* %ptr, i64 0, i32 1 + tail call void @bar_double(%s.double* %c, double %val) + ret void +} From 1322e998c1f00b972120ec7efb1d76a6de2e7956 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 26 May 2014 13:38:51 +0000 Subject: [PATCH 162/906] Convert a few loops to use ranges. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209628 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Writer/BitcodeWriter.cpp | 105 +++++++++++++-------------- 1 file changed, 51 insertions(+), 54 deletions(-) diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index db254e6a9413..cc73b842e338 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -476,8 +476,8 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.ExitBlock(); } -static unsigned getEncodedLinkage(const GlobalValue *GV) { - switch (GV->getLinkage()) { +static unsigned getEncodedLinkage(const GlobalValue &GV) { + switch (GV.getLinkage()) { case GlobalValue::ExternalLinkage: return 0; case GlobalValue::WeakAnyLinkage: return 1; case GlobalValue::AppendingLinkage: return 2; @@ -493,8 +493,8 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) { llvm_unreachable("Invalid linkage"); } -static unsigned getEncodedVisibility(const GlobalValue *GV) { - switch (GV->getVisibility()) { +static unsigned getEncodedVisibility(const GlobalValue &GV) { + switch (GV.getVisibility()) { case GlobalValue::DefaultVisibility: return 0; case GlobalValue::HiddenVisibility: return 1; case GlobalValue::ProtectedVisibility: return 2; @@ -502,8 +502,8 @@ static unsigned getEncodedVisibility(const GlobalValue *GV) { llvm_unreachable("Invalid visibility"); } -static unsigned getEncodedDLLStorageClass(const GlobalValue *GV) { - switch (GV->getDLLStorageClass()) { +static unsigned getEncodedDLLStorageClass(const GlobalValue &GV) { + switch (GV.getDLLStorageClass()) { case GlobalValue::DefaultStorageClass: return 0; case GlobalValue::DLLImportStorageClass: return 1; case GlobalValue::DLLExportStorageClass: return 2; @@ -511,8 +511,8 @@ static unsigned getEncodedDLLStorageClass(const 
GlobalValue *GV) { llvm_unreachable("Invalid DLL storage class"); } -static unsigned getEncodedThreadLocalMode(const GlobalVariable *GV) { - switch (GV->getThreadLocalMode()) { +static unsigned getEncodedThreadLocalMode(const GlobalVariable &GV) { + switch (GV.getThreadLocalMode()) { case GlobalVariable::NotThreadLocal: return 0; case GlobalVariable::GeneralDynamicTLSModel: return 1; case GlobalVariable::LocalDynamicTLSModel: return 2; @@ -543,36 +543,35 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, std::map GCMap; unsigned MaxAlignment = 0; unsigned MaxGlobalType = 0; - for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end(); - GV != E; ++GV) { - MaxAlignment = std::max(MaxAlignment, GV->getAlignment()); - MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV->getType())); - if (GV->hasSection()) { + for (const GlobalValue &GV : M->globals()) { + MaxAlignment = std::max(MaxAlignment, GV.getAlignment()); + MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV.getType())); + if (GV.hasSection()) { // Give section names unique ID's. - unsigned &Entry = SectionMap[GV->getSection()]; + unsigned &Entry = SectionMap[GV.getSection()]; if (!Entry) { - WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV->getSection(), + WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV.getSection(), 0/*TODO*/, Stream); Entry = SectionMap.size(); } } } - for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { - MaxAlignment = std::max(MaxAlignment, F->getAlignment()); - if (F->hasSection()) { + for (const Function &F : *M) { + MaxAlignment = std::max(MaxAlignment, F.getAlignment()); + if (F.hasSection()) { // Give section names unique ID's. - unsigned &Entry = SectionMap[F->getSection()]; + unsigned &Entry = SectionMap[F.getSection()]; if (!Entry) { - WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, F->getSection(), + WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, F.getSection(), 0/*TODO*/, Stream); Entry = SectionMap.size(); } } - if (F->hasGC()) { + if (F.hasGC()) { // Same for GC names. - unsigned &Entry = GCMap[F->getGC()]; + unsigned &Entry = GCMap[F.getGC()]; if (!Entry) { - WriteStringRecord(bitc::MODULE_CODE_GCNAME, F->getGC(), + WriteStringRecord(bitc::MODULE_CODE_GCNAME, F.getGC(), 0/*TODO*/, Stream); Entry = GCMap.size(); } @@ -608,28 +607,27 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, // Emit the global variable information. SmallVector Vals; - for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end(); - GV != E; ++GV) { + for (const GlobalVariable &GV : M->globals()) { unsigned AbbrevToUse = 0; // GLOBALVAR: [type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass] - Vals.push_back(VE.getTypeID(GV->getType())); - Vals.push_back(GV->isConstant()); - Vals.push_back(GV->isDeclaration() ? 0 : - (VE.getValueID(GV->getInitializer()) + 1)); + Vals.push_back(VE.getTypeID(GV.getType())); + Vals.push_back(GV.isConstant()); + Vals.push_back(GV.isDeclaration() ? 0 : + (VE.getValueID(GV.getInitializer()) + 1)); Vals.push_back(getEncodedLinkage(GV)); - Vals.push_back(Log2_32(GV->getAlignment())+1); - Vals.push_back(GV->hasSection() ? 
SectionMap[GV->getSection()] : 0); - if (GV->isThreadLocal() || - GV->getVisibility() != GlobalValue::DefaultVisibility || - GV->hasUnnamedAddr() || GV->isExternallyInitialized() || - GV->getDLLStorageClass() != GlobalValue::DefaultStorageClass) { + Vals.push_back(Log2_32(GV.getAlignment())+1); + Vals.push_back(GV.hasSection() ? SectionMap[GV.getSection()] : 0); + if (GV.isThreadLocal() || + GV.getVisibility() != GlobalValue::DefaultVisibility || + GV.hasUnnamedAddr() || GV.isExternallyInitialized() || + GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass) { Vals.push_back(getEncodedVisibility(GV)); Vals.push_back(getEncodedThreadLocalMode(GV)); - Vals.push_back(GV->hasUnnamedAddr()); - Vals.push_back(GV->isExternallyInitialized()); + Vals.push_back(GV.hasUnnamedAddr()); + Vals.push_back(GV.isExternallyInitialized()); Vals.push_back(getEncodedDLLStorageClass(GV)); } else { AbbrevToUse = SimpleGVarAbbrev; @@ -640,20 +638,20 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, } // Emit the function proto information. - for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { + for (const Function &F : *M) { // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, // section, visibility, gc, unnamed_addr, prefix] - Vals.push_back(VE.getTypeID(F->getType())); - Vals.push_back(F->getCallingConv()); - Vals.push_back(F->isDeclaration()); + Vals.push_back(VE.getTypeID(F.getType())); + Vals.push_back(F.getCallingConv()); + Vals.push_back(F.isDeclaration()); Vals.push_back(getEncodedLinkage(F)); - Vals.push_back(VE.getAttributeID(F->getAttributes())); - Vals.push_back(Log2_32(F->getAlignment())+1); - Vals.push_back(F->hasSection() ? SectionMap[F->getSection()] : 0); + Vals.push_back(VE.getAttributeID(F.getAttributes())); + Vals.push_back(Log2_32(F.getAlignment())+1); + Vals.push_back(F.hasSection() ? SectionMap[F.getSection()] : 0); Vals.push_back(getEncodedVisibility(F)); - Vals.push_back(F->hasGC() ? GCMap[F->getGC()] : 0); - Vals.push_back(F->hasUnnamedAddr()); - Vals.push_back(F->hasPrefixData() ? (VE.getValueID(F->getPrefixData()) + 1) + Vals.push_back(F.hasGC() ? GCMap[F.getGC()] : 0); + Vals.push_back(F.hasUnnamedAddr()); + Vals.push_back(F.hasPrefixData() ? (VE.getValueID(F.getPrefixData()) + 1) : 0); Vals.push_back(getEncodedDLLStorageClass(F)); @@ -663,14 +661,13 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, } // Emit the alias information. - for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end(); - AI != E; ++AI) { + for (const GlobalAlias &A : M->aliases()) { // ALIAS: [alias type, aliasee val#, linkage, visibility] - Vals.push_back(VE.getTypeID(AI->getType())); - Vals.push_back(VE.getValueID(AI->getAliasee())); - Vals.push_back(getEncodedLinkage(AI)); - Vals.push_back(getEncodedVisibility(AI)); - Vals.push_back(getEncodedDLLStorageClass(AI)); + Vals.push_back(VE.getTypeID(A.getType())); + Vals.push_back(VE.getValueID(A.getAliasee())); + Vals.push_back(getEncodedLinkage(A)); + Vals.push_back(getEncodedVisibility(A)); + Vals.push_back(getEncodedDLLStorageClass(A)); unsigned AbbrevToUse = 0; Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse); Vals.clear(); From 90e79a50bb0d198edb226cccc338fe4333466b5e Mon Sep 17 00:00:00 2001 From: Michael Zolotukhin Date: Mon, 26 May 2014 14:49:46 +0000 Subject: [PATCH 163/906] Some cleanup for r209568. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209634 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 461fdac3c6c0..1087e5df1636 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -1208,11 +1208,10 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, auto SMul = dyn_cast(SA->getOperand(1)); if (SMul && SC1) { if (auto SC2 = dyn_cast(SMul->getOperand(0))) { - APInt C1 = SC1->getValue()->getValue(); - APInt C2 = SC2->getValue()->getValue(); - APInt CDiff = C2 - C1; + const APInt &C1 = SC1->getValue()->getValue(); + const APInt &C2 = SC2->getValue()->getValue(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && - CDiff.isStrictlyPositive() && C2.isPowerOf2()) + C2.ugt(C1) && C2.isPowerOf2()) return getAddExpr(getSignExtendExpr(SC1, Ty), getSignExtendExpr(SMul, Ty)); } @@ -1316,11 +1315,10 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, auto SC1 = dyn_cast(Start); auto SC2 = dyn_cast(Step); if (SC1 && SC2) { - APInt C1 = SC1->getValue()->getValue(); - APInt C2 = SC2->getValue()->getValue(); - APInt CDiff = C2 - C1; - if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && - CDiff.isStrictlyPositive() && C2.isPowerOf2()) { + const APInt &C1 = SC1->getValue()->getValue(); + const APInt &C2 = SC2->getValue()->getValue(); + if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && + C2.isPowerOf2()) { Start = getSignExtendExpr(Start, Ty); const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step, L, AR->getNoWrapFlags()); From 4146695fb264a617a272f3dfd05b99b342e2b037 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 26 May 2014 17:21:53 +0000 Subject: [PATCH 164/906] AArch64: simplify calling conventions slightly. We can eliminate the custom C++ code in favour of some TableGen to check the same things. Functionality should be identical, except for a buffer overrun that was present in the C++ code and meant webkit failed if any small argument needed to be passed on the stack. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209636 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64CallingConv.h | 94 ------------------- .../AArch64/AArch64CallingConvention.td | 12 ++- lib/Target/AArch64/AArch64FastISel.cpp | 1 - lib/Target/AArch64/AArch64ISelLowering.cpp | 57 ++++++----- test/CodeGen/AArch64/arm64-patchpoint.ll | 8 ++ 5 files changed, 44 insertions(+), 128 deletions(-) delete mode 100644 lib/Target/AArch64/AArch64CallingConv.h diff --git a/lib/Target/AArch64/AArch64CallingConv.h b/lib/Target/AArch64/AArch64CallingConv.h deleted file mode 100644 index 1fe426ed686f..000000000000 --- a/lib/Target/AArch64/AArch64CallingConv.h +++ /dev/null @@ -1,94 +0,0 @@ -//=== AArch64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the custom routines for the AArch64 Calling Convention that -// aren't done by tablegen. 
-// -//===----------------------------------------------------------------------===// - -#ifndef AArch64CALLINGCONV_H -#define AArch64CALLINGCONV_H - -#include "AArch64InstrInfo.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/Target/TargetInstrInfo.h" - -namespace llvm { - -/// CC_AArch64_Custom_i1i8i16_Reg - customized handling of passing i1/i8/i16 via -/// register. Here, ValVT can be i1/i8/i16 or i32 depending on whether the -/// argument is already promoted and LocVT is i1/i8/i16. We only promote the -/// argument to i32 if we are sure this argument will be passed in register. -static bool CC_AArch64_Custom_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - CCState &State, - bool IsWebKitJS = false) { - static const MCPhysReg RegList1[] = { AArch64::W0, AArch64::W1, AArch64::W2, - AArch64::W3, AArch64::W4, AArch64::W5, - AArch64::W6, AArch64::W7 }; - static const MCPhysReg RegList2[] = { AArch64::X0, AArch64::X1, AArch64::X2, - AArch64::X3, AArch64::X4, AArch64::X5, - AArch64::X6, AArch64::X7 }; - static const MCPhysReg WebKitRegList1[] = { AArch64::W0 }; - static const MCPhysReg WebKitRegList2[] = { AArch64::X0 }; - - const MCPhysReg *List1 = IsWebKitJS ? WebKitRegList1 : RegList1; - const MCPhysReg *List2 = IsWebKitJS ? WebKitRegList2 : RegList2; - - if (unsigned Reg = State.AllocateReg(List1, List2, 8)) { - // Customized extra section for handling i1/i8/i16: - // We need to promote the argument to i32 if it is not done already. - if (ValVT != MVT::i32) { - if (ArgFlags.isSExt()) - LocInfo = CCValAssign::SExt; - else if (ArgFlags.isZExt()) - LocInfo = CCValAssign::ZExt; - else - LocInfo = CCValAssign::AExt; - ValVT = MVT::i32; - } - // Set LocVT to i32 as well if passing via register. - LocVT = MVT::i32; - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return true; - } - return false; -} - -/// CC_AArch64_WebKit_JS_i1i8i16_Reg - customized handling of passing i1/i8/i16 -/// via register. This behaves the same as CC_AArch64_Custom_i1i8i16_Reg, but only -/// uses the first register. -static bool CC_AArch64_WebKit_JS_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - CCState &State) { - return CC_AArch64_Custom_i1i8i16_Reg(ValNo, ValVT, LocVT, LocInfo, ArgFlags, - State, true); -} - -/// CC_AArch64_Custom_i1i8i16_Stack: customized handling of passing i1/i8/i16 on -/// stack. Here, ValVT can be i1/i8/i16 or i32 depending on whether the argument -/// is already promoted and LocVT is i1/i8/i16. If ValVT is already promoted, -/// it will be truncated back to i1/i8/i16. -static bool CC_AArch64_Custom_i1i8i16_Stack(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - CCState &State) { - unsigned Space = ((LocVT == MVT::i1 || LocVT == MVT::i8) ? 
1 : 2); - unsigned Offset12 = State.AllocateStack(Space, Space); - ValVT = LocVT; - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset12, LocVT, LocInfo)); - return true; -} - -} // End llvm namespace - -#endif diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index c263d14dcc37..ded2e17c544e 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -18,6 +18,9 @@ class CCIfAlign : class CCIfBigEndian : CCIf<"State.getTarget().getDataLayout()->isBigEndian()", A>; +class CCIfUnallocated : + CCIf<"!State.isAllocated(AArch64::" # Reg # ")", A>; + //===----------------------------------------------------------------------===// // ARM AAPCS64 Calling Convention //===----------------------------------------------------------------------===// @@ -42,7 +45,7 @@ def CC_AArch64_AAPCS : CallingConv<[ // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCCustom<"CC_AArch64_Custom_i1i8i16_Reg">>, + CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType>>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, // i128 is split to two i64s, we can't fit half to register X7. @@ -117,7 +120,7 @@ def CC_AArch64_DarwinPCS : CallingConv<[ // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCCustom<"CC_AArch64_Custom_i1i8i16_Reg">>, + CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType>>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, // i128 is split to two i64s, we can't fit half to register X7. @@ -140,7 +143,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[ CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, // If more than will fit in registers, pass them on the stack instead. - CCIfType<[i1, i8, i16], CCCustom<"CC_AArch64_Custom_i1i8i16_Stack">>, + CCIfType<[i1, i8], CCAssignToStack<1, 1>>, + CCIfType<[i16], CCAssignToStack<2, 2>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8], CCAssignToStack<8, 8>>, @@ -168,7 +172,7 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ // 32bit quantity as undef. def CC_AArch64_WebKit_JS : CallingConv<[ // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). 
- CCIfType<[i1, i8, i16], CCCustom<"CC_AArch64_WebKit_JS_i1i8i16_Reg">>, + CCIfType<[i1, i8, i16], CCIfUnallocated<"X0", CCPromoteToType>>, CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>, CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>, diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 8cc0f8a27355..c3b53692fb2a 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -16,7 +16,6 @@ #include "AArch64.h" #include "AArch64TargetMachine.h" #include "AArch64Subtarget.h" -#include "AArch64CallingConv.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 4ddba0073398..5c504d1e6c26 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -14,7 +14,6 @@ #include "AArch64ISelLowering.h" #include "AArch64PerfectShuffle.h" #include "AArch64Subtarget.h" -#include "AArch64CallingConv.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64TargetMachine.h" #include "AArch64TargetObjectFile.h" @@ -1681,15 +1680,14 @@ SDValue AArch64TargetLowering::LowerFormalArguments( EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true); MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other; // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. - MVT LocVT = ValVT; if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) - LocVT = MVT::i8; + ValVT = MVT::i8; else if (ActualMVT == MVT::i16) - LocVT = MVT::i16; + ValVT = MVT::i16; CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); bool Res = - AssignFn(i, ValVT, LocVT, CCValAssign::Full, Ins[i].Flags, CCInfo); + AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo); assert(!Res && "Call operand has unhandled type"); (void)Res; } @@ -1748,15 +1746,12 @@ SDValue AArch64TargetLowering::LowerFormalArguments( case CCValAssign::BCvt: ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue); break; + case CCValAssign::AExt: case CCValAssign::SExt: - ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue, - DAG.getValueType(VA.getValVT())); - ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue); - break; case CCValAssign::ZExt: - ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue, - DAG.getValueType(VA.getValVT())); - ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue); + // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt + // nodes after our lowering. + assert(RegVT == Ins[i].VT && "incorrect register location selected"); break; } @@ -1777,21 +1772,26 @@ SDValue AArch64TargetLowering::LowerFormalArguments( SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); SDValue ArgValue; - // If the loc type and val type are not the same, create an anyext load. - if (VA.getLocVT().getSizeInBits() != VA.getValVT().getSizeInBits()) { - // We should only get here if this is a pure integer. 
- assert(!VA.getValVT().isVector() && VA.getValVT().isInteger() && - "Only integer extension supported!"); - ArgValue = DAG.getExtLoad(ISD::EXTLOAD, DL, VA.getValVT(), Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - VA.getLocVT(), - false, false, false, 0); - } else { - ArgValue = DAG.getLoad(VA.getValVT(), DL, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), false, - false, false, 0); + ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; + switch (VA.getLocInfo()) { + default: + break; + case CCValAssign::SExt: + ExtType = ISD::SEXTLOAD; + break; + case CCValAssign::ZExt: + ExtType = ISD::ZEXTLOAD; + break; + case CCValAssign::AExt: + ExtType = ISD::EXTLOAD; + break; } + ArgValue = DAG.getExtLoad(ExtType, DL, VA.getValVT(), Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + VA.getLocVT(), + false, false, false, 0); + InVals.push_back(ArgValue); } } @@ -2184,14 +2184,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. - MVT LocVT = ValVT; if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) - LocVT = MVT::i8; + ValVT = MVT::i8; else if (ActualMVT == MVT::i16) - LocVT = MVT::i16; + ValVT = MVT::i16; CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); - bool Res = AssignFn(i, ValVT, LocVT, CCValAssign::Full, ArgFlags, CCInfo); + bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo); assert(!Res && "Call operand has unhandled type"); (void)Res; } diff --git a/test/CodeGen/AArch64/arm64-patchpoint.ll b/test/CodeGen/AArch64/arm64-patchpoint.ll index 9ef1d778a319..039cdfcc3858 100644 --- a/test/CodeGen/AArch64/arm64-patchpoint.ll +++ b/test/CodeGen/AArch64/arm64-patchpoint.ll @@ -161,3 +161,11 @@ define void @clobberScratch(i32* %p) { declare void @llvm.experimental.stackmap(i64, i32, ...) declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...) declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...) + +; CHECK-LABEL: test_i16: +; CHECK: ldrh [[BREG:w[0-9]+]], [sp] +; CHECK: add w0, w0, [[BREG]] +define webkit_jscc i16 @test_i16(i16 zeroext %a, i16 zeroext %b) { + %sum = add i16 %a, %b + ret i16 %sum +} From e0c2787cb770ecb3bb865a5cf51705fe2cd57441 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 26 May 2014 17:22:07 +0000 Subject: [PATCH 165/906] AArch64: force i1 to be zero-extended at an ABI boundary. This commit is debatable. There are two possible approaches, neither of which is really satisfactory: 1. Use "@foo(i1 zeroext)" to mean an extension to 32-bits on Darwin, and 8 bits otherwise. 2. Redefine "@foo(i1)" to mean that the i1 is extended by the caller to 8 bits. This goes against the spirit of "zeroext" I think, but it's a bit of a vague construct anyway (by definition you're going to extend to the amount required by the ABI, that's why it's the ABI!). This implements option 2. The DAG machinery really isn't setup for the first (there's a fairly strong assumption that "zeroext" goes to at least the smallest register size), and even if it was the resulting DAG looks like it would be inferior in many cases. Theoretically we could add AssertZext nodes in the consumers of ABI-passed values too now, but this actually seems to make the code worse in practice by making truncation proceed in two steps. The code produced is equally valid if we continue to assume only the low bit is defined. 
Should fix PR19850 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209637 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 12 +++++ test/CodeGen/AArch64/i1-contents.ll | 55 ++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 test/CodeGen/AArch64/i1-contents.ll diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 5c504d1e6c26..80d6669cbf3d 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2263,6 +2263,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); break; case CCValAssign::AExt: + if (Outs[realArgIdx].ArgVT == MVT::i1) { + // AAPCS requires i1 to be zero-extended to 8-bits by the caller. + Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg); + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg); + } Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); break; case CCValAssign::BCvt: @@ -2503,6 +2508,13 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: + if (Outs[i].ArgVT == MVT::i1) { + // AAPCS requires i1 to be zero-extended to i8 by the producer of the + // value. This is strictly redundant on Darwin (which uses "zeroext + // i1"), but will be optimised out before ISel. + Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg); + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); + } break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); diff --git a/test/CodeGen/AArch64/i1-contents.ll b/test/CodeGen/AArch64/i1-contents.ll new file mode 100644 index 000000000000..7f133fc3ea83 --- /dev/null +++ b/test/CodeGen/AArch64/i1-contents.ll @@ -0,0 +1,55 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s +%big = type i32 + +@var = global %big 0 + +; AAPCS: low 8 bits of %in (== w0) will be either 0 or 1. Need to extend to +; 32-bits. +define void @consume_i1_arg(i1 %in) { +; CHECK-LABEL: consume_i1_arg: +; CHECK: and [[BOOL32:w[0-9]+]], w0, #{{0x1|0xff}} +; CHECK: str [[BOOL32]], [{{x[0-9]+}}, :lo12:var] + %val = zext i1 %in to %big + store %big %val, %big* @var + ret void +} + +; AAPCS: low 8 bits of %val1 (== w0) will be either 0 or 1. Need to extend to +; 32-bits (doesn't really matter if it's from 1 or 8 bits). +define void @consume_i1_ret() { +; CHECK-LABEL: consume_i1_ret: +; CHECK: bl produce_i1_ret +; CHECK: and [[BOOL32:w[0-9]+]], w0, #{{0x1|0xff}} +; CHECK: str [[BOOL32]], [{{x[0-9]+}}, :lo12:var] + %val1 = call i1 @produce_i1_ret() + %val = zext i1 %val1 to %big + store %big %val, %big* @var + ret void +} + +; AAPCS: low 8 bits of w0 must be either 0 or 1. Need to mask them off. 
+define i1 @produce_i1_ret() { +; CHECK-LABEL: produce_i1_ret: +; CHECK: ldr [[VAR32:w[0-9]+]], [{{x[0-9]+}}, :lo12:var] +; CHECK: and w0, [[VAR32]], #{{0x1|0xff}} + %val = load %big* @var + %val1 = trunc %big %val to i1 + ret i1 %val1 +} + +define void @produce_i1_arg() { +; CHECK-LABEL: produce_i1_arg: +; CHECK: ldr [[VAR32:w[0-9]+]], [{{x[0-9]+}}, :lo12:var] +; CHECK: and w0, [[VAR32]], #{{0x1|0xff}} +; CHECK: bl consume_i1_arg + %val = load %big* @var + %val1 = trunc %big %val to i1 + call void @consume_i1_arg(i1 %val1) + ret void +} + + +;define zeroext i1 @foo(i8 %in) { +; %val = trunc i8 %in to i1 +; ret i1 %val +;} From f2928b9b5f3d2af68f724af16cdaed2628fddfc9 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 26 May 2014 19:08:19 +0000 Subject: [PATCH 166/906] [PPC] Use alias symbols in address computation. This seems to match what gcc does for ppc and what every other llvm backend does. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209638 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCAsmPrinter.cpp | 36 ++++++++++---------------- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 13 ++-------- test/CodeGen/PowerPC/alias.ll | 31 ++++++++++++++++++++++ 3 files changed, 46 insertions(+), 34 deletions(-) create mode 100644 test/CodeGen/PowerPC/alias.ll diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 2174b18715f1..e89fb2d58a1c 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -380,15 +380,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { bool IsAvailExt = false; if (MO.isGlobal()) { - const GlobalValue *GValue = MO.getGlobal(); - const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = GAlias ? GAlias->getAliasee() : GValue; - MOSymbol = getSymbol(RealGValue); - const GlobalVariable *GVar = dyn_cast(RealGValue); - IsExternal = GVar && !GVar->hasInitializer(); - IsCommon = GVar && RealGValue->hasCommonLinkage(); - IsFunction = !GVar; - IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage(); + const GlobalValue *GV = MO.getGlobal(); + MOSymbol = getSymbol(GV); + IsExternal = GV->isDeclaration(); + IsCommon = GV->hasCommonLinkage(); + IsFunction = GV->getType()->getElementType()->isFunctionTy(); + IsAvailExt = GV->hasAvailableExternallyLinkage(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); else if (MO.isJTI()) @@ -427,13 +424,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } else if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); - const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = GAlias ? GAlias->getAliasee() : GValue; - MOSymbol = getSymbol(RealGValue); - const GlobalVariable *GVar = dyn_cast(RealGValue); - - if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || - RealGValue->hasAvailableExternallyLinkage() || + MOSymbol = getSymbol(GValue); + if (GValue->isDeclaration() || GValue->hasCommonLinkage() || + GValue->hasAvailableExternallyLinkage() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); } @@ -460,13 +453,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { bool IsFunction = false; if (MO.isGlobal()) { - const GlobalValue *GValue = MO.getGlobal(); - const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = GAlias ? 
GAlias->getAliasee() : GValue; - MOSymbol = getSymbol(RealGValue); - const GlobalVariable *GVar = dyn_cast(RealGValue); - IsExternal = GVar && !GVar->hasInitializer(); - IsFunction = !GVar; + const GlobalValue *GV = MO.getGlobal(); + MOSymbol = getSymbol(GV); + IsExternal = GV->isDeclaration(); + IsFunction = GV->getType()->getElementType()->isFunctionTy(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index f6e075d27193..251e8b6246f6 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1472,17 +1472,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if (GlobalAddressSDNode *G = dyn_cast(GA)) { const GlobalValue *GValue = G->getGlobal(); - const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = GAlias ? GAlias->getAliasee() : GValue; - const GlobalVariable *GVar = dyn_cast(RealGValue); - assert((GVar || isa(RealGValue)) && - "Unexpected global value subclass!"); - - // An external variable is one without an initializer. For these, - // for variables with common linkage, and for Functions, generate - // the LDtocL form. - if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || - RealGValue->hasAvailableExternallyLinkage()) + if (GValue->isDeclaration() || GValue->hasCommonLinkage() || + GValue->hasAvailableExternallyLinkage()) return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0)); } diff --git a/test/CodeGen/PowerPC/alias.ll b/test/CodeGen/PowerPC/alias.ll new file mode 100644 index 000000000000..86e41148a0d7 --- /dev/null +++ b/test/CodeGen/PowerPC/alias.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -code-model=medium| FileCheck --check-prefix=CHECK --check-prefix=MEDIUM %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK --check-prefix=LARGE %s + +@foo = global i32 42 +@fooa = alias i32* @foo + +@foo2 = global i64 42 +@foo2a = alias i64* @foo2 + +; CHECK-LABEL: bar: +define i32 @bar() { +; MEDIUM: addis 3, 2, fooa@toc@ha +; LARGE: addis 3, 2, .LC1@toc@ha + %a = load i32* @fooa + ret i32 %a +} + +; CHECK-LABEL: bar2: +define i64 @bar2() { +; MEDIUM: addis 3, 2, foo2a@toc@ha +; MEDIUM: addi 3, 3, foo2a@toc@l +; LARGE: addis 3, 2, .LC3@toc@ha + %a = load i64* @foo2a + ret i64 %a +} + +; LARGE: .LC1: +; LARGE-NEXT: .tc fooa[TC],fooa + +; LARGE: .LC3: +; LARGE-NEXT: .tc foo2a[TC],foo2a From ce5ddbab6df9201a1b3b98fd19e3cb313f13bdbb Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 26 May 2014 19:57:55 +0000 Subject: [PATCH 167/906] Use existing helper function. No functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209639 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Verifier.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 80e731ab152f..bcc38c1ad513 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -497,14 +497,7 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) { "addrspacecast of GlobalValue", &GA); - if (CE->getOpcode() == Instruction::BitCast) { - unsigned SrcAS = GV->getType()->getPointerAddressSpace(); - unsigned DstAS = CE->getType()->getPointerAddressSpace(); - - Assert1(SrcAS == DstAS, - "Alias bitcasts cannot be between different address spaces", - &GA); - } + VerifyConstantExprBitcastType(CE); } Assert1(!GV->isDeclaration(), "Alias must point to a definition", &GA); if (const GlobalAlias *GAAliasee = dyn_cast(GV)) { From b84ced649ee46c4709770e22ccfbc73cc094f81e Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 26 May 2014 21:30:40 +0000 Subject: [PATCH 168/906] Fix link. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209640 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 9b72eca7de5d..6473bb9ff270 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -443,7 +443,7 @@ styles: A symbol with ``internal`` or ``private`` linkage must have ``default`` visibility. -.. _namedtypes: +.. _dllstorageclass: DLL Storage Classes ------------------- @@ -464,6 +464,8 @@ DLL storage class: exists for defining a dll interface, the compiler, assembler and linker know it is externally referenced and must refrain from deleting the symbol. +.. _namedtypes: + Structure Types --------------- From c5f611404c4eecf5006e7bb72b69d6da029e382a Mon Sep 17 00:00:00 2001 From: Filipe Cabecinhas Date: Tue, 27 May 2014 03:42:20 +0000 Subject: [PATCH 169/906] Convert some X86 blendv* intrinsics into IR. Summary: Implemented an InstCombine transformation that takes a blendv* intrinsic call and translates it into an IR select, if the mask is constant. This will eventually get lowered into blends with immediates if possible, or pblendvb (with an option to further optimize if we can transform the pblendvb into a blend+immediate instruction, depending on the selector). It will also enable optimizations by the IR passes, which give up on sight of the intrinsic. Both the transformation and the lowering of its result to asm got shiny new tests. The transformation is a bit convoluted because of blendvp[sd]'s definition: Its mask is a floating point value! This forces us to convert it and get the highest bit. I suppose this happened because the mask has type __m128 in Intel's intrinsic and v4sf (for blendps) in gcc's builtin. I will send an email to llvm-dev to discuss if we want to change this or not. 
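To make that quirk concrete, here is a source-level illustration (mine, not
taken from the new tests; assumes SSE4.1 is enabled): blendvps only ever reads
the sign bit of each mask element, so a compile-time-constant mask is really
just an element-wise select:

  #include <immintrin.h>

  __m128 pick(__m128 a, __m128 b) {
    // Elements whose mask sign bit is set select from 'b', the rest from
    // 'a', so this returns { b[0], a[1], b[2], a[3] }.
    const __m128 mask = _mm_set_ps(0.0f, -1.0f, 0.0f, -1.0f);
    return _mm_blendv_ps(a, b, mask);
  }

With this change the intrinsic call collapses to a select in IR, which can
then be lowered to a blend with an immediate instead of keeping the mask live
in a register.
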
Reviewers: grosbach, delena, nadav Differential Revision: http://reviews.llvm.org/D3859 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209643 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineCalls.cpp | 35 ++++++++++++ test/CodeGen/X86/avx-blend.ll | 23 ++++++++ test/CodeGen/X86/avx2-blend.ll | 11 ++++ test/CodeGen/X86/sse41-blend.ll | 32 +++++++++++ test/Transforms/InstCombine/blend_x86.ll | 56 +++++++++++++++++++ 5 files changed, 157 insertions(+) create mode 100644 test/CodeGen/X86/avx2-blend.ll create mode 100644 test/Transforms/InstCombine/blend_x86.ll diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index be1b5aa50b18..a0819fdfc827 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -718,6 +718,41 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::x86_sse41_pblendvb: + case Intrinsic::x86_sse41_blendvps: + case Intrinsic::x86_sse41_blendvpd: + case Intrinsic::x86_avx_blendv_ps_256: + case Intrinsic::x86_avx_blendv_pd_256: + case Intrinsic::x86_avx2_pblendvb: { + // Convert blendv* to vector selects if the mask is constant. + // This optimization is convoluted because the intrinsic is defined as + // getting a vector of floats or doubles for the ps and pd versions. + // FIXME: That should be changed. + Value *Mask = II->getArgOperand(2); + if (auto C = dyn_cast(Mask)) { + auto Tyi1 = Builder->getInt1Ty(); + auto SelectorType = cast(Mask->getType()); + auto EltTy = SelectorType->getElementType(); + unsigned Size = SelectorType->getNumElements(); + unsigned BitWidth = EltTy->isFloatTy() ? 32 : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth()); + assert(BitWidth == 64 || BitWidth == 32 || BitWidth == 8 && "Wrong arguments for variable blend intrinsic"); + SmallVector Selectors; + for (unsigned I = 0; I < Size; ++I) { + // The intrinsics only read the top bit + uint64_t Selector; + if (BitWidth == 8) + Selector = C->getElementAsInteger(I); + else + Selector = C->getElementAsAPFloat(I).bitcastToAPInt().getZExtValue(); + Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1))); + } + auto NewSelector = ConstantVector::get(Selectors); + return SelectInst::Create(NewSelector, II->getArgOperand(0), II->getArgOperand(1), "blendv"); + } else { + break; + } + } + case Intrinsic::x86_avx_vpermilvar_ps: case Intrinsic::x86_avx_vpermilvar_ps_256: case Intrinsic::x86_avx_vpermilvar_pd: diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll index 4d4f6c1a03ab..e21c7a07e8bd 100644 --- a/test/CodeGen/X86/avx-blend.ll +++ b/test/CodeGen/X86/avx-blend.ll @@ -135,3 +135,26 @@ define <2 x double> @testb(<2 x double> %x, <2 x double> %y) { %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y ret <2 x double> %min } + +; If we can figure out a blend has a constant mask, we should emit the +; blend instruction with an immediate mask +define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) { +; CHECK-LABEL: constant_blendvpd_avx: +; CHECK-NOT: mov +; CHECK: vblendpd +; CHECK: ret + %1 = select <4 x i1> , <4 x double> %xy, <4 x double> %ab + ret <4 x double> %1 +} + +define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) { +; CHECK-LABEL: constant_blendvps_avx: +; CHECK-NOT: mov +; CHECK: vblendps +; CHECK: ret + %1 = select <8 x i1> , <8 x float> %xyzw, <8 x float> %abcd + ret <8 x float> %1 +} + +declare <8 x float> 
@llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) +declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) diff --git a/test/CodeGen/X86/avx2-blend.ll b/test/CodeGen/X86/avx2-blend.ll new file mode 100644 index 000000000000..b02442b6fadd --- /dev/null +++ b/test/CodeGen/X86/avx2-blend.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s + +define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) { +; CHECK-LABEL: constant_pblendvb_avx2: +; CHECK: vmovdqa +; CHECK: vpblendvb + %1 = select <32 x i1> , <32 x i8> %xyzw, <32 x i8> %abcd + ret <32 x i8> %1 +} + +declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) diff --git a/test/CodeGen/X86/sse41-blend.ll b/test/CodeGen/X86/sse41-blend.ll index 951bb7dc854a..8ad79877c8e6 100644 --- a/test/CodeGen/X86/sse41-blend.ll +++ b/test/CodeGen/X86/sse41-blend.ll @@ -88,3 +88,35 @@ entry: store double %extract214vector_func.i, double addrspace(1)* undef, align 8 ret void } + +; If we can figure out a blend has a constant mask, we should emit the +; blend instruction with an immediate mask +define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) { +; In this case, we emit a simple movss +; CHECK-LABEL: constant_blendvpd +; CHECK: movsd +; CHECK: ret + %1 = select <2 x i1> , <2 x double> %xy, <2 x double> %ab + ret <2 x double> %1 +} + +define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) { +; CHECK-LABEL: constant_blendvps +; CHECK-NOT: mov +; CHECK: blendps $7 +; CHECK: ret + %1 = select <4 x i1> , <4 x float> %xyzw, <4 x float> %abcd + ret <4 x float> %1 +} + +define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) { +; CHECK-LABEL: constant_pblendvb: +; CHECK: movaps +; CHECK: pblendvb +; CHECK: ret + %1 = select <16 x i1> , <16 x i8> %xyzw, <16 x i8> %abcd + ret <16 x i8> %1 +} +declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) +declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) +declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) diff --git a/test/Transforms/InstCombine/blend_x86.ll b/test/Transforms/InstCombine/blend_x86.ll new file mode 100644 index 000000000000..6dbacf963ce4 --- /dev/null +++ b/test/Transforms/InstCombine/blend_x86.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -instcombine -mtriple=x86_64-apple-macosx -mcpu=core-avx2 -S | FileCheck %s + +define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) { +; CHECK-LABEL: @constant_blendvpd +; CHECK: select <2 x i1> + %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> ) + ret <2 x double> %1 +} + +define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) { +; CHECK-LABEL: @constant_blendvps +; CHECK: select <4 x i1> + %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> ) + ret <4 x float> %1 +} + +define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) { +; CHECK-LABEL: @constant_pblendvb +; CHECK: select <16 x i1> + %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> ) + ret <16 x i8> %1 +} + +define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) { +; CHECK-LABEL: @constant_blendvpd_avx +; CHECK: select <4 x i1> + %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> 
%ab, <4 x double> ) + ret <4 x double> %1 +} + +define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) { +; CHECK-LABEL: @constant_blendvps_avx +; CHECK: select <8 x i1> + %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> ) + ret <8 x float> %1 +} + +define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) { +; CHECK-LABEL: @constant_pblendvb_avx2 +; CHECK: select <32 x i1> + %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, + <32 x i8> ) + ret <32 x i8> %1 +} + +declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) +declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) +declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) + +declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) +declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) +declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) + From f31b009b3e3af261cbf6ec8f65337187414797f5 Mon Sep 17 00:00:00 2001 From: Dinesh Dwivedi Date: Tue, 27 May 2014 06:44:25 +0000 Subject: [PATCH 170/906] Adding testcase for PR18886. Differential Revision: http://reviews.llvm.org/D3837 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209645 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../ScalarEvolution/max-trip-count.ll | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll index 75f7418f14a7..31f06a46ad00 100644 --- a/test/Analysis/ScalarEvolution/max-trip-count.ll +++ b/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -125,6 +125,33 @@ bar.exit: ; preds = %for.cond.i, %for.bo ret i32 0 } +; PR18886: Indvars miscompile due to an incorrect max backedge taken count from SCEV. +; CHECK-LABEL: @pr18886 +; CHECK: Loop %for.body: Unpredictable backedge-taken count. +; CHECK: Loop %for.body: max backedge-taken count is 3 +@aa = global i64 0, align 8 + +define i32 @pr18886() { +entry: + store i64 -21, i64* @aa, align 8 + br label %for.body + +for.body: + %storemerge1 = phi i64 [ -21, %entry ], [ %add, %for.cond ] + %tobool = icmp eq i64 %storemerge1, 0 + %add = add nsw i64 %storemerge1, 8 + br i1 %tobool, label %return, label %for.cond + +for.cond: + store i64 %add, i64* @aa, align 8 + %cmp = icmp slt i64 %add, 9 + br i1 %cmp, label %for.body, label %return + +return: + %retval.0 = phi i32 [ 1, %for.body ], [ 0, %for.cond ] + ret i32 %retval.0 +} + ; Here we have a must-exit loop latch that is not computable and a ; may-exit early exit that can only have one non-exiting iteration ; before the check is forever skipped. From 9bbb4066f8fc2b53594f0c47adf0843756e84aea Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 27 May 2014 07:37:21 +0000 Subject: [PATCH 171/906] AArch64: support 'c' and 'n' inline asm modifiers. These are tested by test/CodeGen/Generic, so we should probably know how to deal with them. Fortunately generic code does it if asked. 
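As a reminder of what they mean (example mine, not taken from the generic
tests): 'c' prints an immediate operand as a bare constant, without the usual
immediate punctuation, and 'n' prints its negation. Something like the
following is now handled by deferring to the generic AsmPrinter:

  void declare_const(void) {
    // '%c0' emits a plain 42 (no leading '#'), which is what an assembler
    // directive expects; '%n0' would emit -42 instead.
    asm volatile(".set answer, %c0" :: "i"(42));
  }
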
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209646 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64AsmPrinter.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 8553a591fee1..c3ee9bbb8179 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -266,6 +266,11 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNum); + + // First try the generic code, which knows about modifiers like 'c' and 'n'. + if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O)) + return false; + // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { if (ExtraCode[1] != 0) From ae85c73d4af578822bb957d94d6385d73ff57b3b Mon Sep 17 00:00:00 2001 From: Daniel Jasper Date: Tue, 27 May 2014 09:55:37 +0000 Subject: [PATCH 172/906] Fix bad assert. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209648 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCalls.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index a0819fdfc827..dda585294fa2 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -735,7 +735,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { auto EltTy = SelectorType->getElementType(); unsigned Size = SelectorType->getNumElements(); unsigned BitWidth = EltTy->isFloatTy() ? 32 : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth()); - assert(BitWidth == 64 || BitWidth == 32 || BitWidth == 8 && "Wrong arguments for variable blend intrinsic"); + assert((BitWidth == 64 || BitWidth == 32 || BitWidth == 8) && + "Wrong arguments for variable blend intrinsic"); SmallVector Selectors; for (unsigned I = 0; I < Size; ++I) { // The intrinsics only read the top bit From e43c5023fe0c1de8d5c1ff13d07ff710b196d121 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 27 May 2014 10:43:38 +0000 Subject: [PATCH 173/906] ARM: teach AAPCS-VFP to deal with Cortex-M4. Cortex-M4 only has single-precision floating point support, so any LLVM "double" type will have been split into 2 i32s by now. Fortunately, the consecutive-register framework turns out to be precisely what's needed to reconstruct the double and follow AAPCS-VFP correctly! 
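For illustration (example mine; the new aapcs-hfa-code.ll test below is the
authoritative one): a homogeneous aggregate of doubles still has to land in a
contiguous block of argument registers, or wholly on the stack, under
AAPCS-VFP, even though on an FPv4-SP core each double reaches the
calling-convention code as two i32 halves:

  // Hypothetical HFA with two double members; both members must be
  // allocated back to back, e.g. s0-s3 on a Cortex-M4 hard-float target.
  struct Pair { double lo, hi; };

  double sum(struct Pair p) { return p.lo + p.hi; }
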
rdar://problem/17012966 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209650 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/SelectionDAGBuilder.cpp | 19 +-- lib/Target/ARM/ARMCallingConv.h | 17 ++- lib/Target/ARM/ARMISelLowering.cpp | 17 ++- test/CodeGen/ARM/aapcs-hfa-code.ll | 111 ++++++++++++++++++ 4 files changed, 143 insertions(+), 21 deletions(-) create mode 100644 test/CodeGen/ARM/aapcs-hfa-code.ll diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c181046ba235..070e929fce7b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7176,11 +7176,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } if (Args[i].isNest) Flags.setNest(); - if (NeedsRegBlock) { + if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - if (Value == NumValues - 1) - Flags.setInConsecutiveRegsLast(); - } Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); @@ -7226,6 +7223,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (j != 0) MyFlags.Flags.setOrigAlign(1); + // Only mark the end at the last register of the last value. + if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1) + MyFlags.Flags.setInConsecutiveRegsLast(); + CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); } @@ -7412,11 +7413,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); - if (NeedsRegBlock) { + if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - if (Value == NumValues - 1) - Flags.setInConsecutiveRegsLast(); - } Flags.setOrigAlign(OriginalAlignment); MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); @@ -7429,6 +7427,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // if it isn't first piece, alignment must be 1 else if (i > 0) MyFlags.Flags.setOrigAlign(1); + + // Only mark the end at the last register of the last value. 
+ if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1) + MyFlags.Flags.setInConsecutiveRegsLast(); + Ins.push_back(MyFlags); } PartBase += VT.getStoreSize(); diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 8e0fd8935282..dc41c1c14bbb 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -177,9 +177,8 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { SmallVectorImpl &PendingHAMembers = State.getPendingLocs(); - // AAPCS HFAs must have 1-4 elements, all of the same type - assert(PendingHAMembers.size() < 4); + assert(PendingHAMembers.size() < 8); if (PendingHAMembers.size() > 0) assert(PendingHAMembers[0].getLocVT() == LocVT); @@ -189,7 +188,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); if (ArgFlags.isInConsecutiveRegsLast()) { - assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 && + assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 && "Homogeneous aggregates must have between 1 and 4 members"); // Try to allocate a contiguous block of registers, each of the correct @@ -197,6 +196,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, const uint16_t *RegList; unsigned NumRegs; switch (LocVT.SimpleTy) { + case MVT::i32: case MVT::f32: RegList = SRegList; NumRegs = 16; @@ -235,11 +235,20 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State.AllocateReg(SRegList[regNo]); unsigned Size = LocVT.getSizeInBits() / 8; - unsigned Align = LocVT.SimpleTy == MVT::v2f64 ? 8 : Size; + unsigned Align = Size; + + if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) { + // Vectors are always aligned to 8 bytes. If we've seen an i32 here + // it's because it's been split from a larger type, also with align 8. + Align = 8; + } for (auto It : PendingHAMembers) { It.convertToMem(State.AllocateStack(Size, Align)); State.addLoc(It); + + // Only the first member needs to be aligned. + Align = 1; } // All pending members have now been allocated diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 5beb752d3a42..00d07e840674 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -10778,14 +10778,13 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, /// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate. 
bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { - if (getEffectiveCallingConv(CallConv, isVarArg) == - CallingConv::ARM_AAPCS_VFP) { - HABaseType Base = HA_UNKNOWN; - uint64_t Members = 0; - bool result = isHomogeneousAggregate(Ty, Base, Members); - DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); - return result; - } else { + if (getEffectiveCallingConv(CallConv, isVarArg) != + CallingConv::ARM_AAPCS_VFP) return false; - } + + HABaseType Base = HA_UNKNOWN; + uint64_t Members = 0; + bool result = isHomogeneousAggregate(Ty, Base, Members); + DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); + return result; } diff --git a/test/CodeGen/ARM/aapcs-hfa-code.ll b/test/CodeGen/ARM/aapcs-hfa-code.ll new file mode 100644 index 000000000000..396e83816ccf --- /dev/null +++ b/test/CodeGen/ARM/aapcs-hfa-code.ll @@ -0,0 +1,111 @@ +; RUN: llc < %s -mtriple=armv7-linux-gnueabihf -o - | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7em-none-eabi -mcpu=cortex-m4 | FileCheck %s --check-prefix=CHECK-M4F + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define arm_aapcs_vfpcc void @test_1float({ float } %a) { + call arm_aapcs_vfpcc void @test_1float({ float } { float 1.0 }) + ret void + +; CHECK-LABEL: test_1float: +; CHECK-DAG: vmov.f32 s0, #1.{{0+}}e+00 +; CHECK: bl test_1float + +; CHECK-M4F-LABEL: test_1float: +; CHECK-M4F-DAG: vmov.f32 s0, #1.{{0+}}e+00 +; CHECK-M4F: bl test_1float +} + +define arm_aapcs_vfpcc void @test_2float({ float, float } %a) { + call arm_aapcs_vfpcc void @test_2float({ float, float } { float 1.0, float 2.0 }) + ret void + +; CHECK-LABEL: test_2float: +; CHECK-DAG: vmov.f32 s0, #1.{{0+}}e+00 +; CHECK-DAG: vmov.f32 s1, #2.{{0+}}e+00 +; CHECK: bl test_2float + +; CHECK-M4F-LABEL: test_2float: +; CHECK-M4F-DAG: vmov.f32 s0, #1.{{0+}}e+00 +; CHECK-M4F-DAG: vmov.f32 s1, #2.{{0+}}e+00 +; CHECK-M4F: bl test_2float +} + +define arm_aapcs_vfpcc void @test_3float({ float, float, float } %a) { + call arm_aapcs_vfpcc void @test_3float({ float, float, float } { float 1.0, float 2.0, float 3.0 }) + ret void + +; CHECK-LABEL: test_3float: +; CHECK-DAG: vmov.f32 s0, #1.{{0+}}e+00 +; CHECK-DAG: vmov.f32 s1, #2.{{0+}}e+00 +; CHECK-DAG: vmov.f32 s2, #3.{{0+}}e+00 +; CHECK: bl test_3float + +; CHECK-M4F-LABEL: test_3float: +; CHECK-M4F-DAG: vmov.f32 s0, #1.{{0+}}e+00 +; CHECK-M4F-DAG: vmov.f32 s1, #2.{{0+}}e+00 +; CHECK-M4F-DAG: vmov.f32 s2, #3.{{0+}}e+00 +; CHECK-M4F: bl test_3float +} + +define arm_aapcs_vfpcc void @test_1double({ double } %a) { +; CHECK-LABEL: test_1double: +; CHECK-DAG: vmov.f64 d0, #1.{{0+}}e+00 +; CHECK: bl test_1double + +; CHECK-M4F-LABEL: test_1double: +; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 +; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 +; CHECK-M4F: movt [[ONEHI]], #16368 +; CHECK-M4F-DAG: vmov s0, [[ONELO]] +; CHECK-M4F-DAG: vmov s1, [[ONEHI]] +; CHECK-M4F: bl test_1double + + call arm_aapcs_vfpcc void @test_1double({ double } { double 1.0 }) + ret void +} + +; Final double argument might be put in s15 & [sp] if we're careless. It should +; go all on the stack. 
+define arm_aapcs_vfpcc void @test_1double_nosplit([4 x float], [4 x double], [3 x float], double %a) { +; CHECK-LABEL: test_1double_nosplit: +; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0 +; CHECK-DAG: movw [[ONEHI:r[0-9]+]], #0 +; CHECK-DAG: movt [[ONEHI]], #16368 +; CHECK: strd [[ONELO]], [[ONEHI]], [sp] +; CHECK: bl test_1double_nosplit + +; CHECK-M4F-LABEL: test_1double_nosplit: +; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 +; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 +; CHECK-M4F: movt [[ONEHI]], #16368 +; CHECK-M4F-DAG: str [[ONELO]], [sp] +; CHECK-M4F-DAG: str [[ONEHI]], [sp, #4] +; CHECK-M4F: bl test_1double_nosplit + call arm_aapcs_vfpcc void @test_1double_nosplit([4 x float] undef, [4 x double] undef, [3 x float] undef, double 1.0) + ret void +} + +; Final double argument might go at [sp, #4] if we're careless. Should go at +; [sp, #8] to preserve alignment. +define arm_aapcs_vfpcc void @test_1double_misaligned([4 x double], [4 x double], float, double) { + call arm_aapcs_vfpcc void @test_1double_misaligned([4 x double] undef, [4 x double] undef, float undef, double 1.0) + +; CHECK-LABEL: test_1double_misaligned: +; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0 +; CHECK-DAG: mov r[[BASE:[0-9]+]], sp +; CHECK-DAG: movw [[ONEHI:r[0-9]+]], #0 +; CHECK-DAG: movt [[ONEHI]], #16368 +; CHECK-DAG: str [[ONELO]], [r[[BASE]], #8]! +; CHECK-DAG: str [[ONEHI]], [r[[BASE]], #4] + +; CHECK-M4F-LABEL: test_1double_misaligned: +; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 +; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 +; CHECK-M4F: movt [[ONEHI]], #16368 +; CHECK-M4F-DAG: str [[ONELO]], [sp, #8] +; CHECK-M4F-DAG: str [[ONEHI]], [sp, #12] +; CHECK-M4F: bl test_1double_misaligned + + ret void +} From c74a70df6ded8b538cc13be0f13db62d30a7c71f Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 27 May 2014 12:16:02 +0000 Subject: [PATCH 174/906] AArch64: implement copies to/from NZCV as a last ditch effort. A test in test/Generic creates a DAG where the NZCV output of an ADCS is used by multiple nodes. This makes LLVM want to save a copy of NZCV for later, which it couldn't do before. This should be the last fix required for the aarch64 buildbot. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209651 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrInfo.cpp | 20 +++++++++++++++++++- lib/Target/AArch64/AArch64RegisterInfo.cpp | 2 +- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 52e3b333eb08..ff115c0bd5ed 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1507,7 +1507,25 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - assert(0 && "unimplemented reg-to-reg copy"); + if (DestReg == AArch64::NZCV) { + assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); + BuildMI(MBB, I, DL, get(AArch64::MSR)) + .addImm(AArch64SysReg::NZCV) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); + return; + } + + if (SrcReg == AArch64::NZCV) { + assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); + BuildMI(MBB, I, DL, get(AArch64::MRS)) + .addReg(DestReg) + .addImm(AArch64SysReg::NZCV) + .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); + return; + } + + llvm_unreachable("unimplemented reg-to-reg copy"); } void AArch64InstrInfo::storeRegToStackSlot( diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 48a361d50e55..01b9587b3174 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -138,7 +138,7 @@ AArch64RegisterInfo::getPointerRegClass(const MachineFunction &MF, const TargetRegisterClass * AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &AArch64::CCRRegClass) - return nullptr; // Can't copy NZCV. + return &AArch64::GPR64RegClass; // Only MSR & MRS copy NZCV. return RC; } From 18b6fb96120336cfcf8981483501ecf98c63f00c Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Tue, 27 May 2014 12:39:31 +0000 Subject: [PATCH 175/906] [asancov] Emit an initializer passing number of coverage code locations in each module. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209654 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 18 ++++++++++++++---- .../AddressSanitizer/coverage.ll | 12 ++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 0617215a7c09..95fca75392af 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -80,6 +80,7 @@ static const char *const kAsanUnregisterGlobalsName = static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *const kAsanInitName = "__asan_init_v3"; +static const char *const kAsanCovModuleInitName = "__sanitizer_cov_module_init"; static const char *const kAsanCovName = "__sanitizer_cov"; static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp"; static const char *const kAsanPtrSub = "__sanitizer_ptr_sub"; @@ -408,6 +409,7 @@ class AddressSanitizerModule : public ModulePass { Function *AsanUnpoisonGlobals; Function *AsanRegisterGlobals; Function *AsanUnregisterGlobals; + Function *AsanCovModuleInit; }; // Stack poisoning does not play well with exception handling. 
@@ -990,6 +992,10 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); + AsanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction( + kAsanCovModuleInitName, + IRB.getVoidTy(), IntptrTy, NULL)); + AsanCovModuleInit->setLinkage(Function::ExternalLinkage); } // This function replaces all global variables with new variables that have @@ -1020,6 +1026,14 @@ bool AddressSanitizerModule::runOnModule(Module &M) { GlobalsToChange.push_back(G); } + Function *CtorFunc = M.getFunction(kAsanModuleCtorName); + assert(CtorFunc); + IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); + + Function *CovFunc = M.getFunction(kAsanCovName); + int nCov = CovFunc ? CovFunc->getNumUses() : 0; + IRB.CreateCall(AsanCovModuleInit, ConstantInt::get(IntptrTy, nCov)); + size_t n = GlobalsToChange.size(); if (n == 0) return false; @@ -1036,10 +1050,6 @@ bool AddressSanitizerModule::runOnModule(Module &M) { IntptrTy, IntptrTy, NULL); SmallVector Initializers(n); - Function *CtorFunc = M.getFunction(kAsanModuleCtorName); - assert(CtorFunc); - IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); - bool HasDynamicallyInitializedGlobals = false; // We shouldn't merge same module names, as this string serves as unique diff --git a/test/Instrumentation/AddressSanitizer/coverage.ll b/test/Instrumentation/AddressSanitizer/coverage.ll index 7f397f453ff1..5bc510316aab 100644 --- a/test/Instrumentation/AddressSanitizer/coverage.ll +++ b/test/Instrumentation/AddressSanitizer/coverage.ll @@ -16,6 +16,7 @@ entry: if.end: ; preds = %entry, %if.then ret void } + ; CHECK1-LABEL: define void @foo ; CHECK1: %0 = load atomic i8* @__asan_gen_cov_foo monotonic, align 1 ; CHECK1: %1 = icmp eq i8 0, %0 @@ -24,9 +25,20 @@ entry: ; CHECK1-NOT: call void @__sanitizer_cov ; CHECK1: store atomic i8 1, i8* @__asan_gen_cov_foo monotonic, align 1 +; CHECK1-LABEL: define internal void @asan.module_ctor +; CHECK1-NOT: ret +; CHECK1: call void @__sanitizer_cov_module_init(i64 1) +; CHECK1: ret + + ; CHECK2-LABEL: define void @foo ; CHECK2: call void @__sanitizer_cov ; CHECK2: call void @__sanitizer_cov ; CHECK2: call void @__sanitizer_cov ; CHECK2-NOT: call void @__sanitizer_cov ; CHECK2: ret void + +; CHECK2-LABEL: define internal void @asan.module_ctor +; CHECK2-NOT: ret +; CHECK2: call void @__sanitizer_cov_module_init(i64 3) +; CHECK2: ret From f7744906f02a832c833384bd4041ab65d9869d39 Mon Sep 17 00:00:00 2001 From: Zoran Jovanovic Date: Tue, 27 May 2014 12:55:40 +0000 Subject: [PATCH 176/906] [mips][mips64r6] Add relocations R_MIPS_PC21_S2, R_MIPS_PC26_S2 Differential Revision: http://reviews.llvm.org/D3824 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209655 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Mips/MCTargetDesc/MipsAsmBackend.cpp | 20 +++++++++ .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 6 +++ lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 6 +++ .../Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 8 +++- test/MC/Mips/mips32r6/relocations.s | 43 +++++++++++++++++++ test/MC/Mips/mips64r6/relocations.s | 43 +++++++++++++++++++ 6 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 test/MC/Mips/mips32r6/relocations.s create mode 100644 test/MC/Mips/mips64r6/relocations.s diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 332f7ea7a261..048e6bdb0e7e 100644 --- 
a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -102,6 +102,22 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, if (!isIntN(16, Value) && Ctx) Ctx->FatalError(Fixup.getLoc(), "out of range PC16 fixup"); break; + case Mips::fixup_MIPS_PC21_S2: + Value -= 4; + // Forcing a signed division because Value can be negative. + Value = (int64_t) Value / 4; + // We now check if Value can be encoded as a 21-bit signed immediate. + if (!isIntN(21, Value) && Ctx) + Ctx->FatalError(Fixup.getLoc(), "out of range PC21 fixup"); + break; + case Mips::fixup_MIPS_PC26_S2: + Value -= 4; + // Forcing a signed division because Value can be negative. + Value = (int64_t) Value / 4; + // We now check if Value can be encoded as a 26-bit signed immediate. + if (!isIntN(26, Value) && Ctx) + Ctx->FatalError(Fixup.getLoc(), "out of range PC26 fixup"); + break; } return Value; @@ -229,6 +245,8 @@ getFixupKindInfo(MCFixupKind Kind) const { { "fixup_Mips_GOT_LO16", 0, 16, 0 }, { "fixup_Mips_CALL_HI16", 0, 16, 0 }, { "fixup_Mips_CALL_LO16", 0, 16, 0 }, + { "fixup_MIPS_PC21_S2", 0, 21, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MIPS_PC26_S2", 0, 26, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_26_S1", 0, 26, 0 }, { "fixup_MICROMIPS_HI16", 0, 16, 0 }, { "fixup_MICROMIPS_LO16", 0, 16, 0 }, @@ -286,6 +304,8 @@ getFixupKindInfo(MCFixupKind Kind) const { { "fixup_Mips_GOT_LO16", 16, 16, 0 }, { "fixup_Mips_CALL_HI16", 16, 16, 0 }, { "fixup_Mips_CALL_LO16", 16, 16, 0 }, + { "fixup_MIPS_PC21_S2", 11, 21, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MIPS_PC26_S2", 6, 26, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_26_S1", 6, 26, 0 }, { "fixup_MICROMIPS_HI16", 16, 16, 0 }, { "fixup_MICROMIPS_LO16", 16, 16, 0 }, diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 794978b30bf8..ef8a0910149d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -193,6 +193,12 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, case Mips::fixup_MICROMIPS_TLS_TPREL_LO16: Type = ELF::R_MICROMIPS_TLS_TPREL_LO16; break; + case Mips::fixup_MIPS_PC21_S2: + Type = ELF::R_MIPS_PC21_S2; + break; + case Mips::fixup_MIPS_PC26_S2: + Type = ELF::R_MIPS_PC26_S2; + break; } return Type; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index dc6192c20506..a9c1656aa361 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -128,6 +128,12 @@ namespace Mips { // resulting in - R_MIPS_CALL_LO16 fixup_Mips_CALL_LO16, + // resulting in - R_MIPS_PC21_S2 + fixup_MIPS_PC21_S2, + + // resulting in - R_MIPS_PC26_S2 + fixup_MIPS_PC26_S2, + // resulting in - R_MICROMIPS_26_S1 fixup_MICROMIPS_26_S1, diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 6be8c03c5e63..3bef1cf6b1c8 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -258,7 +258,9 @@ getBranchTarget21OpValue(const MCInst &MI, unsigned OpNo, assert(MO.isExpr() && "getBranchTarget21OpValue expects only expressions or immediates"); - // TODO: Push 21 PC fixup. 
+ const MCExpr *Expr = MO.getExpr(); + Fixups.push_back(MCFixup::Create(0, Expr, + MCFixupKind(Mips::fixup_MIPS_PC21_S2))); return 0; } @@ -278,7 +280,9 @@ getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo, assert(MO.isExpr() && "getBranchTarget26OpValue expects only expressions or immediates"); - // TODO: Push 26 PC fixup. + const MCExpr *Expr = MO.getExpr(); + Fixups.push_back(MCFixup::Create(0, Expr, + MCFixupKind(Mips::fixup_MIPS_PC26_S2))); return 0; } diff --git a/test/MC/Mips/mips32r6/relocations.s b/test/MC/Mips/mips32r6/relocations.s new file mode 100644 index 000000000000..2e7663730ba0 --- /dev/null +++ b/test/MC/Mips/mips32r6/relocations.s @@ -0,0 +1,43 @@ +# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \ +# RUN: | FileCheck %s -check-prefix=CHECK-FIXUP +# RUN: llvm-mc %s -filetype=obj -triple=mips-unknown-linux -mcpu=mips32r6 \ +# RUN: | llvm-readobj -r | FileCheck %s -check-prefix=CHECK-ELF +#------------------------------------------------------------------------------ +# Check that the assembler can handle the documented syntax for fixups. +#------------------------------------------------------------------------------ +# CHECK-FIXUP: beqc $5, $6, bar # encoding: [0x20,0xa6,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_Mips_PC16 +# CHECK-FIXUP: bnec $5, $6, bar # encoding: [0x60,0xa6,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_Mips_PC16 +# CHECK-FIXUP: beqzc $9, bar # encoding: [0xd9,0b001AAAAA,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC21_S2 +# CHECK-FIXUP: bnezc $9, bar # encoding: [0xf9,0b001AAAAA,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC21_S2 +# CHECK-FIXUP: balc bar # encoding: [0b111010AA,A,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC26_S2 +# CHECK-FIXUP: bc bar # encoding: [0b110010AA,A,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC26_S2 +#------------------------------------------------------------------------------ +# Check that the appropriate relocations were created. +#------------------------------------------------------------------------------ +# CHECK-ELF: Relocations [ +# CHECK-ELF: 0x0 R_MIPS_PC16 bar 0x0 +# CHECK-ELF: 0x4 R_MIPS_PC16 bar 0x0 +# CHECK-ELF: 0x8 R_MIPS_PC21_S2 bar 0x0 +# CHECK-ELF: 0xC R_MIPS_PC21_S2 bar 0x0 +# CHECK-ELF: 0x10 R_MIPS_PC26_S2 bar 0x0 +# CHECK-ELF: 0x14 R_MIPS_PC26_S2 bar 0x0 +# CHECK-ELF: ] + + beqc $5, $6, bar + bnec $5, $6, bar + beqzc $9, bar + bnezc $9, bar + balc bar + bc bar diff --git a/test/MC/Mips/mips64r6/relocations.s b/test/MC/Mips/mips64r6/relocations.s new file mode 100644 index 000000000000..4ad0ae79232a --- /dev/null +++ b/test/MC/Mips/mips64r6/relocations.s @@ -0,0 +1,43 @@ +# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r6 \ +# RUN: | FileCheck %s -check-prefix=CHECK-FIXUP +# RUN: llvm-mc %s -filetype=obj -triple=mips-unknown-linux -mcpu=mips64r6 \ +# RUN: | llvm-readobj -r | FileCheck %s -check-prefix=CHECK-ELF +#------------------------------------------------------------------------------ +# Check that the assembler can handle the documented syntax for fixups. 
+#------------------------------------------------------------------------------ +# CHECK-FIXUP: beqc $5, $6, bar # encoding: [0x20,0xa6,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_Mips_PC16 +# CHECK-FIXUP: bnec $5, $6, bar # encoding: [0x60,0xa6,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_Mips_PC16 +# CHECK-FIXUP: beqzc $9, bar # encoding: [0xd9,0b001AAAAA,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC21_S2 +# CHECK-FIXUP: bnezc $9, bar # encoding: [0xf9,0b001AAAAA,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC21_S2 +# CHECK-FIXUP: balc bar # encoding: [0b111010AA,A,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC26_S2 +# CHECK-FIXUP: bc bar # encoding: [0b110010AA,A,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC26_S2 +#------------------------------------------------------------------------------ +# Check that the appropriate relocations were created. +#------------------------------------------------------------------------------ +# CHECK-ELF: Relocations [ +# CHECK-ELF: 0x0 R_MIPS_PC16 bar 0x0 +# CHECK-ELF: 0x4 R_MIPS_PC16 bar 0x0 +# CHECK-ELF: 0x8 R_MIPS_PC21_S2 bar 0x0 +# CHECK-ELF: 0xC R_MIPS_PC21_S2 bar 0x0 +# CHECK-ELF: 0x10 R_MIPS_PC26_S2 bar 0x0 +# CHECK-ELF: 0x14 R_MIPS_PC26_S2 bar 0x0 +# CHECK-ELF: ] + + beqc $5, $6, bar + bnec $5, $6, bar + beqzc $9, bar + bnezc $9, bar + balc bar + bc bar From 87d192bb728aab9509ab51436fc3a73ca0d4e2c5 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Tue, 27 May 2014 13:30:21 +0000 Subject: [PATCH 177/906] [ARM] Emit correct build attributes for the relocation models. Patch by Asiri Rathnayake. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209656 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/ARMBuildAttributes.h | 13 +++++++++++++ lib/Target/ARM/ARMAsmPrinter.cpp | 14 ++++++++++++++ test/CodeGen/ARM/build-attributes.ll | 10 ++++++++++ 3 files changed, 37 insertions(+) diff --git a/include/llvm/Support/ARMBuildAttributes.h b/include/llvm/Support/ARMBuildAttributes.h index 69732fc041e7..16312004c871 100644 --- a/include/llvm/Support/ARMBuildAttributes.h +++ b/include/llvm/Support/ARMBuildAttributes.h @@ -146,6 +146,19 @@ enum { AllowNeon2 = 2, // SIMDv2 was permitted (Half-precision FP, MAC operations) AllowNeonARMv8 = 3, // ARM v8-A SIMD was permitted + // Tag_ABI_PCS_RW_data, (=15), uleb128 + AddressRWPCRel = 1, // Address RW static data PC-relative + AddressRWSBRel = 2, // Address RW static data SB-relative + AddressRWNone = 3, // No RW static data permitted + + // Tag_ABI_PCS_RO_data, (=14), uleb128 + AddressROPCRel = 1, // Address RO static data PC-relative + AddressRONone = 2, // No RO static data permitted + + // Tag_ABI_PCS_GOT_use, (=17), uleb128 + AddressDirect = 1, // Address imported data directly + AddressGOT = 2, // Address imported data indirectly (via GOT) + // Tag_ABI_FP_denormal, (=20), uleb128 PreserveFPSign = 2, // sign when flushed-to-zero is preserved diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 2b97e015bf41..55e9fe5f5c57 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -671,6 +671,20 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitFPU(ARM::VFPV2); } + if (TM.getRelocationModel() == Reloc::PIC_) { + // PIC specific attributes. 
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data, + ARMBuildAttrs::AddressRWPCRel); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RO_data, + ARMBuildAttrs::AddressROPCRel); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use, + ARMBuildAttrs::AddressGOT); + } else { + // Allow direct addressing of imported data for all other relocation models. + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use, + ARMBuildAttrs::AddressDirect); + } + // Signal various FP modes. if (!TM.Options.UnsafeFPMath) { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed); diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll index 3e825e8d7d6f..d75d55d0fa68 100644 --- a/test/CodeGen/ARM/build-attributes.ll +++ b/test/CodeGen/ARM/build-attributes.ll @@ -33,6 +33,11 @@ ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-NOFPU ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,,+d16,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4 +; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=pic | FileCheck %s --check-prefix=RELOC-PIC +; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=static | FileCheck %s --check-prefix=RELOC-OTHER +; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=default | FileCheck %s --check-prefix=RELOC-OTHER +; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=dynamic-no-pic | FileCheck %s --check-prefix=RELOC-OTHER +; RUN: llc < %s -mtriple=arm-none-linux-gnueabi | FileCheck %s --check-prefix=RELOC-OTHER ; XSCALE: .eabi_attribute 6, 5 ; XSCALE: .eabi_attribute 8, 1 @@ -453,6 +458,11 @@ ; CORTEX-A57-NOT: .eabi_attribute 44 ; CORTEX-A57: .eabi_attribute 68, 3 +; RELOC-PIC: .eabi_attribute 15, 1 +; RELOC-PIC: .eabi_attribute 16, 1 +; RELOC-PIC: .eabi_attribute 17, 2 +; RELOC-OTHER: .eabi_attribute 17, 1 + define i32 @f(i64 %z) { ret i32 0 } From 61e341e0bfcc83ec865fe763759e66ab3c2feba5 Mon Sep 17 00:00:00 2001 From: Zoran Jovanovic Date: Tue, 27 May 2014 14:58:51 +0000 Subject: [PATCH 178/906] [mips][mips64r6] Add Relocations R_MIPS_PCHI16, R_MIPS_PCLO16 Differential Revision: http://reviews.llvm.org/D3860 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209659 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCExpr.h | 2 ++ lib/MC/MCExpr.cpp | 2 ++ lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 2 ++ lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 2 ++ lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 6 ++++++ lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 6 ++++++ lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 6 ++++++ lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 6 ++++++ test/MC/Mips/mips32r6/relocations.s | 12 ++++++++++++ test/MC/Mips/mips64r6/relocations.s | 12 ++++++++++++ 10 files changed, 56 insertions(+) diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h index f28adfa86526..ca5cecbef0a2 100644 --- a/include/llvm/MC/MCExpr.h +++ b/include/llvm/MC/MCExpr.h @@ -262,6 +262,8 @@ class MCSymbolRefExpr : public MCExpr { VK_Mips_GOT_LO16, VK_Mips_CALL_HI16, VK_Mips_CALL_LO16, + VK_Mips_PCREL_HI16, + VK_Mips_PCREL_LO16, VK_COFF_IMGREL32 // symbol@imgrel (image-relative) }; diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index ff5009f0a203..f724716eacd4 100644 --- a/lib/MC/MCExpr.cpp +++ 
b/lib/MC/MCExpr.cpp @@ -271,6 +271,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_Mips_GOT_LO16: return "GOT_LO16"; case VK_Mips_CALL_HI16: return "CALL_HI16"; case VK_Mips_CALL_LO16: return "CALL_LO16"; + case VK_Mips_PCREL_HI16: return "PCREL_HI16"; + case VK_Mips_PCREL_LO16: return "PCREL_LO16"; case VK_COFF_IMGREL32: return "IMGREL32"; } llvm_unreachable("Invalid variant kind"); diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index b9a0962603fb..86fd386967f9 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -1982,6 +1982,8 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) { .Case("call_lo", MCSymbolRefExpr::VK_Mips_CALL_LO16) .Case("higher", MCSymbolRefExpr::VK_Mips_HIGHER) .Case("highest", MCSymbolRefExpr::VK_Mips_HIGHEST) + .Case("pcrel_hi", MCSymbolRefExpr::VK_Mips_PCREL_HI16) + .Case("pcrel_lo", MCSymbolRefExpr::VK_Mips_PCREL_LO16) .Default(MCSymbolRefExpr::VK_None); assert(VK != MCSymbolRefExpr::VK_None); diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index cff6855d4a3f..8c797517e316 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -166,6 +166,8 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { case MCSymbolRefExpr::VK_Mips_GOT_LO16: OS << "%got_lo("; break; case MCSymbolRefExpr::VK_Mips_CALL_HI16: OS << "%call_hi("; break; case MCSymbolRefExpr::VK_Mips_CALL_LO16: OS << "%call_lo("; break; + case MCSymbolRefExpr::VK_Mips_PCREL_HI16: OS << "%pcrel_hi("; break; + case MCSymbolRefExpr::VK_Mips_PCREL_LO16: OS << "%pcrel_lo("; break; } OS << SRE->getSymbol(); diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 048e6bdb0e7e..5375a00fd110 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -56,6 +56,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case Mips::fixup_MICROMIPS_GOT_PAGE: case Mips::fixup_MICROMIPS_GOT_OFST: case Mips::fixup_MICROMIPS_GOT_DISP: + case Mips::fixup_MIPS_PCLO16: break; case Mips::fixup_Mips_PC16: // So far we are only using this type for branches. @@ -80,6 +81,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case Mips::fixup_Mips_GOT_HI16: case Mips::fixup_Mips_CALL_HI16: case Mips::fixup_MICROMIPS_HI16: + case Mips::fixup_MIPS_PCHI16: // Get the 2nd 16-bits. Also add 1 if bit 15 is 1. 
Value = ((Value + 0x8000) >> 16) & 0xffff; break; @@ -247,6 +249,8 @@ getFixupKindInfo(MCFixupKind Kind) const { { "fixup_Mips_CALL_LO16", 0, 16, 0 }, { "fixup_MIPS_PC21_S2", 0, 21, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MIPS_PC26_S2", 0, 26, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MIPS_PCHI16", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MIPS_PCLO16", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_26_S1", 0, 26, 0 }, { "fixup_MICROMIPS_HI16", 0, 16, 0 }, { "fixup_MICROMIPS_LO16", 0, 16, 0 }, @@ -306,6 +310,8 @@ getFixupKindInfo(MCFixupKind Kind) const { { "fixup_Mips_CALL_LO16", 16, 16, 0 }, { "fixup_MIPS_PC21_S2", 11, 21, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MIPS_PC26_S2", 6, 26, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MIPS_PCHI16", 16, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MIPS_PCLO16", 16, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_26_S1", 6, 26, 0 }, { "fixup_MICROMIPS_HI16", 16, 16, 0 }, { "fixup_MICROMIPS_LO16", 16, 16, 0 }, diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index ef8a0910149d..74c12ff3428c 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -199,6 +199,12 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, case Mips::fixup_MIPS_PC26_S2: Type = ELF::R_MIPS_PC26_S2; break; + case Mips::fixup_MIPS_PCHI16: + Type = ELF::R_MIPS_PCHI16; + break; + case Mips::fixup_MIPS_PCLO16: + Type = ELF::R_MIPS_PCLO16; + break; } return Type; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index a9c1656aa361..3079004f9ac6 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -134,6 +134,12 @@ namespace Mips { // resulting in - R_MIPS_PC26_S2 fixup_MIPS_PC26_S2, + // resulting in - R_MIPS_PCHI16 + fixup_MIPS_PCHI16, + + // resulting in - R_MIPS_PCLO16 + fixup_MIPS_PCLO16, + // resulting in - R_MICROMIPS_26_S1 fixup_MICROMIPS_26_S1, diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 3bef1cf6b1c8..85e0bf1569a7 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -480,6 +480,12 @@ getExprOpValue(const MCExpr *Expr,SmallVectorImpl &Fixups, case MCSymbolRefExpr::VK_Mips_CALL_LO16: FixupKind = Mips::fixup_Mips_CALL_LO16; break; + case MCSymbolRefExpr::VK_Mips_PCREL_HI16: + FixupKind = Mips::fixup_MIPS_PCHI16; + break; + case MCSymbolRefExpr::VK_Mips_PCREL_LO16: + FixupKind = Mips::fixup_MIPS_PCLO16; + break; } // switch Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind))); diff --git a/test/MC/Mips/mips32r6/relocations.s b/test/MC/Mips/mips32r6/relocations.s index 2e7663730ba0..4532e42cc53e 100644 --- a/test/MC/Mips/mips32r6/relocations.s +++ b/test/MC/Mips/mips32r6/relocations.s @@ -23,6 +23,14 @@ # CHECK-FIXUP: bc bar # encoding: [0b110010AA,A,A,A] # CHECK-FIXUP: # fixup A - offset: 0, # CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC26_S2 +# CHECK-FIXUP: aluipc $2, %pcrel_hi(bar) # encoding: [0xec,0x5f,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar@PCREL_HI16, +# CHECK-FIXUP: kind: fixup_MIPS_PCHI16 +# CHECK-FIXUP: addiu $2, $2, %pcrel_lo(bar) # encoding: [0x24,0x42,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar@PCREL_LO16, +# CHECK-FIXUP: kind: fixup_MIPS_PCLO16 
#------------------------------------------------------------------------------ # Check that the appropriate relocations were created. #------------------------------------------------------------------------------ @@ -33,6 +41,8 @@ # CHECK-ELF: 0xC R_MIPS_PC21_S2 bar 0x0 # CHECK-ELF: 0x10 R_MIPS_PC26_S2 bar 0x0 # CHECK-ELF: 0x14 R_MIPS_PC26_S2 bar 0x0 +# CHECK-ELF: 0x18 R_MIPS_PCHI16 bar 0x0 +# CHECK-ELF: 0x1C R_MIPS_PCLO16 bar 0x0 # CHECK-ELF: ] beqc $5, $6, bar @@ -41,3 +51,5 @@ bnezc $9, bar balc bar bc bar + aluipc $2, %pcrel_hi(bar) + addiu $2, $2, %pcrel_lo(bar) diff --git a/test/MC/Mips/mips64r6/relocations.s b/test/MC/Mips/mips64r6/relocations.s index 4ad0ae79232a..db8471565543 100644 --- a/test/MC/Mips/mips64r6/relocations.s +++ b/test/MC/Mips/mips64r6/relocations.s @@ -23,6 +23,14 @@ # CHECK-FIXUP: bc bar # encoding: [0b110010AA,A,A,A] # CHECK-FIXUP: # fixup A - offset: 0, # CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC26_S2 +# CHECK-FIXUP: aluipc $2, %pcrel_hi(bar) # encoding: [0xec,0x5f,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar@PCREL_HI16, +# CHECK-FIXUP: kind: fixup_MIPS_PCHI16 +# CHECK-FIXUP: addiu $2, $2, %pcrel_lo(bar) # encoding: [0x24,0x42,A,A] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar@PCREL_LO16, +# CHECK-FIXUP: kind: fixup_MIPS_PCLO16 #------------------------------------------------------------------------------ # Check that the appropriate relocations were created. #------------------------------------------------------------------------------ @@ -33,6 +41,8 @@ # CHECK-ELF: 0xC R_MIPS_PC21_S2 bar 0x0 # CHECK-ELF: 0x10 R_MIPS_PC26_S2 bar 0x0 # CHECK-ELF: 0x14 R_MIPS_PC26_S2 bar 0x0 +# CHECK-ELF: 0x18 R_MIPS_PCHI16 bar 0x0 +# CHECK-ELF: 0x1C R_MIPS_PCLO16 bar 0x0 # CHECK-ELF: ] beqc $5, $6, bar @@ -41,3 +51,5 @@ bnezc $9, bar balc bar bc bar + aluipc $2, %pcrel_hi(bar) + addiu $2, $2, %pcrel_lo(bar) From 3f01f5296ec2d146f92bcc479a5bade9e898fc40 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Tue, 27 May 2014 15:57:51 +0000 Subject: [PATCH 179/906] [PATCH] Correct type used for VADD_SPLAT optimization on PowerPC In PPCISelLowering.cpp: PPCTargetLowering::LowerBUILD_VECTOR(), there is an optimization for certain patterns to generate one or two vector splats followed by a vector add or subtract. This operation is represented by a VADD_SPLAT in the selection DAG. Prior to this patch, it was possible for the VADD_SPLAT to be assigned the wrong data type, causing incorrect code generation. This patch corrects the problem. Specifically, the code previously assigned the value type of the BUILD_VECTOR node to the newly generated VADD_SPLAT node. This is correct much of the time, but not always. The problem is that the call to isConstantSplat() may return a SplatBitSize that is not the same as the number of bits in the original element vector type. The correct type to assign is a vector type with the same element bit size as SplatBitSize. The included test case shows an example of this, where the BUILD_VECTOR node has a type of v16i8. The vector to be built is {0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16}. isConstantSplat detects that we can generate a splat of 16 for type v8i16, which is the type we must assign to the VADD_SPLAT node. If we do not, we generate a vspltisb of 8 and a vaddubm, which generates the incorrect result {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}. The correct code generation is a vspltish of 8 and a vadduhm. 
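To make the arithmetic above concrete, here is a small standalone sketch (illustrative only, not part of the patch) that reproduces the two expansions by hand, assuming the big-endian byte order used on these PowerPC targets:

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint8_t wrong[16], right[16];
    // Incorrect lowering: vspltisb 8 followed by vaddubm adds bytewise,
    // so every byte becomes 8 + 8 = 16.
    for (int i = 0; i < 16; ++i)
      wrong[i] = 8 + 8;
    // Correct lowering: vspltish 8 followed by vadduhm adds halfword-wise,
    // so every big-endian halfword becomes 16, i.e. the byte pair {0, 16}.
    for (int i = 0; i < 16; i += 2) {
      uint16_t h = 8 + 8;
      right[i] = h >> 8;       // high byte of the halfword
      right[i + 1] = h & 0xff; // low byte of the halfword
    }
    for (int i = 0; i < 16; ++i)
      printf("byte %2d: wrong=%u right=%u\n", i, wrong[i], right[i]);
    return 0;
  }

Only the second variant reproduces the intended {0, 16, 0, 16, ...} pattern.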
This patch also corrected code generation for CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll, which had been marked as an XFAIL, so we can remove the XFAIL from the test case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209662 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 12 ++++++++---- .../PowerPC/2008-07-10-SplatMiscompile.ll | 1 - test/CodeGen/PowerPC/splat-bug.ll | 18 ++++++++++++++++++ 3 files changed, 26 insertions(+), 5 deletions(-) create mode 100644 test/CodeGen/PowerPC/splat-bug.ll diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 214c86920609..cf4c9e61a58d 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5531,10 +5531,14 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // we convert to a pseudo that will be expanded later into one of // the above forms. SDValue Elt = DAG.getConstant(SextVal, MVT::i32); - EVT VT = Op.getValueType(); - int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4); - SDValue EltSize = DAG.getConstant(Size, MVT::i32); - return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); + EVT VT = (SplatSize == 1 ? MVT::v16i8 : + (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); + SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32); + SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); + if (VT == Op.getValueType()) + return RetVal; + else + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); } // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is diff --git a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll index 00a402e0e487..8802b97d2a6a 100644 --- a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll +++ b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vadduhm ; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubuhm -; XFAIL: * define <4 x i32> @test() nounwind { ret <4 x i32> < i32 4293066722, i32 4293066722, i32 4293066722, i32 4293066722> diff --git a/test/CodeGen/PowerPC/splat-bug.ll b/test/CodeGen/PowerPC/splat-bug.ll new file mode 100644 index 000000000000..4b5250b259fa --- /dev/null +++ b/test/CodeGen/PowerPC/splat-bug.ll @@ -0,0 +1,18 @@ +; RUN: llc -mcpu=ppc64 -O0 -fast-isel=false < %s | FileCheck %s + +; Checks for a previous bug where vspltisb/vaddubm were issued in place +; of vsplitsh/vadduhm. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@a = external global <16 x i8> + +define void @foo() nounwind ssp { +; CHECK: foo: + store <16 x i8> , <16 x i8>* @a +; CHECK: vspltish [[REG:[0-9]+]], 8 +; CHECK: vadduhm {{[0-9]+}}, [[REG]], [[REG]] + ret void +} + From be5c8baeb6c66415357fac9ccb3d9701cdfba5b6 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 27 May 2014 16:50:03 +0000 Subject: [PATCH 180/906] AArch64: add AArch64-specific test for 'c' and 'n'. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209664 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/AArch64/asm-large-immediate.ll | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 test/CodeGen/AArch64/asm-large-immediate.ll diff --git a/test/CodeGen/AArch64/asm-large-immediate.ll b/test/CodeGen/AArch64/asm-large-immediate.ll new file mode 100644 index 000000000000..05e4dddc7a7f --- /dev/null +++ b/test/CodeGen/AArch64/asm-large-immediate.ll @@ -0,0 +1,10 @@ +; RUN: llc -march=aarch64 -no-integrated-as < %s | FileCheck %s + +define void @test() { +entry: +; CHECK: /* result: 68719476738 */ + tail call void asm sideeffect "/* result: ${0:c} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 ) +; CHECK: /* result: -68719476738 */ + tail call void asm sideeffect "/* result: ${0:n} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 ) + ret void +} From fb26356bed7ca3ada126eae946e34b1b281642de Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 27 May 2014 16:50:09 +0000 Subject: [PATCH 181/906] AArch64: add test for NZCV cross-copy save. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209665 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/AArch64/nzcv-save.ll | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 test/CodeGen/AArch64/nzcv-save.ll diff --git a/test/CodeGen/AArch64/nzcv-save.ll b/test/CodeGen/AArch64/nzcv-save.ll new file mode 100644 index 000000000000..32baff3dbe64 --- /dev/null +++ b/test/CodeGen/AArch64/nzcv-save.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=aarch64 < %s | FileCheck %s + +; CHECK: mrs [[NZCV_SAVE:x[0-9]+]], NZCV +; CHECK: msr NZCV, [[NZCV_SAVE]] + +; DAG ends up with two uses for the flags from an ADCS node, which means they +; must be saved for later. +define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp { +entry: + %c = load i256* %cc + %d = load i256* %dd + %add = add nsw i256 %c, %d + store i256 %add, i256* %a, align 8 + %or = or i256 %c, 1606938044258990275541962092341162602522202993782792835301376 + %add6 = add nsw i256 %or, %d + store i256 %add6, i256* %b, align 8 + ret void +} From 078862e67f226103d4368eeda8c0d8c9ac6766e7 Mon Sep 17 00:00:00 2001 From: Filipe Cabecinhas Date: Tue, 27 May 2014 16:54:33 +0000 Subject: [PATCH 182/906] Post-commit fixes for r209643 Detected by Daniel Jasper, Ilia Filippov, and Andrea Di Biagio Fixed the argument order to select (the mask semantics to blendv* are the inverse of select) and fixed the tests Added parenthesis to the assert condition Ran clang-format git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209667 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCalls.cpp | 10 +++++++--- test/Transforms/InstCombine/blend_x86.ll | 13 ++++++------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index dda585294fa2..d4b583b3a7dd 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -734,10 +734,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { auto SelectorType = cast(Mask->getType()); auto EltTy = SelectorType->getElementType(); unsigned Size = SelectorType->getNumElements(); - unsigned BitWidth = EltTy->isFloatTy() ? 32 : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth()); + unsigned BitWidth = + EltTy->isFloatTy() + ? 
32 + : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth()); assert((BitWidth == 64 || BitWidth == 32 || BitWidth == 8) && "Wrong arguments for variable blend intrinsic"); - SmallVector Selectors; + SmallVector Selectors; for (unsigned I = 0; I < Size; ++I) { // The intrinsics only read the top bit uint64_t Selector; @@ -748,7 +751,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1))); } auto NewSelector = ConstantVector::get(Selectors); - return SelectInst::Create(NewSelector, II->getArgOperand(0), II->getArgOperand(1), "blendv"); + return SelectInst::Create(NewSelector, II->getArgOperand(1), + II->getArgOperand(0), "blendv"); } else { break; } diff --git a/test/Transforms/InstCombine/blend_x86.ll b/test/Transforms/InstCombine/blend_x86.ll index 6dbacf963ce4..778d44ba342c 100644 --- a/test/Transforms/InstCombine/blend_x86.ll +++ b/test/Transforms/InstCombine/blend_x86.ll @@ -2,42 +2,42 @@ define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) { ; CHECK-LABEL: @constant_blendvpd -; CHECK: select <2 x i1> +; CHECK: select <2 x i1> , <2 x double> %ab, <2 x double> %xy %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> ) ret <2 x double> %1 } define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) { ; CHECK-LABEL: @constant_blendvps -; CHECK: select <4 x i1> +; CHECK: select <4 x i1> , <4 x float> %abcd, <4 x float> %xyzw %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> ) ret <4 x float> %1 } define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) { ; CHECK-LABEL: @constant_pblendvb -; CHECK: select <16 x i1> +; CHECK: select <16 x i1> , <16 x i8> %abcd, <16 x i8> %xyzw %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> ) ret <16 x i8> %1 } define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) { ; CHECK-LABEL: @constant_blendvpd_avx -; CHECK: select <4 x i1> +; CHECK: select <4 x i1> , <4 x double> %ab, <4 x double> %xy %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> ) ret <4 x double> %1 } define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) { ; CHECK-LABEL: @constant_blendvps_avx -; CHECK: select <8 x i1> +; CHECK: select <8 x i1> , <8 x float> %abcd, <8 x float> %xyzw %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> ) ret <8 x float> %1 } define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) { ; CHECK-LABEL: @constant_pblendvb_avx2 -; CHECK: select <32 x i1> +; CHECK: select <32 x i1> , <32 x i8> %abcd, <32 x i8> %xyzw %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, <32 x i8> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x d declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) - From 991c9c1c8919c77342c61c6b8263cbd30501809d Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 27 May 2014 17:57:14 +0000 Subject: [PATCH 183/906] DebugInfo: Fix argument ordering in test by adding argument numbering. 
This old test didn't have the argument numbering that's now squirelled away in the high bits of the line number in the DW_TAG_arg_variable metadata. Add the numbering and update the test to ensure arguments are in-order. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209669 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/dbg-value-inlined-parameter.ll | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll index 45281c92953e..b901711c2dcf 100644 --- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll +++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll @@ -5,8 +5,20 @@ ; RUN: llc -mtriple=x86_64-apple-darwin < %s -filetype=obj -regalloc=basic \ ; RUN: | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=CHECK --check-prefix=DARWIN %s +; FIXME: This is both a concrete and abstract definition, which is +; incorrect. They should be separate +; CHECK: [[ABS:.*]]: DW_TAG_subprogram +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}} "foo" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_high_pc +; CHECK: [[ABS_SP:.*]]: DW_TAG_formal_parameter +; CHECK-NEXT: DW_AT_name {{.*}} "sp" +; CHECK: [[ABS_NUMS:.*]]: DW_TAG_formal_parameter +; CHECK-NEXT: DW_AT_name {{.*}} "nums" + ;CHECK: DW_TAG_inlined_subroutine -;CHECK-NEXT: DW_AT_abstract_origin +;CHECK-NEXT: DW_AT_abstract_origin {{.*}}{[[ABS]]} ;CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] ;CHECK-NEXT: DW_AT_high_pc [DW_FORM_data4] ;CHECK-NEXT: DW_AT_call_file @@ -14,9 +26,11 @@ ;CHECK: DW_TAG_formal_parameter ;FIXME: Linux shouldn't drop this parameter either... -;LINUX-NOT: DW_TAG_formal_parameter +;FIXME: These parameters should have DW_AT_abstract_origin, instead of names. 
+;DARWIN-NEXT: DW_AT_name {{.*}} "sp" ;DARWIN: DW_TAG_formal_parameter -;DARWIN-NEXT: DW_AT_name {{.*}} "sp" +;CHECK-NEXT: DW_AT_name {{.*}} "nums" +;CHECK-NOT: DW_TAG_formal_parameter %struct.S1 = type { float*, i32 } @@ -62,7 +76,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void ()* @foobar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 15] [def] [scope 0] [foobar] !7 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null} -!9 = metadata !{i32 786689, metadata !0, metadata !"sp", metadata !1, i32 7, metadata !10, i32 0, metadata !32} ; [ DW_TAG_arg_variable ] +!9 = metadata !{i32 786689, metadata !0, metadata !"sp", metadata !1, i32 16777223, metadata !10, i32 0, metadata !32} ; [ DW_TAG_arg_variable ] !10 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] !11 = metadata !{i32 786454, metadata !42, metadata !2, metadata !"S1", i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ] !12 = metadata !{i32 786451, metadata !42, metadata !2, metadata !"S1", i32 1, i64 128, i64 64, i32 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [S1] [line 1, size 128, align 64, offset 0] [def] [from ] @@ -71,7 +85,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !15 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] !16 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !17 = metadata !{i32 786445, metadata !42, metadata !1, metadata !"nums", i32 3, i64 32, i64 32, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ] -!18 = metadata !{i32 786689, metadata !0, metadata !"nums", metadata !1, i32 7, metadata !5, i32 0, metadata !32} ; [ DW_TAG_arg_variable ] +!18 = metadata !{i32 786689, metadata !0, metadata !"nums", metadata !1, i32 33554439, metadata !5, i32 0, metadata !32} ; [ DW_TAG_arg_variable ] !19 = metadata !{i32 786484, i32 0, metadata !2, metadata !"p", metadata !"p", metadata !"", metadata !1, i32 14, metadata !11, i32 0, i32 1, %struct.S1* @p, null} ; [ DW_TAG_variable ] !20 = metadata !{i32 7, i32 13, metadata !0, null} !21 = metadata !{i32 7, i32 21, metadata !0, null} From 585644611e1f696bbd8f95a5eb75fc808b1cfdb6 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Tue, 27 May 2014 18:00:00 +0000 Subject: [PATCH 184/906] Distribute sext/zext to the operands of and/or/xor This is an enhancement to SeparateConstOffsetFromGEP. With this patch, we can extract a constant offset from "s/zext and/or/xor A, B". Added a new test @ext_or to verify this enhancement. Refactoring the code, I also extracted some common logic to function Distributable. 
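The invariant being relied on can be checked exhaustively for narrow integers. The following standalone snippet (illustrative only, not part of the change) verifies that sext and zext distribute over and/or/xor for all 8-bit inputs:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int a = -128; a < 128; ++a) {
      for (int b = -128; b < 128; ++b) {
        int8_t x = (int8_t)a, y = (int8_t)b;
        // Converting int8_t to int32_t is a sign extension.
        assert((int32_t)(int8_t)(x & y) == ((int32_t)x & (int32_t)y));
        assert((int32_t)(int8_t)(x | y) == ((int32_t)x | (int32_t)y));
        assert((int32_t)(int8_t)(x ^ y) == ((int32_t)x ^ (int32_t)y));
        // Zero extension: the same identity, going through uint8_t.
        uint8_t ux = (uint8_t)a, uy = (uint8_t)b;
        assert((uint32_t)(uint8_t)(ux & uy) == ((uint32_t)ux & (uint32_t)uy));
        assert((uint32_t)(uint8_t)(ux | uy) == ((uint32_t)ux | (uint32_t)uy));
        assert((uint32_t)(uint8_t)(ux ^ uy) == ((uint32_t)ux ^ (uint32_t)uy));
      }
    }
    return 0;
  }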
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209670 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Scalar/SeparateConstOffsetFromGEP.cpp | 42 +++++++++++++------ .../NVPTX/split-gep.ll | 19 +++++++++ 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index ac3e7c4d74a1..b8529e174cad 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -165,6 +165,10 @@ class ConstantOffsetExtractor { void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const; /// Finds the first use of Used in U. Returns -1 if not found. static unsigned FindFirstUse(User *U, Value *Used); + /// Returns whether OPC (sext or zext) can be distributed to the operands of + /// BO. e.g., sext can be distributed to the operands of an "add nsw" because + /// sext (add nsw a, b) == add nsw (sext a), (sext b). + static bool Distributable(unsigned OPC, BinaryOperator *BO); /// The path from the constant offset to the old GEP index. e.g., if the GEP /// index is "a * b + (c + 5)". After running function find, UserChain[0] will @@ -223,6 +227,25 @@ FunctionPass *llvm::createSeparateConstOffsetFromGEPPass() { return new SeparateConstOffsetFromGEP(); } +bool ConstantOffsetExtractor::Distributable(unsigned OPC, BinaryOperator *BO) { + assert(OPC == Instruction::SExt || OPC == Instruction::ZExt); + + // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B) + // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B) + if (BO->getOpcode() == Instruction::Add || + BO->getOpcode() == Instruction::Sub) { + return (OPC == Instruction::SExt && BO->hasNoSignedWrap()) || + (OPC == Instruction::ZExt && BO->hasNoUnsignedWrap()); + } + + // sext/zext (and/or/xor A, B) == and/or/xor (sext/zext A), (sext/zext B) + // -instcombine also leverages this invariant to do the reverse + // transformation to reduce integer casts. + return BO->getOpcode() == Instruction::And || + BO->getOpcode() == Instruction::Or || + BO->getOpcode() == Instruction::Xor; +} + int64_t ConstantOffsetExtractor::findInEitherOperand(User *U, bool IsSub) { assert(U->getNumOperands() == 2); int64_t ConstantOffset = find(U->getOperand(0)); @@ -273,21 +296,14 @@ int64_t ConstantOffsetExtractor::find(Value *V) { ConstantOffset = findInEitherOperand(U, false); break; } - case Instruction::SExt: { - // For safety, we trace into sext only when its operand is marked - // "nsw" because xxx.nsw guarantees no signed wrap. e.g., we can safely - // transform "sext (add nsw a, 5)" into "add nsw (sext a), 5". - if (BinaryOperator *BO = dyn_cast(U->getOperand(0))) { - if (BO->hasNoSignedWrap()) - ConstantOffset = find(U->getOperand(0)); - } - break; - } + case Instruction::SExt: case Instruction::ZExt: { - // Similarly, we trace into zext only when its operand is marked with - // "nuw" because zext (add nuw a, b) == add nuw (zext a), (zext b). + // We trace into sext/zext if the operator can be distributed to its + // operand. 
e.g., we can transform into "sext (add nsw a, 5)" and + // extract constant 5, because + // sext (add nsw a, 5) == add nsw (sext a), 5 if (BinaryOperator *BO = dyn_cast(U->getOperand(0))) { - if (BO->hasNoUnsignedWrap()) + if (Distributable(O->getOpcode(), BO)) ConstantOffset = find(U->getOperand(0)); } break; diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll index 320af5fd613f..42136d2b657e 100644 --- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll +++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll @@ -57,6 +57,25 @@ define float* @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) { ; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[0-9]+}}, i64 %{{[0-9]+}} ; CHECK: getelementptr float* [[BASE_PTR]], i64 33 +; Similar to @ext_add_no_overflow, we should be able to trace into sext/zext if +; its operand is an "or" instruction. +define float* @ext_or(i64 %a, i32 %b) { +entry: + %b1 = shl i32 %b, 2 + %b2 = or i32 %b1, 1 + %b3 = or i32 %b1, 2 + %b2.ext = sext i32 %b2 to i64 + %b3.ext = sext i32 %b3 to i64 + %i = add i64 %a, %b2.ext + %j = add i64 %a, %b3.ext + %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %j + ret float* %p +} +; CHECK-LABEL: @ext_or +; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[0-9]+}}, i64 %{{[0-9]+}} +; CHECK: [[BASE_INT:%[0-9]+]] = ptrtoint float* [[BASE_PTR]] to i64 +; CHECK: add i64 [[BASE_INT]], 136 + ; We should treat "or" with no common bits (%k) as "add", and leave "or" with ; potentially common bits (%l) as is. define float* @or(i64 %i) { From 3efc250128c86a4109a24e02f1baf7fba6e168d9 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Tue, 27 May 2014 18:12:55 +0000 Subject: [PATCH 185/906] Fixed a test in r209670 The test was outdated with r209537. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209671 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll index 42136d2b657e..2e50f5fd0cb1 100644 --- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll +++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll @@ -73,8 +73,7 @@ entry: } ; CHECK-LABEL: @ext_or ; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[0-9]+}}, i64 %{{[0-9]+}} -; CHECK: [[BASE_INT:%[0-9]+]] = ptrtoint float* [[BASE_PTR]] to i64 -; CHECK: add i64 [[BASE_INT]], 136 +; CHECK: getelementptr float* [[BASE_PTR]], i64 34 ; We should treat "or" with no common bits (%k) as "add", and leave "or" with ; potentially common bits (%l) as is. From 75325b9f658e1d61d129270ffa5d30bad655d3c4 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 27 May 2014 18:37:38 +0000 Subject: [PATCH 186/906] DebugInfo: Separate out the addition of subprogram attribute additions so that they can be added later depending on whether or not the function is inlined. 
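In rough outline, the shape of the change is a create-now/decorate-later split. The sketch below is a simplified, hypothetical rendering of that pattern; the function names mirror the patch, but the types are stand-ins rather than the real DwarfUnit interface:

  #include <deque>
  #include <string>
  #include <vector>

  struct DIE {
    std::vector<std::string> Attributes;
  };
  struct Subprogram { std::string Name; };

  std::deque<DIE> UnitDIEs; // deque so earlier DIE pointers stay valid

  // Step 1: create the bare subprogram DIE early, so other DIEs can refer to it.
  DIE *createSubprogramDIE() {
    UnitDIEs.emplace_back();
    return &UnitDIEs.back();
  }

  // Step 2: later, once it is known whether the function was inlined, attach
  // either the full definition attributes or a reference to the abstract DIE.
  void applySubprogramAttributes(const Subprogram &SP, DIE &D, bool HasAbstractDef) {
    if (HasAbstractDef)
      D.Attributes.push_back("DW_AT_abstract_origin");
    else
      D.Attributes.push_back("DW_AT_name: " + SP.Name);
  }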
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209673 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 24 +++++++++++++++--------- lib/CodeGen/AsmPrinter/DwarfUnit.h | 2 ++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 8d8c6181e1dd..c6e47ec07203 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1374,24 +1374,32 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE (as is the case for member function // declarations). - DIScope Context = resolve(SP.getContext()); - DIE *ContextDIE = getOrCreateContextDIE(Context); + DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); if (DIE *SPDie = getDIE(SP)) return SPDie; - DIE *DeclDie = nullptr; - StringRef DeclLinkageName; if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { // Add subprogram definitions to the CU die directly. ContextDIE = &getUnitDie(); - DeclDie = getOrCreateSubprogramDIE(SPDecl); - DeclLinkageName = SPDecl.getLinkageName(); + // Build the decl now to ensure it preceeds the definition. + getOrCreateSubprogramDIE(SPDecl); } // DW_TAG_inlined_subroutine may refer to this DIE. DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP); + applySubprogramAttributes(SP, SPDie); + return &SPDie; +} + +void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { + DIE *DeclDie = nullptr; + StringRef DeclLinkageName; + if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + DeclDie = getOrCreateSubprogramDIE(SPDecl); + DeclLinkageName = SPDecl.getLinkageName(); + } // Add function template parameters. addTemplateParams(SPDie, SP.getTemplateParams()); @@ -1409,7 +1417,7 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Refer to the function declaration where all the other attributes will be // found. addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie); - return &SPDie; + return; } // Constructors and operators for anonymous aggregates do not have names. @@ -1486,8 +1494,6 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) { if (SP.isExplicit()) addFlag(SPDie, dwarf::DW_AT_explicit); - - return &SPDie; } // Return const expression if value is a GEP to access merged global diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index e44f256c3a35..7025b712f0e7 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -401,6 +401,8 @@ class DwarfUnit { /// getOrCreateSubprogramDIE - Create new DIE using SP. DIE *getOrCreateSubprogramDIE(DISubprogram SP); + void applySubprogramAttributes(DISubprogram SP, DIE &SPDie); + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. DIE *getOrCreateTypeDIE(const MDNode *N); From def5a057976fe7ab21c8a6e6ccf1311d20971f4f Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 27 May 2014 18:37:43 +0000 Subject: [PATCH 187/906] DebugInfo: Lazily attach definition attributes to definitions. This is a precursor to fixing inlined debug info where the concrete, out-of-line definition may preceed any inlined usage. To cope with this, the attributes that may appear on the concrete definition or the abstract definition are delayed until the end of the module. 
Then, if an abstract definition was created, it is referenced (and no other attributes are added to the out-of-line definition), otherwise the attributes are added directly to the out-of-line definition. In a couple of cases this causes not just reordering of attributes, but reordering of types. When the creation of the attribute is delayed, if that creation would create a type (such as for a DW_AT_type attribute) then other top level DIEs may've been constructed during the delay, causing the referenced type to be created and added after those intervening DIEs. In the extreme case, in cross-cu-inlining.ll, this actually causes the DW_TAG_basic_type for "int" to move from one CU to another. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209674 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 21 +++++++++++++++++++ lib/CodeGen/AsmPrinter/DwarfDebug.h | 2 ++ lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 8 ++++++- test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll | 4 ++-- test/DebugInfo/X86/concrete_out_of_line.ll | 9 ++++---- .../X86/dbg-value-inlined-parameter.ll | 4 ++-- test/DebugInfo/X86/debug-info-blocks.ll | 14 ++++++++----- .../DebugInfo/X86/empty-and-one-elem-array.ll | 10 ++++----- test/DebugInfo/cross-cu-inlining.ll | 10 +++++---- .../namespace_function_definition.ll | 4 ++-- 10 files changed, 60 insertions(+), 26 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5c802f7a2ceb..6234f12dd2be 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -806,6 +806,25 @@ void DwarfDebug::beginModule() { SectionMap[Asm->getObjFileLowering().getTextSection()]; } +void DwarfDebug::finishSubprogramDefinitions() { + const Module *M = MMI->getModule(); + + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + for (MDNode *N : CU_Nodes->operands()) { + DICompileUnit TheCU(N); + // Construct subprogram DIE and add variables DIEs. + DwarfCompileUnit *SPCU = + static_cast(CUMap.lookup(TheCU)); + DIArray Subprograms = TheCU.getSubprograms(); + for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { + DISubprogram SP(Subprograms.getElement(i)); + if (DIE *D = SPCU->getDIE(SP)) + SPCU->applySubprogramAttributes(SP, *D); + } + } +} + + // Collect info for variables that were optimized out. void DwarfDebug::collectDeadVariables() { const Module *M = MMI->getModule(); @@ -847,6 +866,8 @@ void DwarfDebug::finalizeModuleInfo() { // Collect info for variables that were optimized out. collectDeadVariables(); + finishSubprogramDefinitions(); + // Handle anything that needs to be done on a per-unit basis after // all other generation. for (const auto &TheU : getUnits()) { diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 1b0b1ebafd88..4a4d01246c23 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -389,6 +389,8 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Collect info for variables that were optimized out. void collectDeadVariables(); + void finishSubprogramDefinitions(); + /// \brief Finish off debug information after all functions have been /// processed. 
void finalizeModuleInfo(); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index c6e47ec07203..2707f8b73d84 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1389,6 +1389,11 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // DW_TAG_inlined_subroutine may refer to this DIE. DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP); + // Abort here and fill this in later, depending on whether or not this + // subprogram turns out to have inlined instances or not. + if (SP.isDefinition()) + return &SPDie; + applySubprogramAttributes(SP, SPDie); return &SPDie; } @@ -1397,7 +1402,8 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { DIE *DeclDie = nullptr; StringRef DeclLinkageName; if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { - DeclDie = getOrCreateSubprogramDIE(SPDecl); + DeclDie = getDIE(SPDecl); + assert(DeclDie); DeclLinkageName = SPDecl.getLinkageName(); } diff --git a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll index e81667f6793c..5f7cb696d738 100644 --- a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll +++ b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll @@ -10,10 +10,10 @@ ; Check that the subprogram inside the class definition has low_pc, only ; attached to the definition. ; CHECK: [[FOO_INL:0x........]]: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZZN1B2fnEvEN1A3fooEv" -; CHECK-NOT: NULL ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_low_pc +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZZN1B2fnEvEN1A3fooEv" ; And just double check that there's no out of line definition that references ; this subprogram. ; CHECK-NOT: DW_AT_specification {{.*}} {[[FOO_INL]]} diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll index a8bf7ca5f9ee..5d4d5802bd1a 100644 --- a/test/DebugInfo/X86/concrete_out_of_line.ll +++ b/test/DebugInfo/X86/concrete_out_of_line.ll @@ -34,17 +34,16 @@ ; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D2_ABS:0x........]]} ; CHECK: [[D1_ABS]]: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_inline ; CHECK-NEXT: DW_AT_{{.*}}linkage_name ; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]} -; CHECK-NEXT: DW_AT_inline -; CHECK-NOT: DW_AT_inline -; CHECK-NOT: DW_TAG +; CHECK-NOT: DW_AT ; CHECK: [[D1_THIS_ABS:0x........]]: DW_TAG_formal_parameter ; CHECK: [[D2_ABS]]: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_inline ; CHECK-NEXT: DW_AT_{{.*}}linkage_name ; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]} -; CHECK-NEXT: DW_AT_inline -; CHECK-NOT: DW_AT_inline +; CHECK-NOT: DW_AT ; CHECK: DW_TAG ; and then that a TAG_subprogram refers to it with AT_abstract_origin. diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll index b901711c2dcf..74b2f8bc338c 100644 --- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll +++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll @@ -9,9 +9,9 @@ ; incorrect. 
They should be separate ; CHECK: [[ABS:.*]]: DW_TAG_subprogram ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name {{.*}} "foo" -; CHECK-NOT: DW_TAG ; CHECK: DW_AT_high_pc +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}} "foo" ; CHECK: [[ABS_SP:.*]]: DW_TAG_formal_parameter ; CHECK-NEXT: DW_AT_name {{.*}} "sp" ; CHECK: [[ABS_NUMS:.*]]: DW_TAG_formal_parameter diff --git a/test/DebugInfo/X86/debug-info-blocks.ll b/test/DebugInfo/X86/debug-info-blocks.ll index b2531f647083..5feab2477239 100644 --- a/test/DebugInfo/X86/debug-info-blocks.ll +++ b/test/DebugInfo/X86/debug-info-blocks.ll @@ -6,16 +6,20 @@ ; test that the DW_AT_location of self is at ( fbreg +{{[0-9]+}}, deref, +{{[0-9]+}} ) ; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name{{.*}}_block_invoke +; CHECK: DW_TAG_subprogram +; CHECK-NOT: DW_TAG ; CHECK: DW_AT_object_pointer +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name{{.*}}_block_invoke -; CHECK-NOT: DW_TAG_subprogram +; CHECK-NOT: {{DW_TAG|NULL}} ; CHECK: DW_TAG_formal_parameter -; CHECK-NEXT: DW_AT_name{{.*}}.block_descriptor +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name{{.*}}.block_descriptor ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_location -; CHECK-NOT: DW_TAG_subprogram +; CHECK-NOT: {{DW_TAG|NULL}} ; CHECK: DW_TAG_variable ; CHECK-NEXT: DW_AT_name{{.*}}"self" ; CHECK-NOT: DW_TAG @@ -31,7 +35,7 @@ ; CHECK: [[A:.*]]: DW_TAG_structure_type ; CHECK-NEXT: DW_AT_APPLE_objc_complete_type ; CHECK-NEXT: DW_AT_name{{.*}}"A" -; CHECK: [[APTR]]: DW_TAG_pointer_type [5] +; CHECK: [[APTR]]: DW_TAG_pointer_type ; CHECK-NEXT: {[[A]]} diff --git a/test/DebugInfo/X86/empty-and-one-elem-array.ll b/test/DebugInfo/X86/empty-and-one-elem-array.ll index f5c37df1e5e8..974bd7347951 100644 --- a/test/DebugInfo/X86/empty-and-one-elem-array.ll +++ b/test/DebugInfo/X86/empty-and-one-elem-array.ll @@ -28,11 +28,6 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone ; An empty array should not have an AT_upper_bound attribute. But an array of 1 ; should. 
-; CHECK: DW_TAG_base_type -; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "int") -; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1] (0x05) -; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1] (0x04) - ; int foo::b[1]: ; CHECK: DW_TAG_structure_type ; CHECK: DW_AT_name{{.*}}"foo" @@ -41,6 +36,11 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone ; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "b") ; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] +; CHECK: DW_TAG_base_type +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "int") +; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1] (0x05) +; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1] (0x04) + ; int[1]: ; CHECK: DW_TAG_array_type [{{.*}}] * ; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] diff --git a/test/DebugInfo/cross-cu-inlining.ll b/test/DebugInfo/cross-cu-inlining.ll index 44a1a5850693..6e0378d57f0b 100644 --- a/test/DebugInfo/cross-cu-inlining.ll +++ b/test/DebugInfo/cross-cu-inlining.ll @@ -23,13 +23,11 @@ ; CHECK: DW_TAG_compile_unit ; CHECK: DW_AT_name {{.*}} "a.cpp" ; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_type [DW_FORM_ref_addr] (0x00000000[[INT:.*]]) ; CHECK: DW_TAG_inlined_subroutine ; CHECK-NEXT: DW_AT_abstract_origin {{.*}}[[ABS_FUNC:........]]) ; CHECK: DW_TAG_formal_parameter ; CHECK-NEXT: DW_AT_abstract_origin {{.*}}[[ABS_VAR:........]]) -; CHECK: 0x[[INT:.*]]: DW_TAG_base_type -; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name {{.*}} "int" ; Check the abstract definition is in the 'b.cpp' CU and doesn't contain any ; concrete information (address range or variable location) @@ -40,9 +38,13 @@ ; CHECK: 0x[[ABS_VAR]]: DW_TAG_formal_parameter ; CHECK-NOT: DW_TAG ; CHECK-NOT: DW_AT_location -; CHECK: DW_AT_type [DW_FORM_ref_addr] (0x00000000[[INT]]) +; CHECK: DW_AT_type [DW_FORM_ref4] {{.*}} {0x[[INT]]} ; CHECK-NOT: DW_AT_location +; CHECK: 0x[[INT]]: DW_TAG_base_type +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}} "int" + ; Check the concrete out of line definition references the abstract and ; provides the address range and variable location ; CHECK: DW_TAG_subprogram diff --git a/test/DebugInfo/namespace_function_definition.ll b/test/DebugInfo/namespace_function_definition.ll index 15f39fd6d9d8..590f2b301ffe 100644 --- a/test/DebugInfo/namespace_function_definition.ll +++ b/test/DebugInfo/namespace_function_definition.ll @@ -12,9 +12,9 @@ ; CHECK-NEXT: DW_AT_name {{.*}} "ns" ; CHECK: DW_TAG_subprogram ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN2ns4funcEv" -; CHECK-NOT: DW_TAG ; CHECK: DW_AT_low_pc +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN2ns4funcEv" ; CHECK: NULL ; CHECK: NULL From 254d093f991287bea06e47c27ef2e14db36b1c59 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 27 May 2014 18:37:48 +0000 Subject: [PATCH 188/906] DebugInfo: Lazily construct subprogram definition DIEs. A further step to correctly emitting concrete out of line definitions preceeding inlined instances of the same program. To do this, emission of subprograms must be delayed until required since we don't know which (abstract only (if there's no out of line definition), concrete only (if there are no inlined instances), or both) DIEs are required at the start of the module. To reduce the test churn in the following commit that actually fixes the bug, this commit introduces the lazy DIE construction and cleans up test cases that are impacted by the changes in the resulting DIE ordering. 
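The laziness itself is a plain get-or-create lookup. A minimal sketch of the pattern (hypothetical, simplified types, not the in-tree data structures) looks like this:

  #include <map>
  #include <memory>

  struct DIE { /* attributes, children, ... */ };

  std::map<const void *, std::unique_ptr<DIE>> SPDies;

  // Nothing is built at beginModule(); a subprogram DIE is materialized only
  // when an abstract definition, a concrete definition, or the end-of-module
  // pass first asks for it.
  DIE &getOrCreateSubprogramDIE(const void *SP) {
    std::unique_ptr<DIE> &Slot = SPDies[SP];
    if (!Slot)
      Slot = std::make_unique<DIE>();
    return *Slot;
  }

  int main() {
    int SPNode;                       // stand-in for a subprogram MDNode
    DIE &First = getOrCreateSubprogramDIE(&SPNode);
    DIE &Again = getOrCreateSubprogramDIE(&SPNode);
    return &First == &Again ? 0 : 1;  // same DIE both times
  }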
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209675 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 32 +++++++++++------ test/DebugInfo/X86/concrete_out_of_line.ll | 31 ++++++++--------- test/DebugInfo/X86/debug-info-blocks.ll | 8 +++-- test/DebugInfo/X86/inline-member-function.ll | 17 +++++---- test/DebugInfo/X86/inline-seldag-test.ll | 7 ++-- test/DebugInfo/X86/sret.ll | 4 +-- test/DebugInfo/debug-info-qualifiers.ll | 2 -- test/DebugInfo/namespace.ll | 36 +++++++++----------- test/DebugInfo/varargs.ll | 14 ++++---- test/Linker/type-unique-odr-a.ll | 12 +++---- 10 files changed, 88 insertions(+), 75 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 6234f12dd2be..421cdbd95fa2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -314,7 +314,7 @@ bool DwarfDebug::isSubprogramContext(const MDNode *Context) { // scope then create and insert DIEs for these variables. DIE &DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit &SPCU, DISubprogram SP) { - DIE *SPDie = SPCU.getDIE(SP); + DIE *SPDie = SPCU.getOrCreateSubprogramDIE(SP); assert(SPDie && "Unable to find subprogram DIE!"); @@ -525,15 +525,18 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, DISubprogram SP(Scope->getScopeNode()); - if (!ProcessedSPNodes.insert(SP)) + DIE *&AbsDef = AbstractSPDies[SP]; + if (AbsDef) return; // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. DwarfCompileUnit &SPCU = *SPMap[SP]; - DIE *AbsDef = SPCU.getDIE(SP); - assert(AbsDef); - AbstractSPDies.insert(std::make_pair(SP, AbsDef)); + AbsDef = SPCU.getOrCreateSubprogramDIE(SP); + + if (!ProcessedSPNodes.insert(SP)) + return; + SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); createAndAddScopeChildren(SPCU, Scope, *AbsDef); } @@ -781,7 +784,7 @@ void DwarfDebug::beginModule() { CU.createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); DIArray SPs = CUNode.getSubprograms(); for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) - constructSubprogramDIE(CU, SPs.getElement(i)); + SPMap.insert(std::make_pair(SPs.getElement(i), &CU)); DIArray EnumTypes = CUNode.getEnumTypes(); for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) CU.getOrCreateTypeDIE(EnumTypes.getElement(i)); @@ -818,8 +821,17 @@ void DwarfDebug::finishSubprogramDefinitions() { DIArray Subprograms = TheCU.getSubprograms(); for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { DISubprogram SP(Subprograms.getElement(i)); - if (DIE *D = SPCU->getDIE(SP)) - SPCU->applySubprogramAttributes(SP, *D); + // Perhaps the subprogram is in another CU (such as due to comdat + // folding, etc), in which case ignore it here. + if (SPMap[SP] != SPCU) + continue; + DIE *D = SPCU->getDIE(SP); + if (!D) + // Lazily construct the subprogram if we didn't see either concrete or + // inlined versions during codegen. + D = SPCU->getOrCreateSubprogramDIE(SP); + SPCU->applySubprogramAttributes(SP, *D); + SPCU->addGlobalName(SP.getName(), *D, resolve(SP.getContext())); } } } @@ -863,11 +875,11 @@ void DwarfDebug::collectDeadVariables() { } void DwarfDebug::finalizeModuleInfo() { + finishSubprogramDefinitions(); + // Collect info for variables that were optimized out. collectDeadVariables(); - finishSubprogramDefinitions(); - // Handle anything that needs to be done on a per-unit basis after // all other generation. 
for (const auto &TheU : getUnits()) { diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll index 5d4d5802bd1a..5d9f6a5779bb 100644 --- a/test/DebugInfo/X86/concrete_out_of_line.ll +++ b/test/DebugInfo/X86/concrete_out_of_line.ll @@ -14,6 +14,19 @@ ; CHECK: [[RELEASE_DECL:0x........]]: DW_TAG_subprogram ; CHECK: [[DTOR_DECL:0x........]]: DW_TAG_subprogram +; CHECK: [[D2_ABS:.*]]: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_inline +; CHECK-NEXT: DW_AT_{{.*}}linkage_name {{.*}}D2 +; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]} +; CHECK-NOT: DW_AT +; CHECK: DW_TAG +; CHECK: [[D1_ABS:.*]]: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_inline +; CHECK-NEXT: DW_AT_{{.*}}linkage_name {{.*}}D1 +; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]} +; CHECK-NOT: DW_AT +; CHECK: [[D1_THIS_ABS:.*]]: DW_TAG_formal_parameter + ; CHECK: [[RELEASE:0x........]]: DW_TAG_subprogram ; CHECK: DW_AT_specification {{.*}} {[[RELEASE_DECL]]} ; CHECK: DW_TAG_formal_parameter @@ -27,28 +40,14 @@ ; CHECK-NOT: NULL ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_inlined_subroutine -; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D1_ABS:0x........]]} +; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D1_ABS]]} ; CHECK-NOT: NULL ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_inlined_subroutine -; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D2_ABS:0x........]]} - -; CHECK: [[D1_ABS]]: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_inline -; CHECK-NEXT: DW_AT_{{.*}}linkage_name -; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]} -; CHECK-NOT: DW_AT -; CHECK: [[D1_THIS_ABS:0x........]]: DW_TAG_formal_parameter -; CHECK: [[D2_ABS]]: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_inline -; CHECK-NEXT: DW_AT_{{.*}}linkage_name -; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]} -; CHECK-NOT: DW_AT -; CHECK: DW_TAG +; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D2_ABS]]} ; and then that a TAG_subprogram refers to it with AT_abstract_origin. -; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_subprogram ; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D1_ABS]]} ; CHECK: DW_TAG_formal_parameter diff --git a/test/DebugInfo/X86/debug-info-blocks.ll b/test/DebugInfo/X86/debug-info-blocks.ll index 5feab2477239..430c1575816a 100644 --- a/test/DebugInfo/X86/debug-info-blocks.ll +++ b/test/DebugInfo/X86/debug-info-blocks.ll @@ -5,6 +5,11 @@ ; rdar://problem/9279956 ; test that the DW_AT_location of self is at ( fbreg +{{[0-9]+}}, deref, +{{[0-9]+}} ) +; CHECK: [[A:.*]]: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_APPLE_objc_complete_type +; CHECK-NEXT: DW_AT_name{{.*}}"A" + +; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_subprogram ; CHECK-NOT: DW_TAG @@ -32,9 +37,6 @@ ; 0x91 = DW_OP_fbreg ; CHECK: DW_AT_location{{.*}}91 {{[0-9]+}} 06 23 {{[0-9]+}} ) -; CHECK: [[A:.*]]: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_APPLE_objc_complete_type -; CHECK-NEXT: DW_AT_name{{.*}}"A" ; CHECK: [[APTR]]: DW_TAG_pointer_type ; CHECK-NEXT: {[[A]]} diff --git a/test/DebugInfo/X86/inline-member-function.ll b/test/DebugInfo/X86/inline-member-function.ll index 4a4a19c19130..3dc6043bf36c 100644 --- a/test/DebugInfo/X86/inline-member-function.ll +++ b/test/DebugInfo/X86/inline-member-function.ll @@ -13,21 +13,24 @@ ; return foo().func(i); ; } +; CHECK: DW_TAG_structure_type +; CHECK: DW_TAG_subprogram + +; But make sure we emit DW_AT_object_pointer on the abstract definition. 
+; CHECK: [[ABSTRACT_ORIGIN:.*]]: DW_TAG_subprogram +; CHECK-NOT: NULL +; CHECK-NOT: TAG +; CHECK: DW_AT_object_pointer + ; Ensure we omit DW_AT_object_pointer on inlined subroutines. ; CHECK: DW_TAG_inlined_subroutine -; CHECK-NEXT: DW_AT_abstract_origin {{.*}}{[[ABSTRACT_ORIGIN:0x[0-9a-e]*]]} +; CHECK-NEXT: DW_AT_abstract_origin {{.*}}{[[ABSTRACT_ORIGIN]]} ; CHECK-NOT: NULL ; CHECK-NOT: DW_AT_object_pointer ; CHECK: DW_TAG_formal_parameter ; CHECK-NOT: DW_AT_artificial ; CHECK: DW_TAG -; But make sure we emit DW_AT_object_pointer on the abstract definition. -; CHECK: [[ABSTRACT_ORIGIN]]: DW_TAG_subprogram -; CHECK-NOT: NULL -; CHECK-NOT: TAG -; CHECK: DW_AT_object_pointer - %struct.foo = type { i8 } @i = global i32 0, align 4 diff --git a/test/DebugInfo/X86/inline-seldag-test.ll b/test/DebugInfo/X86/inline-seldag-test.ll index f139140ee758..615f03a2ad28 100644 --- a/test/DebugInfo/X86/inline-seldag-test.ll +++ b/test/DebugInfo/X86/inline-seldag-test.ll @@ -11,12 +11,13 @@ ; x = f(x); ; } -; CHECK: DW_TAG_inlined_subroutine -; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[F:0x.*]]} -; CHECK: [[F]]: DW_TAG_subprogram +; CHECK: [[F:.*]]: DW_TAG_subprogram ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name {{.*}} "f" +; CHECK: DW_TAG_inlined_subroutine +; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[F]]} + ; Make sure the condition test is attributed to the inline function, not the ; location of the test's operands within the caller. diff --git a/test/DebugInfo/X86/sret.ll b/test/DebugInfo/X86/sret.ll index 004632814c2c..fed4334c27f5 100644 --- a/test/DebugInfo/X86/sret.ll +++ b/test/DebugInfo/X86/sret.ll @@ -3,8 +3,8 @@ ; Based on the debuginfo-tests/sret.cpp code. -; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0xc68148e4333befda) -; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0xc68148e4333befda) +; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x72aabf538392d298) +; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x72aabf538392d298) %class.A = type { i32 (...)**, i32 } %class.B = type { i8 } diff --git a/test/DebugInfo/debug-info-qualifiers.ll b/test/DebugInfo/debug-info-qualifiers.ll index 2aea73607076..b624d3874cb3 100644 --- a/test/DebugInfo/debug-info-qualifiers.ll +++ b/test/DebugInfo/debug-info-qualifiers.ll @@ -21,8 +21,6 @@ ; CHECK-NEXT: DW_AT_rvalue_reference DW_FORM_flag_present ; ; CHECK: DW_TAG_subprogram -; -; CHECK: DW_TAG_subprogram ; CHECK-NOT: DW_TAG_subprogram ; CHECK: DW_AT_name {{.*}}"l" ; CHECK-NOT: DW_TAG_subprogram diff --git a/test/DebugInfo/namespace.ll b/test/DebugInfo/namespace.ll index ca5cf808d180..a9de62c39062 100644 --- a/test/DebugInfo/namespace.ll +++ b/test/DebugInfo/namespace.ll @@ -16,27 +16,24 @@ ; CHECK: [[I:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable ; CHECK-NEXT: DW_AT_name{{.*}}= "i" ; CHECK-NOT: NULL -; CHECK: DW_TAG_subprogram +; CHECK: [[FOO:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name{{.*}}= "foo" +; CHECK-NEXT: DW_AT_declaration +; CHECK-NOT: NULL +; CHECK: [[BAR:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name{{.*}}= "bar" +; CHECK: NULL +; CHECK: [[FUNC1:.*]]: DW_TAG_subprogram ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_MIPS_linkage_name ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name{{.*}}= "f1" -; CHECK: [[FUNC1:0x[0-9a-f]*]]:{{ *}}DW_TAG_subprogram +; CHECK: DW_TAG_subprogram ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_MIPS_linkage_name ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name{{.*}}= "f1" ; CHECK: NULL -; CHECK-NOT: NULL -; CHECK: [[FOO:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type -; CHECK-NEXT: DW_AT_name{{.*}}= "foo" -; CHECK-NEXT: DW_AT_declaration -; CHECK-NOT: 
NULL -; CHECK: [[BAR:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type -; CHECK-NEXT: DW_AT_name{{.*}}= "bar" -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL ; CHECK-NOT: NULL ; CHECK: DW_TAG_imported_module @@ -48,6 +45,13 @@ ; CHECK: NULL ; CHECK-NOT: NULL +; CHECK: DW_TAG_imported_module +; Same bug as above, this should be F2, not F1 +; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F1]]) +; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x0b) +; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]}) +; CHECK-NOT: NULL + ; CHECK: DW_TAG_subprogram ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_MIPS_linkage_name @@ -99,13 +103,7 @@ ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS2]]}) ; CHECK: NULL ; CHECK: NULL -; CHECK-NOT: NULL - -; CHECK: DW_TAG_imported_module -; Same bug as above, this should be F2, not F1 -; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F1]]) -; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x0b) -; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]}) +; CHECK: NULL ; CHECK: file_names[ [[F1]]]{{.*}}debug-info-namespace.cpp ; CHECK: file_names[ [[F2]]]{{.*}}foo.cpp diff --git a/test/DebugInfo/varargs.ll b/test/DebugInfo/varargs.ll index a32741426194..ddfcd858f539 100644 --- a/test/DebugInfo/varargs.ll +++ b/test/DebugInfo/varargs.ll @@ -13,25 +13,25 @@ ; ; CHECK: DW_TAG_subprogram ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name {{.*}} "b" +; CHECK: DW_AT_name {{.*}} "a" +; CHECK-NOT: DW_TAG +; CHECK: DW_TAG_formal_parameter ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_formal_parameter ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_unspecified_parameters ; -; Variadic C++ member function. -; struct A { void a(int c, ...); } -; ; CHECK: DW_TAG_subprogram ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name {{.*}} "a" -; CHECK-NOT: DW_TAG -; CHECK: DW_TAG_formal_parameter +; CHECK: DW_AT_name {{.*}} "b" ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_formal_parameter ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_unspecified_parameters ; +; Variadic C++ member function. +; struct A { void a(int c, ...); } +; ; Variadic function pointer. ; void (*fptr)(int, ...); ; diff --git a/test/Linker/type-unique-odr-a.ll b/test/Linker/type-unique-odr-a.ll index 54befb75ba45..91c80339ec03 100644 --- a/test/Linker/type-unique-odr-a.ll +++ b/test/Linker/type-unique-odr-a.ll @@ -22,12 +22,6 @@ ; return A().getFoo(); ; } ; -; CHECK: DW_TAG_subprogram -; CHECK-NOT: DW_TAG -; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_Z3bazv" -; CHECK: DW_TAG_subprogram -; CHECK-NOT: DW_TAG -; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZL3barv" ; CHECK: DW_TAG_class_type ; CHECK-NEXT: DW_AT_name {{.*}} "A" ; CHECK-NOT: DW_TAG @@ -39,6 +33,12 @@ ; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN1A6getFooEv" ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name {{.*}} "getFoo" +; CHECK: DW_TAG_subprogram +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_Z3bazv" +; CHECK: DW_TAG_subprogram +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZL3barv" ; getFoo and A may only appear once. ; CHECK-NOT: {{(getFoo)|("A")}} From 0fb9226470d8b5f4da5f08d84adc75849f7cc920 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 27 May 2014 18:37:51 +0000 Subject: [PATCH 189/906] DebugInfo: Avoid an extra map lookup when finding abstract subprogram DIEs. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209676 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 421cdbd95fa2..c28c20d68a7a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -435,7 +435,7 @@ DwarfDebug::constructInlinedScopeDIE(DwarfCompileUnit &TheCU, DISubprogram InlinedSP = getDISubprogram(DS); // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. - DIE *OriginDIE = SPMap[InlinedSP]->getDIE(InlinedSP); + DIE *OriginDIE = AbstractSPDies[InlinedSP]; assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); auto ScopeDIE = make_unique(dwarf::DW_TAG_inlined_subroutine); From 41087d99df6ed87405639a6ee1a4bc7b825d4b65 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 27 May 2014 18:37:55 +0000 Subject: [PATCH 190/906] DebugInfo: Create abstract function definitions even when concrete definitions preceed inline definitions. After much puppetry, here's the major piece of the work to ensure that even when a concrete definition preceeds all inline definitions, an abstract definition is still created and referenced from both concrete and inline definitions. Variables are still broken in this case (see comment in dbg-value-inlined-parameter.ll test case) and will be addressed in follow up work. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209677 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 81 ++++++++----------- lib/CodeGen/AsmPrinter/DwarfDebug.h | 3 - test/DebugInfo/X86/concrete_out_of_line.ll | 10 ++- .../X86/dbg-value-inlined-parameter.ll | 21 ++--- test/DebugInfo/cross-cu-inlining.ll | 13 ++- 5 files changed, 62 insertions(+), 66 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index c28c20d68a7a..c0badde3685a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -316,20 +316,6 @@ DIE &DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit &SPCU, DISubprogram SP) { DIE *SPDie = SPCU.getOrCreateSubprogramDIE(SP); - assert(SPDie && "Unable to find subprogram DIE!"); - - // If we're updating an abstract DIE, then we will be adding the children and - // object pointer later on. But what we don't want to do is process the - // concrete DIE twice. - if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) { - assert(SPDie == AbsSPDIE); - // Pick up abstract subprogram DIE. - SPDie = &SPCU.createAndAddDIE( - dwarf::DW_TAG_subprogram, - *SPCU.getOrCreateContextDIE(resolve(SP.getContext()))); - SPCU.addDIEEntry(*SPDie, dwarf::DW_AT_abstract_origin, *AbsSPDIE); - } - attachLowHighPC(SPCU, *SPDie, FunctionBeginSym, FunctionEndSym); const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); @@ -525,6 +511,8 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, DISubprogram SP(Scope->getScopeNode()); + ProcessedSPNodes.insert(SP); + DIE *&AbsDef = AbstractSPDies[SP]; if (AbsDef) return; @@ -532,10 +520,24 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. 
DwarfCompileUnit &SPCU = *SPMap[SP]; - AbsDef = SPCU.getOrCreateSubprogramDIE(SP); + DIE *ContextDIE; + + // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with + // the important distinction that the DIDescriptor is not associated with the + // DIE (since the DIDescriptor will be associated with the concrete DIE, if + // any). It could be refactored to some common utility function. + if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + ContextDIE = &SPCU.getUnitDie(); + SPCU.getOrCreateSubprogramDIE(SPDecl); + } else + ContextDIE = SPCU.getOrCreateContextDIE(resolve(SP.getContext())); - if (!ProcessedSPNodes.insert(SP)) - return; + // Passing null as the associated DIDescriptor because the abstract definition + // shouldn't be found by lookup. + AbsDef = &SPCU.createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, + DIDescriptor()); + SPCU.applySubprogramAttributes(SP, *AbsDef); + SPCU.addGlobalName(SP.getName(), *AbsDef, resolve(SP.getContext())); SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); createAndAddScopeChildren(SPCU, Scope, *AbsDef); @@ -686,28 +688,6 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { return NewCU; } -// Construct subprogram DIE. -void DwarfDebug::constructSubprogramDIE(DwarfCompileUnit &TheCU, - const MDNode *N) { - // FIXME: We should only call this routine once, however, during LTO if a - // program is defined in multiple CUs we could end up calling it out of - // beginModule as we walk the CUs. - - DwarfCompileUnit *&CURef = SPMap[N]; - if (CURef) - return; - CURef = &TheCU; - - DISubprogram SP(N); - assert(SP.isSubprogram()); - assert(SP.isDefinition()); - - DIE &SubprogramDie = *TheCU.getOrCreateSubprogramDIE(SP); - - // Expose as a global name. - TheCU.addGlobalName(SP.getName(), SubprogramDie, resolve(SP.getContext())); -} - void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N) { DIImportedEntity Module(N); @@ -826,12 +806,19 @@ void DwarfDebug::finishSubprogramDefinitions() { if (SPMap[SP] != SPCU) continue; DIE *D = SPCU->getDIE(SP); - if (!D) - // Lazily construct the subprogram if we didn't see either concrete or - // inlined versions during codegen. - D = SPCU->getOrCreateSubprogramDIE(SP); - SPCU->applySubprogramAttributes(SP, *D); - SPCU->addGlobalName(SP.getName(), *D, resolve(SP.getContext())); + if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) { + if (D) + // If this subprogram has an abstract definition, reference that + SPCU->addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); + } else { + if (!D) + // Lazily construct the subprogram if we didn't see either concrete or + // inlined versions during codegen. 
+ D = SPCU->getOrCreateSubprogramDIE(SP); + // And attach the attributes + SPCU->applySubprogramAttributes(SP, *D); + SPCU->addGlobalName(SP.getName(), *D, resolve(SP.getContext())); + } } } } @@ -861,7 +848,9 @@ void DwarfDebug::collectDeadVariables() { if (Variables.getNumElements() == 0) continue; - DIE *SPDIE = SPCU->getDIE(SP); + DIE *SPDIE = AbstractSPDies.lookup(SP); + if (!SPDIE) + SPDIE = SPCU->getDIE(SP); assert(SPDIE); for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { DIVariable DV(Variables.getElement(vi)); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 4a4d01246c23..2f5abc829ea1 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -491,9 +491,6 @@ class DwarfDebug : public AsmPrinterHandler { /// DW_TAG_compile_unit. DwarfCompileUnit &constructDwarfCompileUnit(DICompileUnit DIUnit); - /// \brief Construct subprogram DIE. - void constructSubprogramDIE(DwarfCompileUnit &TheCU, const MDNode *N); - /// \brief Construct imported_module or imported_declaration DIE. void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N); diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll index 5d9f6a5779bb..40300de793d5 100644 --- a/test/DebugInfo/X86/concrete_out_of_line.ll +++ b/test/DebugInfo/X86/concrete_out_of_line.ll @@ -15,15 +15,15 @@ ; CHECK: [[DTOR_DECL:0x........]]: DW_TAG_subprogram ; CHECK: [[D2_ABS:.*]]: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_inline ; CHECK-NEXT: DW_AT_{{.*}}linkage_name {{.*}}D2 ; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]} +; CHECK-NEXT: DW_AT_inline ; CHECK-NOT: DW_AT ; CHECK: DW_TAG ; CHECK: [[D1_ABS:.*]]: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_inline ; CHECK-NEXT: DW_AT_{{.*}}linkage_name {{.*}}D1 ; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]} +; CHECK-NEXT: DW_AT_inline ; CHECK-NOT: DW_AT ; CHECK: [[D1_THIS_ABS:.*]]: DW_TAG_formal_parameter @@ -49,9 +49,11 @@ ; and then that a TAG_subprogram refers to it with AT_abstract_origin. ; CHECK: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D1_ABS]]} +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_abstract_origin {{.*}} {[[D1_ABS]]} ; CHECK: DW_TAG_formal_parameter -; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D1_THIS_ABS]]} +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_abstract_origin {{.*}} {[[D1_THIS_ABS]]} ; CHECK: DW_TAG_inlined_subroutine ; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D2_ABS]]} diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll index 74b2f8bc338c..1922272cab90 100644 --- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll +++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll @@ -5,12 +5,16 @@ ; RUN: llc -mtriple=x86_64-apple-darwin < %s -filetype=obj -regalloc=basic \ ; RUN: | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=CHECK --check-prefix=DARWIN %s -; FIXME: This is both a concrete and abstract definition, which is -; incorrect. They should be separate -; CHECK: [[ABS:.*]]: DW_TAG_subprogram -; CHECK-NOT: DW_TAG -; CHECK: DW_AT_high_pc -; CHECK-NOT: DW_TAG +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_abstract_origin {{.*}}{[[ABS:.*]]} +; FIXME: An out of line definition preceeding an inline usage doesn't properly +; reference abstract variables. 
+; CHECK: DW_TAG_formal_parameter +; CHECK-NEXT: DW_AT_name {{.*}} "sp" +; CHECK: DW_TAG_formal_parameter +; CHECK-NEXT: DW_AT_name {{.*}} "nums" + +; CHECK: [[ABS]]: DW_TAG_subprogram ; CHECK: DW_AT_name {{.*}} "foo" ; CHECK: [[ABS_SP:.*]]: DW_TAG_formal_parameter ; CHECK-NEXT: DW_AT_name {{.*}} "sp" @@ -26,10 +30,9 @@ ;CHECK: DW_TAG_formal_parameter ;FIXME: Linux shouldn't drop this parameter either... -;FIXME: These parameters should have DW_AT_abstract_origin, instead of names. -;DARWIN-NEXT: DW_AT_name {{.*}} "sp" +;DARWIN-NEXT: DW_AT_abstract_origin {{.*}}{[[ABS_SP]]} ;DARWIN: DW_TAG_formal_parameter -;CHECK-NEXT: DW_AT_name {{.*}} "nums" +;CHECK-NEXT: DW_AT_abstract_origin {{.*}}{[[ABS_NUMS]]} ;CHECK-NOT: DW_TAG_formal_parameter %struct.S1 = type { float*, i32 } diff --git a/test/DebugInfo/cross-cu-inlining.ll b/test/DebugInfo/cross-cu-inlining.ll index 6e0378d57f0b..266a24ddc670 100644 --- a/test/DebugInfo/cross-cu-inlining.ll +++ b/test/DebugInfo/cross-cu-inlining.ll @@ -25,9 +25,11 @@ ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_type [DW_FORM_ref_addr] (0x00000000[[INT:.*]]) ; CHECK: DW_TAG_inlined_subroutine -; CHECK-NEXT: DW_AT_abstract_origin {{.*}}[[ABS_FUNC:........]]) +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_abstract_origin {{.*}}[[ABS_FUNC:........]]) ; CHECK: DW_TAG_formal_parameter -; CHECK-NEXT: DW_AT_abstract_origin {{.*}}[[ABS_VAR:........]]) +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_abstract_origin {{.*}}[[ABS_VAR:........]]) ; Check the abstract definition is in the 'b.cpp' CU and doesn't contain any ; concrete information (address range or variable location) @@ -48,10 +50,13 @@ ; Check the concrete out of line definition references the abstract and ; provides the address range and variable location ; CHECK: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {0x[[ABS_FUNC]]} +; CHECK-NOT: DW_TAG ; CHECK: DW_AT_low_pc +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_abstract_origin {{.*}} {0x[[ABS_FUNC]]} ; CHECK: DW_TAG_formal_parameter -; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {0x[[ABS_VAR]]} +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_abstract_origin {{.*}} {0x[[ABS_VAR]]} ; CHECK: DW_AT_location From 95ce098219b204612be7595a94a382d5cab6eea4 Mon Sep 17 00:00:00 2001 From: Sasa Stankovic Date: Tue, 27 May 2014 18:53:06 +0000 Subject: [PATCH 191/906] [mips] Optimize long branch for MIPS64 by removing %higher and %highest. %higher and %highest can have non-zero values only for offsets greater than 2GB, which is highly unlikely, if not impossible when compiling a single function. This makes long branch for MIPS64 3 instructions smaller. 
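As a standalone sketch of the arithmetic (not code from this patch; the helpers assume the usual carry-adjusted MIPS decomposition, where %lo is the sign-extended low 16 bits and %hi absorbs the +0x8000 rounding): for any offset that fits in a signed 32-bit range, %higher and %highest are zero, so the %hi/%lo pair alone rebuilds the offset.

  #include <cassert>
  #include <cstdint>

  // %lo: sign-extended low 16 bits.  %hi: next 16 bits, rounded so that adding
  // the sign-extended %lo afterwards reproduces the value.
  static int64_t loPart(int64_t X) { return static_cast<int16_t>(X & 0xffff); }
  static int64_t hiPart(int64_t X) {
    return static_cast<int16_t>(((X + 0x8000) >> 16) & 0xffff);
  }

  int main() {
    int64_t Off = -0x100000;  // e.g. a -1MB branch offset, well within +/- 2GB
    // daddiu $at, $zero, %hi(off); dsll $at, $at, 16; daddiu $at, $at, %lo(off)
    int64_t Rebuilt = hiPart(Off) * 65536 + loPart(Off);
    assert(Rebuilt == Off && "the %higher/%highest parts contribute nothing here");
    return 0;
  }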
Differential Revision: http://llvm-reviews.chandlerc.com/D3281.diff git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209678 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 8 ++----- lib/Target/Mips/MipsAsmPrinter.cpp | 1 - lib/Target/Mips/MipsLongBranch.cpp | 36 ++++++++++++++--------------- lib/Target/Mips/MipsMCInstLower.cpp | 23 +++++++----------- lib/Target/Mips/MipsMCInstLower.h | 3 +-- test/CodeGen/Mips/longbranch.ll | 5 +--- 6 files changed, 30 insertions(+), 46 deletions(-) diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 43103e65375e..924b32529b2d 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -245,16 +245,12 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in { "sll\t$rd, $rt, 0", [], II_SLL>; } -// We need the following two pseudo instructions to avoid offset calculation for +// We need the following pseudo instruction to avoid offset calculation for // long branches. See the comment in file MipsLongBranch.cpp for detailed // explanation. -// Expands to: lui $dst, %highest($tgt - $baltgt) -def LONG_BRANCH_LUi64 : PseudoSE<(outs GPR64Opnd:$dst), - (ins brtarget:$tgt, brtarget:$baltgt), []>; - // Expands to: daddiu $dst, $src, %PART($tgt - $baltgt) -// where %PART may be %higher, %hi or %lo, depending on the relocation kind +// where %PART may be %hi or %lo, depending on the relocation kind // that $tgt is annotated with. def LONG_BRANCH_DADDiu : PseudoSE<(outs GPR64Opnd:$dst), (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 626657e9dff2..6df90aa75a11 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -958,7 +958,6 @@ void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) { bool MipsAsmPrinter::isLongBranchPseudo(int Opcode) const { return (Opcode == Mips::LONG_BRANCH_LUi || Opcode == Mips::LONG_BRANCH_ADDiu - || Opcode == Mips::LONG_BRANCH_LUi64 || Opcode == Mips::LONG_BRANCH_DADDiu); } diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index a8fd39176eb6..acfe76e35cf5 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -64,7 +64,7 @@ namespace { : MachineFunctionPass(ID), TM(tm), IsPIC(TM.getRelocationModel() == Reloc::PIC_), ABI(TM.getSubtarget().getTargetABI()), - LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 13 : 9)) {} + LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 10 : 9)) {} const char *getPassName() const override { return "Mips Long Branch"; @@ -324,10 +324,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { // $longbr: // daddiu $sp, $sp, -16 // sd $ra, 0($sp) - // lui64 $at, %highest($tgt - $baltgt) - // daddiu $at, $at, %higher($tgt - $baltgt) - // dsll $at, $at, 16 - // daddiu $at, $at, %hi($tgt - $baltgt) + // daddiu $at, $zero, %hi($tgt - $baltgt) // dsll $at, $at, 16 // bal $baltgt // daddiu $at, $at, %lo($tgt - $baltgt) @@ -339,10 +336,20 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { // $fallthrough: // - // TODO: %highest and %higher can have non-zero values only when the - // offset is greater than 4GB, which is highly unlikely. Replace - // them (and the following instructon that shifts $at by 16) with the - // instruction that sets $at to zero. + // We assume the branch is within-function, and that offset is within + // +/- 2GB. 
High 32 bits will therefore always be zero. + + // Note that this will work even if the offset is negative, because + // of the +1 modification that's added in that case. For example, if the + // offset is -1MB (0xFFFFFFFFFFF00000), the computation for %higher is + // + // 0xFFFFFFFFFFF00000 + 0x80008000 = 0x000000007FF08000 + // + // and the bits [47:32] are zero. For %highest + // + // 0xFFFFFFFFFFF00000 + 0x800080008000 = 0x000080007FF08000 + // + // and the bits [63:48] are zero. Pos = LongBrMBB->begin(); @@ -350,16 +357,9 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addReg(Mips::SP_64).addImm(-16); BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SD)).addReg(Mips::RA_64) .addReg(Mips::SP_64).addImm(0); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi64), - Mips::AT_64).addMBB(TgtMBB).addMBB(BalTgtMBB); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu), - Mips::AT_64).addReg(Mips::AT_64).addMBB(TgtMBB, MipsII::MO_HIGHER) - .addMBB(BalTgtMBB); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64) - .addReg(Mips::AT_64).addImm(16); BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu), - Mips::AT_64).addReg(Mips::AT_64).addMBB(TgtMBB, MipsII::MO_ABS_HI) - .addMBB(BalTgtMBB); + Mips::AT_64).addReg(Mips::ZERO_64) + .addMBB(TgtMBB, MipsII::MO_ABS_HI).addMBB(BalTgtMBB); BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64) .addReg(Mips::AT_64).addImm(16); diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 85f786746611..821392e1d45f 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -162,16 +162,16 @@ MCOperand MipsMCInstLower::createSub(MachineBasicBlock *BB1, } void MipsMCInstLower:: -lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI, int Opcode, - MCSymbolRefExpr::VariantKind Kind) const { - OutMI.setOpcode(Opcode); +lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(Mips::LUi); // Lower register operand. OutMI.addOperand(LowerOperand(MI->getOperand(0))); - // Create %hi($tgt-$baltgt) or %highest($tgt-$baltgt). + // Create %hi($tgt-$baltgt). OutMI.addOperand(createSub(MI->getOperand(1).getMBB(), - MI->getOperand(2).getMBB(), Kind)); + MI->getOperand(2).getMBB(), + MCSymbolRefExpr::VK_Mips_ABS_HI)); } void MipsMCInstLower:: @@ -185,7 +185,7 @@ lowerLongBranchADDiu(const MachineInstr *MI, MCInst &OutMI, int Opcode, OutMI.addOperand(LowerOperand(MO)); } - // Create %lo($tgt-$baltgt), %hi($tgt-$baltgt) or %higher($tgt-$baltgt). + // Create %lo($tgt-$baltgt) or %hi($tgt-$baltgt). 
OutMI.addOperand(createSub(MI->getOperand(2).getMBB(), MI->getOperand(3).getMBB(), Kind)); } @@ -196,11 +196,7 @@ bool MipsMCInstLower::lowerLongBranch(const MachineInstr *MI, default: return false; case Mips::LONG_BRANCH_LUi: - lowerLongBranchLUi(MI, OutMI, Mips::LUi, MCSymbolRefExpr::VK_Mips_ABS_HI); - return true; - case Mips::LONG_BRANCH_LUi64: - lowerLongBranchLUi(MI, OutMI, Mips::LUi64, - MCSymbolRefExpr::VK_Mips_HIGHEST); + lowerLongBranchLUi(MI, OutMI); return true; case Mips::LONG_BRANCH_ADDiu: lowerLongBranchADDiu(MI, OutMI, Mips::ADDiu, @@ -208,10 +204,7 @@ bool MipsMCInstLower::lowerLongBranch(const MachineInstr *MI, return true; case Mips::LONG_BRANCH_DADDiu: unsigned TargetFlags = MI->getOperand(2).getTargetFlags(); - if (TargetFlags == MipsII::MO_HIGHER) - lowerLongBranchADDiu(MI, OutMI, Mips::DADDiu, - MCSymbolRefExpr::VK_Mips_HIGHER); - else if (TargetFlags == MipsII::MO_ABS_HI) + if (TargetFlags == MipsII::MO_ABS_HI) lowerLongBranchADDiu(MI, OutMI, Mips::DADDiu, MCSymbolRefExpr::VK_Mips_ABS_HI); else if (TargetFlags == MipsII::MO_ABS_LO) diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 6971f49ff233..269190ffc065 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -39,8 +39,7 @@ class LLVM_LIBRARY_VISIBILITY MipsMCInstLower { MachineOperandType MOTy, unsigned Offset) const; MCOperand createSub(MachineBasicBlock *BB1, MachineBasicBlock *BB2, MCSymbolRefExpr::VariantKind Kind) const; - void lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI, - int Opcode, MCSymbolRefExpr::VariantKind Kind) const; + void lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI) const; void lowerLongBranchADDiu(const MachineInstr *MI, MCInst &OutMI, int Opcode, MCSymbolRefExpr::VariantKind Kind) const; diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll index f9980940643d..c7fe6fd69dc1 100644 --- a/test/CodeGen/Mips/longbranch.ll +++ b/test/CodeGen/Mips/longbranch.ll @@ -80,10 +80,7 @@ end: ; Check for long branch expansion: ; N64: daddiu $sp, $sp, -16 ; N64-NEXT: sd $ra, 0($sp) -; N64-NEXT: lui $1, %highest(($[[BB2:BB[0-9_]+]])-($[[BB1:BB[0-9_]+]])) -; N64-NEXT: daddiu $1, $1, %higher(($[[BB2]])-($[[BB1]])) -; N64-NEXT: dsll $1, $1, 16 -; N64-NEXT: daddiu $1, $1, %hi(($[[BB2]])-($[[BB1]])) +; N64-NEXT: daddiu $1, $zero, %hi(($[[BB2:BB[0-9_]+]])-($[[BB1:BB[0-9_]+]])) ; N64-NEXT: dsll $1, $1, 16 ; N64-NEXT: bal $[[BB1]] ; N64-NEXT: daddiu $1, $1, %lo(($[[BB2]])-($[[BB1]])) From 502ffc7e2d86f9b553ff0ab77a81a470aa777e67 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 27 May 2014 19:34:32 +0000 Subject: [PATCH 192/906] DebugInfo: Simplify solution to avoid DW_AT_artificial on inlined parameters. Originally committed in r207717, I clearly didn't look very closely at the code to understand how existing things were working... 
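A rough standalone sketch of the simplification, with hypothetical types rather than the real DwarfUnit API: once an entry that has an abstract origin emits only a reference to it, the separate "inlined" state becomes unnecessary and a single Abstract flag is enough.

  #include <memory>
  #include <string>

  // Hypothetical stand-in, not the LLVM DIE class.
  struct DIE { std::string Desc; };

  std::unique_ptr<DIE> buildVariableDIE(const DIE *AbstractOrigin, bool Abstract,
                                        const std::string &Name, bool Artificial) {
    auto D = std::make_unique<DIE>();
    if (AbstractOrigin) {
      // Inlined and concrete instances just point at the abstract DIE;
      // repeating the name or the artificial flag here would be redundant.
      D->Desc = "abstract_origin -> " + AbstractOrigin->Desc;
    } else {
      D->Desc = Name;
      if (Artificial)
        D->Desc += " [artificial]";
    }
    if (Abstract)
      return D;            // abstract definitions never carry a location
    D->Desc += " @location";
    return D;
  }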
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209680 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 7 +------ lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 22 +++++++++------------- lib/CodeGen/AsmPrinter/DwarfUnit.h | 6 ++---- 3 files changed, 12 insertions(+), 23 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index c0badde3685a..967c7b1b5964 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -448,12 +448,7 @@ static std::unique_ptr constructVariableDIE(DwarfCompileUnit &TheCU, DbgVariable &DV, const LexicalScope &Scope, DIE *&ObjectPointer) { - AbstractOrInlined AOI = AOI_None; - if (Scope.isAbstractScope()) - AOI = AOI_Abstract; - else if (Scope.getInlinedAt()) - AOI = AOI_Inlined; - auto Var = TheCU.constructVariableDIE(DV, AOI); + auto Var = TheCU.constructVariableDIE(DV, Scope.isAbstractScope()); if (DV.isObjectPointer()) ObjectPointer = Var.get(); return Var; diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 2707f8b73d84..4bf0b1878473 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1769,34 +1769,30 @@ void DwarfUnit::constructContainingTypeDIEs() { /// constructVariableDIE - Construct a DIE for the given DbgVariable. std::unique_ptr DwarfUnit::constructVariableDIE(DbgVariable &DV, - AbstractOrInlined AbsIn) { - auto D = constructVariableDIEImpl(DV, AbsIn); + bool Abstract) { + auto D = constructVariableDIEImpl(DV, Abstract); DV.setDIE(*D); return D; } -std::unique_ptr -DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV, - AbstractOrInlined AbsIn) { +std::unique_ptr DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV, + bool Abstract) { StringRef Name = DV.getName(); // Define variable debug information entry. auto VariableDie = make_unique(DV.getTag()); - DbgVariable *AbsVar = DV.getAbstractVariable(); - DIE *AbsDIE = AbsVar ? AbsVar->getDIE() : nullptr; - if (AbsDIE) - addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, *AbsDIE); + if (DbgVariable *AbsVar = DV.getAbstractVariable()) + addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, *AbsVar->getDIE()); else { if (!Name.empty()) addString(*VariableDie, dwarf::DW_AT_name, Name); addSourceLine(*VariableDie, DV.getVariable()); addType(*VariableDie, DV.getType()); + if (DV.isArtificial()) + addFlag(*VariableDie, dwarf::DW_AT_artificial); } - if (AbsIn != AOI_Inlined && DV.isArtificial()) - addFlag(*VariableDie, dwarf::DW_AT_artificial); - - if (AbsIn == AOI_Abstract) + if (Abstract) return VariableDie; // Add variable address. diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 7025b712f0e7..acb75283530f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -61,8 +61,6 @@ class RangeSpanList { void addRange(RangeSpan Range) { Ranges.push_back(Range); } }; -enum AbstractOrInlined { AOI_None, AOI_Inlined, AOI_Abstract }; - //===----------------------------------------------------------------------===// /// Unit - This dwarf writer support class manages information associated /// with a source file. @@ -419,7 +417,7 @@ class DwarfUnit { /// constructVariableDIE - Construct a DIE for the given DbgVariable. std::unique_ptr constructVariableDIE(DbgVariable &DV, - AbstractOrInlined AbsIn = AOI_None); + bool Abstract = false); /// constructSubprogramArguments - Construct function argument DIEs. 
void constructSubprogramArguments(DIE &Buffer, DIArray Args); @@ -457,7 +455,7 @@ class DwarfUnit { /// \brief Construct a DIE for the given DbgVariable without initializing the /// DbgVariable's DIE reference. std::unique_ptr constructVariableDIEImpl(const DbgVariable &DV, - AbstractOrInlined AbsIn); + bool Abstract); /// constructTypeDIE - Construct basic type die from DIBasicType. void constructTypeDIE(DIE &Buffer, DIBasicType BTy); From 6343f5cfefad072aea1d2ca891d7aced130af0ae Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 27 May 2014 20:20:43 +0000 Subject: [PATCH 193/906] DebugInfo: partially revert cleanup committed in r209680 I'm not sure exactly where/how we end up with an abstract DbgVariable with a null DIE, but we do... looking into it & will add a test and/or fix when I figure it out. Currently shows up in selfhost or compiler-rt builds. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209683 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 4bf0b1878473..a70c0f7c11ae 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1781,7 +1781,8 @@ std::unique_ptr DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV, // Define variable debug information entry. auto VariableDie = make_unique(DV.getTag()); - if (DbgVariable *AbsVar = DV.getAbstractVariable()) + DbgVariable *AbsVar = DV.getAbstractVariable(); + if (AbsVar && AbsVar->getDIE()) addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, *AbsVar->getDIE()); else { if (!Name.empty()) From 54ba0dfb55153b14871ff9fe620dde3c32fbda17 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 27 May 2014 21:35:17 +0000 Subject: [PATCH 194/906] Wording fix for llvm.global_dtors docs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209687 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 6473bb9ff270..fa40363a7548 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -3201,7 +3201,7 @@ The '``llvm.global_dtors``' Global Variable The ``@llvm.global_dtors`` array contains a list of destructor functions, priorities, and an optional associated global or function. The functions referenced by this array will be called in descending -order of priority (i.e. highest first) when the module is loaded. The +order of priority (i.e. highest first) when the module is unloaded. The order of functions with the same priority is not defined. If the third field is present, non-null, and points to a global variable From d4ffb93bf764b5b515beb2cad9bf80c600ac32dc Mon Sep 17 00:00:00 2001 From: "Arnaud A. de Grandmaison" Date: Tue, 27 May 2014 21:35:46 +0000 Subject: [PATCH 195/906] Factor out comparison of Instruction "special" states. No functional change. 
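A minimal standalone sketch of the refactoring pattern, with a hypothetical Inst type rather than llvm::Instruction: the per-opcode checks move into one shared helper, and the only difference between the two callers is an IgnoreAlignment flag.

  #include <cassert>

  // Hypothetical instruction type.
  struct Inst { unsigned Opcode; unsigned Align; bool Volatile; };

  // Shared helper: the "special state" checks live in one place.
  static bool haveSameSpecialState(const Inst &A, const Inst &B,
                                   bool IgnoreAlignment = false) {
    assert(A.Opcode == B.Opcode && "can only compare like with like");
    return A.Volatile == B.Volatile &&
           (A.Align == B.Align || IgnoreAlignment);
  }

  bool isIdenticalTo(const Inst &A, const Inst &B) {
    return A.Opcode == B.Opcode && haveSameSpecialState(A, B);
  }

  bool isSameOperationAs(const Inst &A, const Inst &B, bool IgnoreAlignment) {
    return A.Opcode == B.Opcode && haveSameSpecialState(A, B, IgnoreAlignment);
  }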
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209688 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Instruction.cpp | 139 ++++++++++++++++------------------------- 1 file changed, 55 insertions(+), 84 deletions(-) diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index e638540b2c90..28cc4cb9f136 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -262,6 +262,58 @@ const char *Instruction::getOpcodeName(unsigned OpCode) { } } +/// Return true if both instructions have the same special state +/// This must be kept in sync with lib/Transforms/IPO/MergeFunctions.cpp. +static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2, + bool IgnoreAlignment = false) { + assert(I1->getOpcode() == I2->getOpcode() && + "Can not compare special state of different instructions"); + + if (const LoadInst *LI = dyn_cast(I1)) + return LI->isVolatile() == cast(I2)->isVolatile() && + (LI->getAlignment() == cast(I2)->getAlignment() || + IgnoreAlignment) && + LI->getOrdering() == cast(I2)->getOrdering() && + LI->getSynchScope() == cast(I2)->getSynchScope(); + if (const StoreInst *SI = dyn_cast(I1)) + return SI->isVolatile() == cast(I2)->isVolatile() && + (SI->getAlignment() == cast(I2)->getAlignment() || + IgnoreAlignment) && + SI->getOrdering() == cast(I2)->getOrdering() && + SI->getSynchScope() == cast(I2)->getSynchScope(); + if (const CmpInst *CI = dyn_cast(I1)) + return CI->getPredicate() == cast(I2)->getPredicate(); + if (const CallInst *CI = dyn_cast(I1)) + return CI->isTailCall() == cast(I2)->isTailCall() && + CI->getCallingConv() == cast(I2)->getCallingConv() && + CI->getAttributes() == cast(I2)->getAttributes(); + if (const InvokeInst *CI = dyn_cast(I1)) + return CI->getCallingConv() == cast(I2)->getCallingConv() && + CI->getAttributes() == + cast(I2)->getAttributes(); + if (const InsertValueInst *IVI = dyn_cast(I1)) + return IVI->getIndices() == cast(I2)->getIndices(); + if (const ExtractValueInst *EVI = dyn_cast(I1)) + return EVI->getIndices() == cast(I2)->getIndices(); + if (const FenceInst *FI = dyn_cast(I1)) + return FI->getOrdering() == cast(I2)->getOrdering() && + FI->getSynchScope() == cast(I2)->getSynchScope(); + if (const AtomicCmpXchgInst *CXI = dyn_cast(I1)) + return CXI->isVolatile() == cast(I2)->isVolatile() && + CXI->getSuccessOrdering() == + cast(I2)->getSuccessOrdering() && + CXI->getFailureOrdering() == + cast(I2)->getFailureOrdering() && + CXI->getSynchScope() == cast(I2)->getSynchScope(); + if (const AtomicRMWInst *RMWI = dyn_cast(I1)) + return RMWI->getOperation() == cast(I2)->getOperation() && + RMWI->isVolatile() == cast(I2)->isVolatile() && + RMWI->getOrdering() == cast(I2)->getOrdering() && + RMWI->getSynchScope() == cast(I2)->getSynchScope(); + + return true; +} + /// isIdenticalTo - Return true if the specified instruction is exactly /// identical to the current one. This means that all operands match and any /// extra information (e.g. load is volatile) agree. @@ -284,51 +336,13 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const { if (!std::equal(op_begin(), op_end(), I->op_begin())) return false; - // Check special state that is a part of some instructions. 
- if (const LoadInst *LI = dyn_cast(this)) - return LI->isVolatile() == cast(I)->isVolatile() && - LI->getAlignment() == cast(I)->getAlignment() && - LI->getOrdering() == cast(I)->getOrdering() && - LI->getSynchScope() == cast(I)->getSynchScope(); - if (const StoreInst *SI = dyn_cast(this)) - return SI->isVolatile() == cast(I)->isVolatile() && - SI->getAlignment() == cast(I)->getAlignment() && - SI->getOrdering() == cast(I)->getOrdering() && - SI->getSynchScope() == cast(I)->getSynchScope(); - if (const CmpInst *CI = dyn_cast(this)) - return CI->getPredicate() == cast(I)->getPredicate(); - if (const CallInst *CI = dyn_cast(this)) - return CI->isTailCall() == cast(I)->isTailCall() && - CI->getCallingConv() == cast(I)->getCallingConv() && - CI->getAttributes() == cast(I)->getAttributes(); - if (const InvokeInst *CI = dyn_cast(this)) - return CI->getCallingConv() == cast(I)->getCallingConv() && - CI->getAttributes() == cast(I)->getAttributes(); - if (const InsertValueInst *IVI = dyn_cast(this)) - return IVI->getIndices() == cast(I)->getIndices(); - if (const ExtractValueInst *EVI = dyn_cast(this)) - return EVI->getIndices() == cast(I)->getIndices(); - if (const FenceInst *FI = dyn_cast(this)) - return FI->getOrdering() == cast(FI)->getOrdering() && - FI->getSynchScope() == cast(FI)->getSynchScope(); - if (const AtomicCmpXchgInst *CXI = dyn_cast(this)) - return CXI->isVolatile() == cast(I)->isVolatile() && - CXI->getSuccessOrdering() == - cast(I)->getSuccessOrdering() && - CXI->getFailureOrdering() == - cast(I)->getFailureOrdering() && - CXI->getSynchScope() == cast(I)->getSynchScope(); - if (const AtomicRMWInst *RMWI = dyn_cast(this)) - return RMWI->getOperation() == cast(I)->getOperation() && - RMWI->isVolatile() == cast(I)->isVolatile() && - RMWI->getOrdering() == cast(I)->getOrdering() && - RMWI->getSynchScope() == cast(I)->getSynchScope(); if (const PHINode *thisPHI = dyn_cast(this)) { const PHINode *otherPHI = cast(I); return std::equal(thisPHI->block_begin(), thisPHI->block_end(), otherPHI->block_begin()); } - return true; + + return haveSameSpecialState(this, I); } // isSameOperationAs @@ -355,50 +369,7 @@ bool Instruction::isSameOperationAs(const Instruction *I, getOperand(i)->getType() != I->getOperand(i)->getType()) return false; - // Check special state that is a part of some instructions. 
- if (const LoadInst *LI = dyn_cast(this)) - return LI->isVolatile() == cast(I)->isVolatile() && - (LI->getAlignment() == cast(I)->getAlignment() || - IgnoreAlignment) && - LI->getOrdering() == cast(I)->getOrdering() && - LI->getSynchScope() == cast(I)->getSynchScope(); - if (const StoreInst *SI = dyn_cast(this)) - return SI->isVolatile() == cast(I)->isVolatile() && - (SI->getAlignment() == cast(I)->getAlignment() || - IgnoreAlignment) && - SI->getOrdering() == cast(I)->getOrdering() && - SI->getSynchScope() == cast(I)->getSynchScope(); - if (const CmpInst *CI = dyn_cast(this)) - return CI->getPredicate() == cast(I)->getPredicate(); - if (const CallInst *CI = dyn_cast(this)) - return CI->isTailCall() == cast(I)->isTailCall() && - CI->getCallingConv() == cast(I)->getCallingConv() && - CI->getAttributes() == cast(I)->getAttributes(); - if (const InvokeInst *CI = dyn_cast(this)) - return CI->getCallingConv() == cast(I)->getCallingConv() && - CI->getAttributes() == - cast(I)->getAttributes(); - if (const InsertValueInst *IVI = dyn_cast(this)) - return IVI->getIndices() == cast(I)->getIndices(); - if (const ExtractValueInst *EVI = dyn_cast(this)) - return EVI->getIndices() == cast(I)->getIndices(); - if (const FenceInst *FI = dyn_cast(this)) - return FI->getOrdering() == cast(I)->getOrdering() && - FI->getSynchScope() == cast(I)->getSynchScope(); - if (const AtomicCmpXchgInst *CXI = dyn_cast(this)) - return CXI->isVolatile() == cast(I)->isVolatile() && - CXI->getSuccessOrdering() == - cast(I)->getSuccessOrdering() && - CXI->getFailureOrdering() == - cast(I)->getFailureOrdering() && - CXI->getSynchScope() == cast(I)->getSynchScope(); - if (const AtomicRMWInst *RMWI = dyn_cast(this)) - return RMWI->getOperation() == cast(I)->getOperation() && - RMWI->isVolatile() == cast(I)->isVolatile() && - RMWI->getOrdering() == cast(I)->getOrdering() && - RMWI->getSynchScope() == cast(I)->getSynchScope(); - - return true; + return haveSameSpecialState(this, I, IgnoreAlignment); } /// isUsedOutsideOfBlock - Return true if there are any uses of I outside of the From 08f32401a95e27ca19ce6747105aa8d2a721ba7a Mon Sep 17 00:00:00 2001 From: "Arnaud A. de Grandmaison" Date: Tue, 27 May 2014 22:03:28 +0000 Subject: [PATCH 196/906] No need for those tests to go thru llvm-as and/or llvm-dis. opt can handle them by itself. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209689 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Assembler/half-constprop.ll | 2 +- test/Assembler/half-conv.ll | 2 +- test/DebugInfo/2010-03-19-DbgDeclare.ll | 2 +- test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll | 2 +- test/Transforms/JumpThreading/phi-eq.ll | 2 +- test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/Assembler/half-constprop.ll b/test/Assembler/half-constprop.ll index 03ccdda97e0a..9e24f7242ba9 100644 --- a/test/Assembler/half-constprop.ll +++ b/test/Assembler/half-constprop.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -O3 | llvm-dis | FileCheck %s +; RUN: opt < %s -O3 -S | FileCheck %s ; Testing half constant propagation. define half @abc() nounwind { diff --git a/test/Assembler/half-conv.ll b/test/Assembler/half-conv.ll index bf9ae5713979..70a6b86c393f 100644 --- a/test/Assembler/half-conv.ll +++ b/test/Assembler/half-conv.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -O3 | llvm-dis | FileCheck %s +; RUN: opt < %s -O3 -S | FileCheck %s ; Testing half to float conversion. 
define float @abc() nounwind { diff --git a/test/DebugInfo/2010-03-19-DbgDeclare.ll b/test/DebugInfo/2010-03-19-DbgDeclare.ll index 1ff7fa88bdc3..94aa259d31b6 100644 --- a/test/DebugInfo/2010-03-19-DbgDeclare.ll +++ b/test/DebugInfo/2010-03-19-DbgDeclare.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -verify -S | FileCheck %s +; RUN: opt < %s -verify -S | FileCheck %s ; CHECK: lang 0x8001 diff --git a/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll b/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll index bd174a8be3ff..4ea0b88fd0de 100644 --- a/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll +++ b/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as <%s | opt -ipsccp | llvm-dis | FileCheck %s +; RUN: opt < %s -ipsccp -S | FileCheck %s ; Don't constant-propagate byval pointers, since they are not pointers! ; PR5038 %struct.MYstr = type { i8, i32 } diff --git a/test/Transforms/JumpThreading/phi-eq.ll b/test/Transforms/JumpThreading/phi-eq.ll index 40d3c7edd05d..e05d5ee7c974 100644 --- a/test/Transforms/JumpThreading/phi-eq.ll +++ b/test/Transforms/JumpThreading/phi-eq.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -jump-threading | llvm-dis | FileCheck %s +; RUN: opt < %s -jump-threading -S | FileCheck %s ; Test whether two consecutive switches with identical structures assign the ; proper value to the proper variable. This is really testing ; Instruction::isIdenticalToWhenDefined, as previously that function was diff --git a/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll b/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll index e91d141cc6ff..0534a0bf7d06 100644 --- a/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll +++ b/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -loop-simplify -disable-output +; RUN: opt < %s -loop-simplify -disable-output ; PR1752 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-s0:0:64-f80:32:32" target triple = "i686-pc-mingw32" From 50adf380805c0c239be0e3806b568f1af8cdff45 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Tue, 27 May 2014 22:35:00 +0000 Subject: [PATCH 197/906] Don't pre-populate the set of keys in the map with variable locations history. Current implementation of calculateDbgValueHistory already creates the keys in the expected order (user variables are listed in order of appearance), and should do so later by contract. No functionality change. 
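A standalone sketch of the assumption, with a hypothetical insertion-ordered map rather than the real history map type: keys come out in the order the producer first touches them, so seeding empty histories up front only to fix the ordering is redundant.

  #include <string>
  #include <utility>
  #include <vector>

  // Hypothetical insertion-ordered map standing in for the history map.
  struct OrderedHistoryMap {
    std::vector<std::pair<std::string, std::vector<int>>> Data;
    std::vector<int> &operator[](const std::string &Var) {
      for (auto &KV : Data)
        if (KV.first == Var)
          return KV.second;
      Data.emplace_back(Var, std::vector<int>());  // key order == first appearance
      return Data.back().second;
    }
  };

  // The producer already visits variables in order of appearance, so the keys
  // end up in that order without a separate pre-population pass.
  void recordDbgValue(OrderedHistoryMap &Result, const std::string &Var, int MI) {
    Result[Var].push_back(MI);
  }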
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209690 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AsmPrinter/DbgValueHistoryCalculator.cpp | 1 + lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 13 ++----------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index c9bf1ecf9075..450d15413849 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -144,6 +144,7 @@ void calculateDbgValueHistory(const MachineFunction *MF, continue; } + assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!"); const MDNode *Var = MI.getDebugVariable(); auto &History = Result[Var]; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 967c7b1b5964..dad44b84a153 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1404,17 +1404,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Collect user variables, find the end of the prologue. for (const auto &MBB : *MF) { for (const auto &MI : MBB) { - if (MI.isDebugValue()) { - assert(MI.getNumOperands() > 1 && "Invalid machine instruction!"); - // Keep track of user variables in order of appearance. Create the - // empty history for each variable so that the order of keys in - // DbgValues is correct. Actual history will be populated in - // calculateDbgValueHistory() function. - const MDNode *Var = MI.getDebugVariable(); - DbgValues.insert( - std::make_pair(Var, SmallVector())); - } else if (!MI.getFlag(MachineInstr::FrameSetup) && - PrologEndLoc.isUnknown() && !MI.getDebugLoc().isUnknown()) { + if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && + PrologEndLoc.isUnknown() && !MI.getDebugLoc().isUnknown()) { // First known non-DBG_VALUE and non-frame setup location marks // the beginning of the function body. PrologEndLoc = MI.getDebugLoc(); From 421b2c571cfbd4cad3a6b7834792ae45c87d9c64 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Tue, 27 May 2014 22:41:45 +0000 Subject: [PATCH 198/906] remove constant terms The delinearization is needed only to remove the non linearity induced by expressions involving multiplications of parameters and induction variables. There is no problem in dealing with constant times parameters, or constant times an induction variable. For this reason, the current patch discards all constant terms and multipliers before running the delinearization algorithm on the terms. The only thing remaining in the term expressions are parameters and multiply expressions of parameters: these simplified term expressions are passed to the array shape recognizer that will not recognize constant dimensions anymore: these will be recognized as different strides in parametric subscripts. The only important special case of a constant dimension is the size of elements. Instead of relying on the delinearization to infer the size of an element, compute the element size from the base address type. This is a much more precise way of computing the element size than before, as we would have mixed together the size of an element with the strides of the innermost dimension. 
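A standalone sketch of the term clean-up, using scalar stand-ins rather than SCEVs (the Term type and the names below are hypothetical): each term is divided by the element size taken from the accessed pointer's type, and any remaining constant factor is dropped, so only products of parameters reach the array-shape recognizer.

  #include <cassert>
  #include <string>
  #include <vector>

  // Hypothetical term: a constant coefficient times a product of parameters.
  struct Term {
    long Coefficient;
    std::vector<std::string> Parameters;
  };

  static std::vector<std::string> simplifyTerm(const Term &T, long ElementSize) {
    assert(T.Coefficient % ElementSize == 0 && "terms are multiples of the element size");
    // Step 1: divide by the element size.  Step 2: drop the remaining constant
    // factor.  Only the product of parameters is kept.
    return T.Parameters;
  }

  int main() {
    // For A[2i+b][2j] over "double A[n][m]", the linearized terms are
    // 2*m*sizeof(double) per i and 2*sizeof(double) per j; after the clean-up
    // only the parametric strides remain: {m} and {}.
    Term PerI = {2 * 8, {"m"}};
    Term PerJ = {2 * 8, {}};
    assert(simplifyTerm(PerI, 8) == std::vector<std::string>(1, "m"));
    assert(simplifyTerm(PerJ, 8).empty());
    return 0;
  }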
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209691 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/DependenceAnalysis.h | 3 +- include/llvm/Analysis/ScalarEvolution.h | 6 +- .../Analysis/ScalarEvolutionExpressions.h | 3 +- lib/Analysis/Delinearization.cpp | 4 +- lib/Analysis/DependenceAnalysis.cpp | 13 +-- lib/Analysis/ScalarEvolution.cpp | 88 +++++++++++++++---- .../iv_times_constant_in_subscript.ll | 45 ++++++++++ test/Analysis/DependenceAnalysis/GCD.ll | 12 +-- 8 files changed, 140 insertions(+), 34 deletions(-) create mode 100644 test/Analysis/Delinearization/iv_times_constant_in_subscript.ll diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h index 78a03ff2366d..279755e47622 100644 --- a/include/llvm/Analysis/DependenceAnalysis.h +++ b/include/llvm/Analysis/DependenceAnalysis.h @@ -910,7 +910,8 @@ namespace llvm { const Constraint &CurConstraint) const; bool tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, - SmallVectorImpl &Pair) const; + SmallVectorImpl &Pair, + const SCEV *ElementSize) const; public: static char ID; // Class identification, replacement for typeinfo diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 4db8686bae78..057082676824 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -894,10 +894,14 @@ namespace llvm { /// indirect operand. bool hasOperand(const SCEV *S, const SCEV *Op) const; + /// Return the size of an element read or written by Inst. + const SCEV *getElementSize(Instruction *Inst); + /// Compute the array dimensions Sizes from the set of Terms extracted from /// the memory access function of this SCEVAddRecExpr. void findArrayDimensions(SmallVectorImpl &Terms, - SmallVectorImpl &Sizes) const; + SmallVectorImpl &Sizes, + const SCEV *ElementSize) const; bool runOnFunction(Function &F) override; void releaseMemory() override; diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h index ba32e928d95e..b468fcd025d8 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -434,7 +434,8 @@ namespace llvm { /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i]. 
const SCEV *delinearize(ScalarEvolution &SE, SmallVectorImpl &Subscripts, - SmallVectorImpl &Sizes) const; + SmallVectorImpl &Sizes, + const SCEV *ElementSize) const; }; //===--------------------------------------------------------------------===// diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp index 1a588211a23f..6c8702787d47 100644 --- a/lib/Analysis/Delinearization.cpp +++ b/lib/Analysis/Delinearization.cpp @@ -108,8 +108,8 @@ void Delinearization::print(raw_ostream &O, const Module *) const { O << "AddRec: " << *AR << "\n"; SmallVector Subscripts, Sizes; - const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes); - if (Res == AR || Subscripts.size() == 0 || Sizes.size() == 0 || + const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes, SE->getElementSize(Inst)); + if (Subscripts.size() == 0 || Sizes.size() == 0 || Subscripts.size() != Sizes.size()) { O << "failed to delinearize\n"; continue; diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index 57231b8325a3..33cb20685c0c 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -3180,9 +3180,10 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, /// source and destination array references are recurrences on a nested loop, /// this function flattens the nested recurrences into separate recurrences /// for each loop level. -bool -DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, - SmallVectorImpl &Pair) const { +bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, + const SCEV *DstSCEV, + SmallVectorImpl &Pair, + const SCEV *ElementSize) const { const SCEVAddRecExpr *SrcAR = dyn_cast(SrcSCEV); const SCEVAddRecExpr *DstAR = dyn_cast(DstSCEV); if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) @@ -3195,7 +3196,7 @@ DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, // Second step: find subscript sizes. SmallVector Sizes; - SE->findArrayDimensions(Terms, Sizes); + SE->findArrayDimensions(Terms, Sizes, ElementSize); // Third step: compute the access functions for each subscript. 
SmallVector SrcSubscripts, DstSubscripts; @@ -3353,7 +3354,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, } if (Delinearize && Pairs == 1 && CommonLevels > 1 && - tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { DEBUG(dbgs() << " delinerized GEP\n"); Pairs = Pair.size(); } @@ -3777,7 +3778,7 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, } if (Delinearize && Pairs == 1 && CommonLevels > 1 && - tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { DEBUG(dbgs() << " delinerized GEP\n"); Pairs = Pair.size(); } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 1087e5df1636..4e4eb2143315 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -6944,7 +6944,7 @@ struct SCEVCollectTerms { : Terms(T) {} bool follow(const SCEV *S) { - if (isa(S) || isa(S) || isa(S)) { + if (isa(S) || isa(S)) { if (!containsUndefs(S)) Terms.push_back(S); @@ -7356,13 +7356,46 @@ static inline int numberOfTerms(const SCEV *S) { return 1; } +static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) { + if (isa(T)) + return nullptr; + + if (isa(T)) + return T; + + if (const SCEVMulExpr *M = dyn_cast(T)) { + SmallVector Factors; + for (const SCEV *Op : M->operands()) + if (!isa(Op)) + Factors.push_back(Op); + + return SE.getMulExpr(Factors); + } + + return T; +} + +/// Return the size of an element read or written by Inst. +const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) { + Type *Ty; + if (StoreInst *Store = dyn_cast(Inst)) + Ty = Store->getValueOperand()->getType(); + else if (LoadInst *Load = dyn_cast(Inst)) + Ty = Load->getPointerOperand()->getType(); + else + return nullptr; + + Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty)); + return getSizeOfExpr(ETy, Ty); +} + /// Second step of delinearization: compute the array dimensions Sizes from the /// set of Terms extracted from the memory access function of this SCEVAddRec. -void ScalarEvolution::findArrayDimensions( - SmallVectorImpl &Terms, - SmallVectorImpl &Sizes) const { +void ScalarEvolution::findArrayDimensions(SmallVectorImpl &Terms, + SmallVectorImpl &Sizes, + const SCEV *ElementSize) const { - if (Terms.size() < 2) + if (Terms.size() < 1) return; // Early return when Terms do not contain parameters: we do not delinearize @@ -7385,20 +7418,37 @@ void ScalarEvolution::findArrayDimensions( return numberOfTerms(LHS) > numberOfTerms(RHS); }); + ScalarEvolution &SE = *const_cast(this); + + // Divide all terms by the element size. + for (const SCEV *&Term : Terms) { + const SCEV *Q, *R; + SCEVDivision::divide(SE, Term, ElementSize, &Q, &R); + Term = Q; + } + + SmallVector NewTerms; + + // Remove constant factors. + for (const SCEV *T : Terms) + if (const SCEV *NewT = removeConstantFactors(SE, T)) + NewTerms.push_back(NewT); + DEBUG({ dbgs() << "Terms after sorting:\n"; - for (const SCEV *T : Terms) + for (const SCEV *T : NewTerms) dbgs() << *T << "\n"; }); - ScalarEvolution &SE = *const_cast(this); - bool Res = findArrayDimensionsRec(SE, Terms, Sizes); - - if (!Res) { + if (NewTerms.empty() || + !findArrayDimensionsRec(SE, NewTerms, Sizes)) { Sizes.clear(); return; } + // The last element to be pushed into Sizes is the size of an element. 
+ Sizes.push_back(ElementSize); + DEBUG({ dbgs() << "Sizes:\n"; for (const SCEV *S : Sizes) @@ -7433,9 +7483,14 @@ const SCEV *SCEVAddRecExpr::computeAccessFunctions( Res = Q; + // Do not record the last subscript corresponding to the size of elements in + // the array. if (i == Last) { - // Do not record the last subscript corresponding to the size of elements - // in the array. + + // Bail out if the remainder is too complex. + if (isa(R)) + return nullptr; + Remainder = R; continue; } @@ -7507,10 +7562,9 @@ const SCEV *SCEVAddRecExpr::computeAccessFunctions( /// asking for the SCEV of the memory access with respect to all enclosing /// loops, calling SCEV->delinearize on that and printing the results. -const SCEV * -SCEVAddRecExpr::delinearize(ScalarEvolution &SE, - SmallVectorImpl &Subscripts, - SmallVectorImpl &Sizes) const { +const SCEV *SCEVAddRecExpr::delinearize( + ScalarEvolution &SE, SmallVectorImpl &Subscripts, + SmallVectorImpl &Sizes, const SCEV *ElementSize) const { // First step: collect parametric terms. SmallVector Terms; collectParametricTerms(SE, Terms); @@ -7519,7 +7573,7 @@ SCEVAddRecExpr::delinearize(ScalarEvolution &SE, return nullptr; // Second step: find subscript sizes. - SE.findArrayDimensions(Terms, Sizes); + SE.findArrayDimensions(Terms, Sizes, ElementSize); if (Sizes.empty()) return nullptr; diff --git a/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll b/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll new file mode 100644 index 000000000000..01a4b96b11a0 --- /dev/null +++ b/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; Derived from the following code: +; +; void foo(long n, long m, long b, double A[n][m]) { +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; A[2i+b][2j] = 1.0; +; } + +; AddRec: {{((%m * %b * sizeof(double)) + %A),+,(2 * %m * sizeof(double))}<%for.i>,+,(2 * sizeof(double))}<%for.j> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%m] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{%b,+,2}<%for.i>][{0,+,2}<%for.j>] + + +define void @foo(i64 %n, i64 %m, i64 %b, double* %A) { +entry: + br label %for.i + +for.i: + %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ] + %outerdim = mul nsw i64 %i, 2 + %outerdim2 = add nsw i64 %outerdim, %b + %tmp = mul nsw i64 %outerdim2, %m + br label %for.j + +for.j: + %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j ] + %prodj = mul i64 %j, 2 + %vlaarrayidx.sum = add i64 %prodj, %tmp + %arrayidx = getelementptr inbounds double* %A, i64 %vlaarrayidx.sum + store double 1.0, double* %arrayidx + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, %m + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, %n + br i1 %i.exitcond, label %end, label %for.i + +end: + ret void +} diff --git a/test/Analysis/DependenceAnalysis/GCD.ll b/test/Analysis/DependenceAnalysis/GCD.ll index fd9173b924c2..7eca18ed262c 100644 --- a/test/Analysis/DependenceAnalysis/GCD.ll +++ b/test/Analysis/DependenceAnalysis/GCD.ll @@ -269,10 +269,10 @@ entry: ; CHECK: da analyze - none! ; DELIN: 'Dependence Analysis' for function 'gcd4' -; DELIN: da analyze - none! +; DELIN: da analyze - output [* *]! ; DELIN: da analyze - none! ; DELIN: da analyze - confused! -; DELIN: da analyze - none! +; DELIN: da analyze - input [* *]! ; DELIN: da analyze - confused! ; DELIN: da analyze - none! 
@@ -339,10 +339,10 @@ entry: ; CHECK: da analyze - none! ; DELIN: 'Dependence Analysis' for function 'gcd5' -; DELIN: da analyze - none! +; DELIN: da analyze - output [* *]! ; DELIN: da analyze - flow [<> *]! ; DELIN: da analyze - confused! -; DELIN: da analyze - none! +; DELIN: da analyze - input [* *]! ; DELIN: da analyze - confused! ; DELIN: da analyze - none! @@ -410,10 +410,10 @@ entry: ; CHECK: da analyze - output [* *]! ; DELIN: 'Dependence Analysis' for function 'gcd6' +; DELIN: da analyze - output [* *]! ; DELIN: da analyze - none! -; DELIN: da analyze - flow [=> =>|<]! ; DELIN: da analyze - confused! -; DELIN: da analyze - none! +; DELIN: da analyze - input [* *]! ; DELIN: da analyze - confused! ; DELIN: da analyze - output [* *]! From 79facc9e2921ee9d18d1adc94d1d03562775f431 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Tue, 27 May 2014 22:41:51 +0000 Subject: [PATCH 199/906] remove BasePointer before delinearizing No functional change is intended: instead of relying on the delinearization to come up with the base pointer as a remainder of the divisions in the delinearization, we just compute it from the array access and use that value. We substract the base pointer from the SCEV to be delinearized and that simplifies the work of the delinearizer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209692 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Analysis/ScalarEvolutionExpressions.h | 18 +++++----- lib/Analysis/Delinearization.cpp | 14 ++++++-- lib/Analysis/DependenceAnalysis.cpp | 21 ++++++----- lib/Analysis/ScalarEvolution.cpp | 35 +++++++++---------- 4 files changed, 49 insertions(+), 39 deletions(-) diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h index b468fcd025d8..01b034f8a011 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -362,14 +362,12 @@ namespace llvm { SmallVectorImpl &Terms) const; /// Return in Subscripts the access functions for each dimension in Sizes. - const SCEV * - computeAccessFunctions(ScalarEvolution &SE, - SmallVectorImpl &Subscripts, - SmallVectorImpl &Sizes) const; + void computeAccessFunctions(ScalarEvolution &SE, + SmallVectorImpl &Subscripts, + SmallVectorImpl &Sizes) const; /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the - /// subscripts and sizes of an array access. Returns the remainder of the - /// delinearization that is the offset start of the array. + /// subscripts and sizes of an array access. /// /// The delinearization is a 3 step process: the first two steps compute the /// sizes of each subscript and the third step computes the access functions @@ -432,10 +430,10 @@ namespace llvm { /// The subscript of the outermost dimension is the Quotient: [j+k]. /// /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i]. 
- const SCEV *delinearize(ScalarEvolution &SE, - SmallVectorImpl &Subscripts, - SmallVectorImpl &Sizes, - const SCEV *ElementSize) const; + void delinearize(ScalarEvolution &SE, + SmallVectorImpl &Subscripts, + SmallVectorImpl &Sizes, + const SCEV *ElementSize) const; }; //===--------------------------------------------------------------------===// diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp index 6c8702787d47..9334cebe1802 100644 --- a/lib/Analysis/Delinearization.cpp +++ b/lib/Analysis/Delinearization.cpp @@ -95,26 +95,34 @@ void Delinearization::print(raw_ostream &O, const Module *) const { // Do not analyze memory accesses outside loops. for (Loop *L = LI->getLoopFor(BB); L != nullptr; L = L->getParentLoop()) { const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L); + + const SCEVUnknown *BasePointer = + dyn_cast(SE->getPointerBase(AccessFn)); + // Do not delinearize if we cannot find the base pointer. + if (!BasePointer) + break; + AccessFn = SE->getMinusSCEV(AccessFn, BasePointer); const SCEVAddRecExpr *AR = dyn_cast(AccessFn); // Do not try to delinearize memory accesses that are not AddRecs. if (!AR) break; + O << "\n"; O << "Inst:" << *Inst << "\n"; O << "In Loop with Header: " << L->getHeader()->getName() << "\n"; - O << "AddRec: " << *AR << "\n"; SmallVector Subscripts, Sizes; - const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes, SE->getElementSize(Inst)); + AR->delinearize(*SE, Subscripts, Sizes, SE->getElementSize(Inst)); if (Subscripts.size() == 0 || Sizes.size() == 0 || Subscripts.size() != Sizes.size()) { O << "failed to delinearize\n"; continue; } - O << "Base offset: " << *Res << "\n"; + + O << "Base offset: " << *BasePointer << "\n"; O << "ArrayDecl[UnknownSize]"; int Size = Subscripts.size(); for (int i = 0; i < Size - 1; i++) diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index 33cb20685c0c..d0784f1e678d 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -3184,6 +3184,17 @@ bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, SmallVectorImpl &Pair, const SCEV *ElementSize) const { + const SCEVUnknown *SrcBase = + dyn_cast(SE->getPointerBase(SrcSCEV)); + const SCEVUnknown *DstBase = + dyn_cast(SE->getPointerBase(DstSCEV)); + + if (!SrcBase || !DstBase || SrcBase != DstBase) + return false; + + SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); + DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); + const SCEVAddRecExpr *SrcAR = dyn_cast(SrcSCEV); const SCEVAddRecExpr *DstAR = dyn_cast(DstSCEV); if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) @@ -3200,20 +3211,14 @@ bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, // Third step: compute the access functions for each subscript. SmallVector SrcSubscripts, DstSubscripts; - const SCEV *RemainderS = SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes); - const SCEV *RemainderD = DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes); + SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes); + DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes); // Fail when there is only a subscript: that's a linearized access function. if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || SrcSubscripts.size() != DstSubscripts.size()) return false; - // When the difference in remainders is different than a constant it might be - // that the base address of the arrays is not the same. 
- const SCEV *DiffRemainders = SE->getMinusSCEV(RemainderS, RemainderD); - if (!isa(DiffRemainders)) - return false; - int size = SrcSubscripts.size(); DEBUG({ diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 4e4eb2143315..35a825ad0566 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -7458,16 +7458,15 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl &Terms, /// Third step of delinearization: compute the access functions for the /// Subscripts based on the dimensions in Sizes. -const SCEV *SCEVAddRecExpr::computeAccessFunctions( +void SCEVAddRecExpr::computeAccessFunctions( ScalarEvolution &SE, SmallVectorImpl &Subscripts, SmallVectorImpl &Sizes) const { // Early exit in case this SCEV is not an affine multivariate function. if (Sizes.empty() || !this->isAffine()) - return nullptr; + return; - const SCEV *Zero = SE.getConstant(this->getType(), 0); - const SCEV *Res = this, *Remainder = Zero; + const SCEV *Res = this; int Last = Sizes.size() - 1; for (int i = Last; i >= 0; i--) { const SCEV *Q, *R; @@ -7488,10 +7487,12 @@ const SCEV *SCEVAddRecExpr::computeAccessFunctions( if (i == Last) { // Bail out if the remainder is too complex. - if (isa(R)) - return nullptr; + if (isa(R)) { + Subscripts.clear(); + Sizes.clear(); + return; + } - Remainder = R; continue; } @@ -7510,7 +7511,6 @@ const SCEV *SCEVAddRecExpr::computeAccessFunctions( for (const SCEV *S : Subscripts) dbgs() << *S << "\n"; }); - return Remainder; } /// Splits the SCEV into two vectors of SCEVs representing the subscripts and @@ -7562,27 +7562,28 @@ const SCEV *SCEVAddRecExpr::computeAccessFunctions( /// asking for the SCEV of the memory access with respect to all enclosing /// loops, calling SCEV->delinearize on that and printing the results. -const SCEV *SCEVAddRecExpr::delinearize( - ScalarEvolution &SE, SmallVectorImpl &Subscripts, - SmallVectorImpl &Sizes, const SCEV *ElementSize) const { +void SCEVAddRecExpr::delinearize(ScalarEvolution &SE, + SmallVectorImpl &Subscripts, + SmallVectorImpl &Sizes, + const SCEV *ElementSize) const { // First step: collect parametric terms. SmallVector Terms; collectParametricTerms(SE, Terms); if (Terms.empty()) - return nullptr; + return; // Second step: find subscript sizes. SE.findArrayDimensions(Terms, Sizes, ElementSize); if (Sizes.empty()) - return nullptr; + return; // Third step: compute the access functions for each subscript. - const SCEV *Remainder = computeAccessFunctions(SE, Subscripts, Sizes); + computeAccessFunctions(SE, Subscripts, Sizes); - if (!Remainder || Subscripts.empty()) - return nullptr; + if (Subscripts.empty()) + return; DEBUG({ dbgs() << "succeeded to delinearize " << *this << "\n"; @@ -7595,8 +7596,6 @@ const SCEV *SCEVAddRecExpr::delinearize( dbgs() << "[" << *S << "]"; dbgs() << "\n"; }); - - return Remainder; } //===----------------------------------------------------------------------===// From bf48d8ae51e9eff282c2b26226aa11e577ea76a2 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Tue, 27 May 2014 22:41:56 +0000 Subject: [PATCH 200/906] do not use the GCD to compute the delinearization strides We do not need to compute the GCD anymore after we removed the constant coefficients from the terms: the terms are now all parametric expressions and there is no need to recognize constant terms that divide only a subset of the terms. 
We only rely on the size of the terms, i.e., the number of operands in the multiply expressions, to sort the terms and recognize the parametric dimensions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209693 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 67 ++++---------------------------- 1 file changed, 8 insertions(+), 59 deletions(-) diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 35a825ad0566..4d85948489d6 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -7211,82 +7211,31 @@ struct SCEVDivision : public SCEVVisitor { }; } -// Find the Greatest Common Divisor of A and B. -static const SCEV * -findGCD(ScalarEvolution &SE, const SCEV *A, const SCEV *B) { - - if (const SCEVConstant *CA = dyn_cast(A)) - if (const SCEVConstant *CB = dyn_cast(B)) - return SE.getConstant(gcd(CA, CB)); - - const SCEV *One = SE.getConstant(A->getType(), 1); - if (isa(A) && isa(B)) - return One; - if (isa(A) && isa(B)) - return One; - - const SCEV *Q, *R; - if (const SCEVMulExpr *M = dyn_cast(A)) { - SmallVector Qs; - for (const SCEV *Op : M->operands()) - Qs.push_back(findGCD(SE, Op, B)); - return SE.getMulExpr(Qs); - } - if (const SCEVMulExpr *M = dyn_cast(B)) { - SmallVector Qs; - for (const SCEV *Op : M->operands()) - Qs.push_back(findGCD(SE, A, Op)); - return SE.getMulExpr(Qs); - } - - SCEVDivision::divide(SE, A, B, &Q, &R); - if (R->isZero()) - return B; - - SCEVDivision::divide(SE, B, A, &Q, &R); - if (R->isZero()) - return A; - - return One; -} - -// Find the Greatest Common Divisor of all the SCEVs in Terms. -static const SCEV * -findGCD(ScalarEvolution &SE, SmallVectorImpl &Terms) { - assert(Terms.size() > 0 && "Terms vector is empty"); - - const SCEV *GCD = Terms[0]; - for (const SCEV *T : Terms) - GCD = findGCD(SE, GCD, T); - - return GCD; -} - static bool findArrayDimensionsRec(ScalarEvolution &SE, SmallVectorImpl &Terms, SmallVectorImpl &Sizes) { - // The GCD of all Terms is the dimension of the innermost dimension. - const SCEV *GCD = findGCD(SE, Terms); + int Last = Terms.size() - 1; + const SCEV *Step = Terms[Last]; // End of recursion. - if (Terms.size() == 1) { - if (const SCEVMulExpr *M = dyn_cast(GCD)) { + if (Last == 0) { + if (const SCEVMulExpr *M = dyn_cast(Step)) { SmallVector Qs; for (const SCEV *Op : M->operands()) if (!isa(Op)) Qs.push_back(Op); - GCD = SE.getMulExpr(Qs); + Step = SE.getMulExpr(Qs); } - Sizes.push_back(GCD); + Sizes.push_back(Step); return true; } for (const SCEV *&Term : Terms) { // Normalize the terms before the next call to findArrayDimensionsRec. const SCEV *Q, *R; - SCEVDivision::divide(SE, Term, GCD, &Q, &R); + SCEVDivision::divide(SE, Term, Step, &Q, &R); // Bail out when GCD does not evenly divide one of the terms. if (!R->isZero()) @@ -7305,7 +7254,7 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE, if (!findArrayDimensionsRec(SE, Terms, Sizes)) return false; - Sizes.push_back(GCD); + Sizes.push_back(Step); return true; } From 5013d1d5e4586269fb2dd56fd4f20563a1fdf93d Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Tue, 27 May 2014 22:42:00 +0000 Subject: [PATCH 201/906] avoid type mismatch when building SCEVs This is a corner case I have stumbled upon when dealing with ARM64 type conversions. I was not able to extract a testcase for the community codebase to fail on. The patch conservatively discards a division that would have ended up in an ICE due to a type mismatch when building a multiply expression. 
I have also added code to a place that builds add expressions and in which we should be careful not to pass in operands of different types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209694 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 4d85948489d6..42a7aa238969 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -7131,9 +7131,19 @@ struct SCEVDivision : public SCEVVisitor { void visitAddExpr(const SCEVAddExpr *Numerator) { SmallVector Qs, Rs; + Type *Ty = Denominator->getType(); + for (const SCEV *Op : Numerator->operands()) { const SCEV *Q, *R; divide(SE, Op, Denominator, &Q, &R); + + // Bail out if types do not match. + if (Ty != Q->getType() || Ty != R->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; + } + Qs.push_back(Q); Rs.push_back(R); } @@ -7150,9 +7160,17 @@ struct SCEVDivision : public SCEVVisitor { void visitMulExpr(const SCEVMulExpr *Numerator) { SmallVector Qs; + Type *Ty = Denominator->getType(); bool FoundDenominatorTerm = false; for (const SCEV *Op : Numerator->operands()) { + // Bail out if types do not match. + if (Ty != Op->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; + } + if (FoundDenominatorTerm) { Qs.push_back(Op); continue; @@ -7165,6 +7183,14 @@ struct SCEVDivision : public SCEVVisitor { Qs.push_back(Op); continue; } + + // Bail out if types do not match. + if (Ty != Q->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; + } + FoundDenominatorTerm = true; Qs.push_back(Q); } From f9e42bc162e351f8eeb973beeb0a090ff2019502 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Tue, 27 May 2014 22:47:41 +0000 Subject: [PATCH 202/906] Factor out looking for prologue end into a function git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209697 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index dad44b84a153..e4e19cc91f3b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1364,6 +1364,17 @@ void DwarfDebug::identifyScopeMarkers() { } } +static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { + // First known non-DBG_VALUE and non-frame setup location marks + // the beginning of the function body. + for (const auto &MBB : *MF) + for (const auto &MI : MBB) + if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && + !MI.getDebugLoc().isUnknown()) + return MI.getDebugLoc(); + return DebugLoc(); +} + // Gather pre-function debug information. Assumes being called immediately // after the function entry point has been emitted. void DwarfDebug::beginFunction(const MachineFunction *MF) { @@ -1401,18 +1412,6 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionBeginSym); - // Collect user variables, find the end of the prologue. - for (const auto &MBB : *MF) { - for (const auto &MI : MBB) { - if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && - PrologEndLoc.isUnknown() && !MI.getDebugLoc().isUnknown()) { - // First known non-DBG_VALUE and non-frame setup location marks - // the beginning of the function body. 
- PrologEndLoc = MI.getDebugLoc(); - } - } - } - // Calculate history for local variables. calculateDbgValueHistory(MF, Asm->TM.getRegisterInfo(), DbgValues); @@ -1441,6 +1440,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { PrevLabel = FunctionBeginSym; // Record beginning of function. + PrologEndLoc = findPrologueEndLoc(MF); if (!PrologEndLoc.isUnknown()) { DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext()); From a807d6783a88310e48b1194948bf45c52a2ada15 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Tue, 27 May 2014 23:09:50 +0000 Subject: [PATCH 203/906] Change representation of instruction ranges where variable is accessible. Use more straightforward way to represent the set of instruction ranges where the location of a user variable is defined - vector of pairs of instructions (defining start/end of each range), instead of a flattened vector of instructions where some instructions are supposed to start the range, and the rest are supposed to "clobber" it. Simplify the code which generates actual .debug_loc entries. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209698 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AsmPrinter/DbgValueHistoryCalculator.cpp | 100 ++++++++++-------- .../AsmPrinter/DbgValueHistoryCalculator.h | 31 ++++-- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 78 ++++++-------- 3 files changed, 110 insertions(+), 99 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index 450d15413849..61032548ea8e 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -20,11 +20,6 @@ namespace llvm { -namespace { -// Maps physreg numbers to the variables they describe. -typedef std::map> RegDescribedVarsMap; -} - // \brief If @MI is a DBG_VALUE with debug value described by a // defined register, returns the number of this register. // In the other case, returns 0. @@ -36,6 +31,47 @@ static unsigned isDescribedByReg(const MachineInstr &MI) { return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; } +void DbgValueHistoryMap::startInstrRange(const MDNode *Var, + const MachineInstr &MI) { + // Instruction range should start with a DBG_VALUE instruction for the + // variable. + assert(MI.isDebugValue() && MI.getDebugVariable() == Var); + auto &Ranges = VarInstrRanges[Var]; + if (!Ranges.empty() && Ranges.back().second == nullptr && + Ranges.back().first->isIdenticalTo(&MI)) { + DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" + << "\t" << Ranges.back().first << "\t" << MI << "\n"); + return; + } + Ranges.push_back(std::make_pair(&MI, nullptr)); +} + +void DbgValueHistoryMap::endInstrRange(const MDNode *Var, + const MachineInstr &MI) { + auto &Ranges = VarInstrRanges[Var]; + // Verify that the current instruction range is not yet closed. + assert(!Ranges.empty() && Ranges.back().second == nullptr); + // For now, instruction ranges are not allowed to cross basic block + // boundaries. 
+ assert(Ranges.back().first->getParent() == MI.getParent()); + Ranges.back().second = &MI; +} + +unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const { + const auto &I = VarInstrRanges.find(Var); + if (I == VarInstrRanges.end()) + return 0; + const auto &Ranges = I->second; + if (Ranges.empty() || Ranges.back().second != nullptr) + return 0; + return isDescribedByReg(*Ranges.back().first); +} + +namespace { +// Maps physreg numbers to the variables they describe. +typedef std::map> RegDescribedVarsMap; +} + // \brief Claim that @Var is not described by @RegNo anymore. static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, const MDNode *Var) { @@ -54,16 +90,9 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, const MDNode *Var) { assert(RegNo != 0U); - RegVars[RegNo].push_back(Var); -} - -static void clobberVariableLocation(SmallVectorImpl &VarHistory, - const MachineInstr &ClobberingInstr) { - assert(!VarHistory.empty()); - // DBG_VALUE we're clobbering should belong to the same MBB. - assert(VarHistory.back()->isDebugValue()); - assert(VarHistory.back()->getParent() == ClobberingInstr.getParent()); - VarHistory.push_back(&ClobberingInstr); + auto &VarSet = RegVars[RegNo]; + assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end()); + VarSet.push_back(Var); } // \brief Terminate the location range for variables described by register @@ -77,11 +106,11 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, // Iterate over all variables described by this register and add this // instruction to their history, clobbering it. for (const auto &Var : I->second) - clobberVariableLocation(HistMap[Var], ClobberingInstr); + HistMap.endInstrRange(Var, ClobberingInstr); RegVars.erase(I); } -// \brief Terminate the location range for all variables, described by registers +// \brief Terminate location ranges for all variables, described by registers // clobbered by @MI. static void clobberRegisterUses(RegDescribedVarsMap &RegVars, const MachineInstr &MI, @@ -105,31 +134,10 @@ static void clobberAllRegistersUses(RegDescribedVarsMap &RegVars, const MachineInstr &ClobberingInstr) { for (const auto &I : RegVars) for (const auto &Var : I.second) - clobberVariableLocation(HistMap[Var], ClobberingInstr); + HistMap.endInstrRange(Var, ClobberingInstr); RegVars.clear(); } -// \brief Update the register that describes location of @Var in @RegVars map. -static void -updateRegForVariable(RegDescribedVarsMap &RegVars, const MDNode *Var, - const SmallVectorImpl &VarHistory, - const MachineInstr &MI) { - if (!VarHistory.empty()) { - const MachineInstr &Prev = *VarHistory.back(); - // Check if Var is currently described by a register by instruction in the - // same basic block. 
- if (Prev.isDebugValue() && Prev.getDebugVariable() == Var && - Prev.getParent() == MI.getParent()) { - if (unsigned PrevReg = isDescribedByReg(Prev)) - dropRegDescribedVar(RegVars, PrevReg, Var); - } - } - - assert(MI.getDebugVariable() == Var); - if (unsigned MIReg = isDescribedByReg(MI)) - addRegDescribedVar(RegVars, MIReg, Var); -} - void calculateDbgValueHistory(const MachineFunction *MF, const TargetRegisterInfo *TRI, DbgValueHistoryMap &Result) { @@ -146,16 +154,14 @@ void calculateDbgValueHistory(const MachineFunction *MF, assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!"); const MDNode *Var = MI.getDebugVariable(); - auto &History = Result[Var]; - if (!History.empty() && History.back()->isIdenticalTo(&MI)) { - DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" - << "\t" << History.back() << "\t" << MI << "\n"); - continue; - } + if (unsigned PrevReg = Result.getRegisterForVar(Var)) + dropRegDescribedVar(RegVars, PrevReg, Var); + + Result.startInstrRange(Var, MI); - updateRegForVariable(RegVars, Var, History, MI); - History.push_back(&MI); + if (unsigned NewReg = isDescribedByReg(MI)) + addRegDescribedVar(RegVars, NewReg, Var); } // Make sure locations for register-described variables are valid only diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h index db5116df1bb2..b9177f05950e 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h @@ -20,12 +20,31 @@ class MachineInstr; class MDNode; class TargetRegisterInfo; -// For each user variable, keep a list of DBG_VALUE instructions for it -// in the order of appearance. The list can also contain another -// instructions, which are assumed to clobber the previous DBG_VALUE. -// The variables are listed in order of appearance. -typedef MapVector> -DbgValueHistoryMap; +// For each user variable, keep a list of instruction ranges where this variable +// is accessible. The variables are listed in order of appearance. +class DbgValueHistoryMap { + // Each instruction range starts with a DBG_VALUE instruction, specifying the + // location of a variable, which is assumed to be valid until the end of the + // range. If end is not specified, location is valid until the start + // instruction of the next instruction range, or until the end of the + // function. + typedef std::pair InstrRange; + typedef SmallVector InstrRanges; + typedef MapVector InstrRangesMap; + InstrRangesMap VarInstrRanges; + +public: + void startInstrRange(const MDNode *Var, const MachineInstr &MI); + void endInstrRange(const MDNode *Var, const MachineInstr &MI); + // Returns register currently describing @Var. If @Var is currently + // unaccessible or is not described by a register, returns 0. 
+ unsigned getRegisterForVar(const MDNode *Var) const; + + bool empty() const { return VarInstrRanges.empty(); } + void clear() { VarInstrRanges.clear(); } + InstrRangesMap::const_iterator begin() const { return VarInstrRanges.begin(); } + InstrRangesMap::const_iterator end() const { return VarInstrRanges.end(); } +}; void calculateDbgValueHistory(const MachineFunction *MF, const TargetRegisterInfo *TRI, diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index e4e19cc91f3b..2a0615d74f61 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1153,12 +1153,10 @@ DwarfDebug::collectVariableInfo(SmallPtrSet &Processed) { if (Processed.count(DV)) continue; - // History contains relevant DBG_VALUE instructions for DV and instructions - // clobbering it. - const SmallVectorImpl &History = I.second; - if (History.empty()) + // Instruction ranges, specifying where DV is accessible. + const auto &Ranges = I.second; + if (Ranges.empty()) continue; - const MachineInstr *MInsn = History.front(); LexicalScope *Scope = nullptr; if (DV.getTag() == dwarf::DW_TAG_arg_variable && @@ -1175,6 +1173,7 @@ DwarfDebug::collectVariableInfo(SmallPtrSet &Processed) { continue; Processed.insert(DV); + const MachineInstr *MInsn = Ranges.front().first; assert(MInsn->isDebugValue() && "History must begin with debug value"); DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc()); DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this); @@ -1183,9 +1182,8 @@ DwarfDebug::collectVariableInfo(SmallPtrSet &Processed) { if (AbsVar) AbsVar->setMInsn(MInsn); - // Simplify ranges that are fully coalesced. - if (History.size() <= 1 || - (History.size() == 2 && MInsn->isIdenticalTo(History.back()))) { + // Check if the first DBG_VALUE is valid for the rest of the function. + if (Ranges.size() == 1 && Ranges.front().second == nullptr) { RegVar->setMInsn(MInsn); continue; } @@ -1198,42 +1196,31 @@ DwarfDebug::collectVariableInfo(SmallPtrSet &Processed) { LocList.Label = Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1); SmallVector &DebugLoc = LocList.List; - for (SmallVectorImpl::const_iterator - HI = History.begin(), - HE = History.end(); - HI != HE; ++HI) { - const MachineInstr *Begin = *HI; + for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { + const MachineInstr *Begin = I->first; + const MachineInstr *End = I->second; assert(Begin->isDebugValue() && "Invalid History entry"); - // Check if DBG_VALUE is truncating a range. + // Check if a variable is unaccessible in this range. if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() && !Begin->getOperand(0).getReg()) continue; - // Compute the range for a register location. - const MCSymbol *FLabel = getLabelBeforeInsn(Begin); - const MCSymbol *SLabel = nullptr; - - if (HI + 1 == HE) - // If Begin is the last instruction in History then its value is valid - // until the end of the function. - SLabel = FunctionEndSym; - else { - const MachineInstr *End = HI[1]; - DEBUG(dbgs() << "DotDebugLoc Pair:\n" - << "\t" << *Begin << "\t" << *End << "\n"); - if (End->isDebugValue() && End->getDebugVariable() == DV) - SLabel = getLabelBeforeInsn(End); - else { - // End is clobbering the range. 
- SLabel = getLabelAfterInsn(End); - assert(SLabel && "Forgot label after clobber instruction"); - ++HI; - } - } + const MCSymbol *StartLabel = getLabelBeforeInsn(Begin); + assert(StartLabel && "Forgot label before DBG_VALUE starting a range!"); + + const MCSymbol *EndLabel; + if (End != nullptr) + EndLabel = getLabelAfterInsn(End); + else if (std::next(I) == Ranges.end()) + EndLabel = FunctionEndSym; + else + EndLabel = getLabelBeforeInsn(std::next(I)->first); + assert(EndLabel && "Forgot label after instruction ending a range!"); - // The value is valid until the next DBG_VALUE or clobber. - DebugLocEntry Loc(FLabel, SLabel, getDebugLocValue(Begin), TheCU); + DEBUG(dbgs() << "DotDebugLoc Pair:\n" + << "\t" << *Begin << "\t" << *End << "\n"); + DebugLocEntry Loc(StartLabel, EndLabel, getDebugLocValue(Begin), TheCU); if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc)) DebugLoc.push_back(std::move(Loc)); } @@ -1416,9 +1403,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { calculateDbgValueHistory(MF, Asm->TM.getRegisterInfo(), DbgValues); // Request labels for the full history. - for (auto &I : DbgValues) { - const SmallVectorImpl &History = I.second; - if (History.empty()) + for (const auto &I : DbgValues) { + const auto &Ranges = I.second; + if (Ranges.empty()) continue; // The first mention of a function argument gets the FunctionBeginSym @@ -1426,13 +1413,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { DIVariable DV(I.first); if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable && getDISubprogram(DV.getContext()).describes(MF->getFunction())) - LabelsBeforeInsn[History.front()] = FunctionBeginSym; + LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym; - for (const MachineInstr *MI : History) { - if (MI->isDebugValue() && MI->getDebugVariable() == DV) - requestLabelBeforeInsn(MI); - else - requestLabelAfterInsn(MI); + for (const auto &Range : Ranges) { + requestLabelBeforeInsn(Range.first); + if (Range.second) + requestLabelAfterInsn(Range.second); } } From 4cbbe0d97e432da394b9db9b16d7f892016694f8 Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Wed, 28 May 2014 05:44:14 +0000 Subject: [PATCH 204/906] [AArch64] Add load post-index update folding regression tests for the load/store optimizer. Add regression tests for the following transformation: ldr X, [x20] ... add x20, x20, #32 -> ldr X, [x20], #32 with X being either w0, x0, s0, d0 or q0. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209711 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/AArch64/ldst-opt.ll | 136 +++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index 103c23c737aa..1ce5c954d61d 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -163,3 +163,139 @@ bar: tail call void @bar_double(%s.double* %c, double %val) ret void } + +; Check the following transform: +; +; ldr X, [x20] +; ... +; add x20, x20, #32 +; -> +; ldr X, [x20], #32 +; +; with X being either w0, x0, s0, d0 or q0. 
+ +define void @load-post-indexed-word(i32* %array, i64 %count) nounwind { +; CHECK-LABEL: load-post-indexed-word +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #16 +entry: + %gep1 = getelementptr i32* %array, i64 2 + br label %body + +body: + %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr i32* %iv2, i64 -1 + %load = load i32* %gep2 + call void @use-word(i32 %load) + %load2 = load i32* %iv2 + call void @use-word(i32 %load2) + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr i32* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @load-post-indexed-doubleword(i64* %array, i64 %count) nounwind { +; CHECK-LABEL: load-post-indexed-doubleword +; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #32 +entry: + %gep1 = getelementptr i64* %array, i64 2 + br label %body + +body: + %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr i64* %iv2, i64 -1 + %load = load i64* %gep2 + call void @use-doubleword(i64 %load) + %load2 = load i64* %iv2 + call void @use-doubleword(i64 %load2) + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr i64* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @load-post-indexed-quadword(<2 x i64>* %array, i64 %count) nounwind { +; CHECK-LABEL: load-post-indexed-quadword +; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #64 +entry: + %gep1 = getelementptr <2 x i64>* %array, i64 2 + br label %body + +body: + %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr <2 x i64>* %iv2, i64 -1 + %load = load <2 x i64>* %gep2 + call void @use-quadword(<2 x i64> %load) + %load2 = load <2 x i64>* %iv2 + call void @use-quadword(<2 x i64> %load2) + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr <2 x i64>* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @load-post-indexed-float(float* %array, i64 %count) nounwind { +; CHECK-LABEL: load-post-indexed-float +; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #16 +entry: + %gep1 = getelementptr float* %array, i64 2 + br label %body + +body: + %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr float* %iv2, i64 -1 + %load = load float* %gep2 + call void @use-float(float %load) + %load2 = load float* %iv2 + call void @use-float(float %load2) + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr float* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @load-post-indexed-double(double* %array, i64 %count) nounwind { +; CHECK-LABEL: load-post-indexed-double +; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #32 +entry: + %gep1 = getelementptr double* %array, i64 2 + br label %body + +body: + %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr double* %iv2, i64 -1 + %load = load double* %gep2 + call void @use-double(double %load) + %load2 = load double* %iv2 + call void @use-double(double %load2) + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr double* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +declare void 
@use-word(i32) +declare void @use-doubleword(i64) +declare void @use-quadword(<2 x i64>) +declare void @use-float(float) +declare void @use-double(double) From e8dee1a67b7feee07bd560d8e7828e475ff79cb8 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Wed, 28 May 2014 05:45:17 +0000 Subject: [PATCH 205/906] utils: Teach lldbDataFormatters to load automatically Add an __lldb_init_module function so that importing the lldbDataFormatters script automatically adds the formatters. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209712 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/lldbDataFormatters.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/utils/lldbDataFormatters.py b/utils/lldbDataFormatters.py index 1baf398aa533..352448d535cd 100644 --- a/utils/lldbDataFormatters.py +++ b/utils/lldbDataFormatters.py @@ -1,10 +1,18 @@ """ -Load into LLDB with: -script import lldbDataFormatters -type synthetic add -x "^llvm::SmallVectorImpl<.+>$" -l lldbDataFormatters.SmallVectorSynthProvider -type synthetic add -x "^llvm::SmallVector<.+,.+>$" -l lldbDataFormatters.SmallVectorSynthProvider +LLDB Formatters for LLVM data types. + +Load into LLDB with 'command script import /path/to/lldbDataFormatters.py' """ +def __lldb_init_module(debugger, internal_dict): + debugger.HandleCommand('type category define -e llvm -l c++') + debugger.HandleCommand('type synthetic add -w llvm ' + '-l lldbDataFormatters.SmallVectorSynthProvider ' + '-x "^llvm::SmallVectorImpl<.+>$"') + debugger.HandleCommand('type synthetic add -w llvm ' + '-l lldbDataFormatters.SmallVectorSynthProvider ' + '-x "^llvm::SmallVector<.+,.+>$"') + # Pretty printer for llvm::SmallVector/llvm::SmallVectorImpl class SmallVectorSynthProvider: def __init__(self, valobj, dict): From d8ba67b97b57de51b302f4de8084b5c3dcce0280 Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Wed, 28 May 2014 06:43:00 +0000 Subject: [PATCH 206/906] [AArch64] Add store post-index update folding regression tests for the load/store optimizer. Add regression tests for the following transformation: str X, [x20] ... add x20, x20, #32 -> str X, [x20], #32 with X being either w0, x0, s0, d0 or q0. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209715 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/AArch64/ldst-opt.ll | 125 +++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index 1ce5c954d61d..5518db3275e2 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -294,6 +294,131 @@ exit: ret void } +; Check the following transform: +; +; str X, [x20] +; ... +; add x20, x20, #32 +; -> +; str X, [x20], #32 +; +; with X being either w0, x0, s0, d0 or q0. 
+ +define void @store-post-indexed-word(i32* %array, i64 %count, i32 %val) nounwind { +; CHECK-LABEL: store-post-indexed-word +; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #16 +entry: + %gep1 = getelementptr i32* %array, i64 2 + br label %body + +body: + %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr i32* %iv2, i64 -1 + %load = load i32* %gep2 + call void @use-word(i32 %load) + store i32 %val, i32* %iv2 + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr i32* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @store-post-indexed-doubleword(i64* %array, i64 %count, i64 %val) nounwind { +; CHECK-LABEL: store-post-indexed-doubleword +; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #32 +entry: + %gep1 = getelementptr i64* %array, i64 2 + br label %body + +body: + %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr i64* %iv2, i64 -1 + %load = load i64* %gep2 + call void @use-doubleword(i64 %load) + store i64 %val, i64* %iv2 + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr i64* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @store-post-indexed-quadword(<2 x i64>* %array, i64 %count, <2 x i64> %val) nounwind { +; CHECK-LABEL: store-post-indexed-quadword +; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #64 +entry: + %gep1 = getelementptr <2 x i64>* %array, i64 2 + br label %body + +body: + %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr <2 x i64>* %iv2, i64 -1 + %load = load <2 x i64>* %gep2 + call void @use-quadword(<2 x i64> %load) + store <2 x i64> %val, <2 x i64>* %iv2 + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr <2 x i64>* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @store-post-indexed-float(float* %array, i64 %count, float %val) nounwind { +; CHECK-LABEL: store-post-indexed-float +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #16 +entry: + %gep1 = getelementptr float* %array, i64 2 + br label %body + +body: + %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr float* %iv2, i64 -1 + %load = load float* %gep2 + call void @use-float(float %load) + store float %val, float* %iv2 + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr float* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @store-post-indexed-double(double* %array, i64 %count, double %val) nounwind { +; CHECK-LABEL: store-post-indexed-double +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #32 +entry: + %gep1 = getelementptr double* %array, i64 2 + br label %body + +body: + %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr double* %iv2, i64 -1 + %load = load double* %gep2 + call void @use-double(double %load) + store double %val, double* %iv2 + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr double* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + declare void @use-word(i32) declare void @use-doubleword(i64) declare void @use-quadword(<2 x i64>) From 
0949668398f546d97b05076befb40a9cf6359da0 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Wed, 28 May 2014 09:26:46 +0000 Subject: [PATCH 207/906] [asancov] Don't emit extra runtime calls when compiling without coverage. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209721 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 8 +++++--- test/Instrumentation/AddressSanitizer/coverage.ll | 4 ++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 95fca75392af..f8cdb9f8f3d8 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1030,9 +1030,11 @@ bool AddressSanitizerModule::runOnModule(Module &M) { assert(CtorFunc); IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); - Function *CovFunc = M.getFunction(kAsanCovName); - int nCov = CovFunc ? CovFunc->getNumUses() : 0; - IRB.CreateCall(AsanCovModuleInit, ConstantInt::get(IntptrTy, nCov)); + if (ClCoverage > 0) { + Function *CovFunc = M.getFunction(kAsanCovName); + int nCov = CovFunc ? CovFunc->getNumUses() : 0; + IRB.CreateCall(AsanCovModuleInit, ConstantInt::get(IntptrTy, nCov)); + } size_t n = GlobalsToChange.size(); if (n == 0) return false; diff --git a/test/Instrumentation/AddressSanitizer/coverage.ll b/test/Instrumentation/AddressSanitizer/coverage.ll index 5bc510316aab..7e0ef1c8ebc7 100644 --- a/test/Instrumentation/AddressSanitizer/coverage.ll +++ b/test/Instrumentation/AddressSanitizer/coverage.ll @@ -1,3 +1,4 @@ +; RUN: opt < %s -asan -asan-module -asan-coverage=0 -S | FileCheck %s --check-prefix=CHECK0 ; RUN: opt < %s -asan -asan-module -asan-coverage=1 -S | FileCheck %s --check-prefix=CHECK1 ; RUN: opt < %s -asan -asan-module -asan-coverage=2 -S | FileCheck %s --check-prefix=CHECK2 ; RUN: opt < %s -asan -asan-module -asan-coverage=2 -asan-coverage-block-threshold=10 -S | FileCheck %s --check-prefix=CHECK2 @@ -17,6 +18,9 @@ entry: ret void } +; CHECK0-NOT: call void @__sanitizer_cov( +; CHECK0-NOT: call void @__sanitizer_cov_module_init( + ; CHECK1-LABEL: define void @foo ; CHECK1: %0 = load atomic i8* @__asan_gen_cov_foo monotonic, align 1 ; CHECK1: %1 = icmp eq i8 0, %0 From 60aa82b5b60fd78a2bc92b9849eccfc62faeb27a Mon Sep 17 00:00:00 2001 From: Joerg Sonnenberger Date: Wed, 28 May 2014 15:12:55 +0000 Subject: [PATCH 208/906] Don't hard-code ld when extracting host linker version, use ${LD} if it is set. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209742 91177308-0d34-0410-b5e6-96231b3b80d8 --- autoconf/m4/link_options.m4 | 2 +- configure | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/autoconf/m4/link_options.m4 b/autoconf/m4/link_options.m4 index b58d61745f97..abf6596f7c6f 100644 --- a/autoconf/m4/link_options.m4 +++ b/autoconf/m4/link_options.m4 @@ -6,7 +6,7 @@ AC_DEFUN([AC_LINK_GET_VERSION], [AC_CACHE_CHECK([for linker version],[llvm_cv_link_version], [ - version_string="$(ld -v 2>&1 | head -1)" + version_string="$(${LD:-ld} -v 2>&1 | head -1)" # Check for ld64. if (echo "$version_string" | grep -q "ld64"); then diff --git a/configure b/configure index e1959dfee6c0..8c6270dd89fd 100755 --- a/configure +++ b/configure @@ -7612,7 +7612,7 @@ if test "${llvm_cv_link_version+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else - version_string="$(ld -v 2>&1 | head -1)" + version_string="$(${LD:-ld} -v 2>&1 | head -1)" # Check for ld64. 
if (echo "$version_string" | grep -q "ld64"); then From b8af23fe1ecc74021e2cc5a3ad1ecdcbbcb65c4e Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 28 May 2014 15:25:06 +0000 Subject: [PATCH 209/906] Revert "[PPC] Use alias symbols in address computation." This reverts commit r209638 because it broke self-hosting on ppc64/Linux. (the Clang-compiled TableGen would segfault because it jumped to an invalid address from within _ZNK4llvm17ManagedStaticBase21RegisterManagedStaticEPFPvvEPFvS1_E (which is within the command-line parameter registration process)). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209745 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCAsmPrinter.cpp | 36 ++++++++++++++++---------- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 13 ++++++++-- test/CodeGen/PowerPC/alias.ll | 31 ---------------------- 3 files changed, 34 insertions(+), 46 deletions(-) delete mode 100644 test/CodeGen/PowerPC/alias.ll diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index e89fb2d58a1c..2174b18715f1 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -380,12 +380,15 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { bool IsAvailExt = false; if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - MOSymbol = getSymbol(GV); - IsExternal = GV->isDeclaration(); - IsCommon = GV->hasCommonLinkage(); - IsFunction = GV->getType()->getElementType()->isFunctionTy(); - IsAvailExt = GV->hasAvailableExternallyLinkage(); + const GlobalValue *GValue = MO.getGlobal(); + const GlobalAlias *GAlias = dyn_cast(GValue); + const GlobalValue *RealGValue = GAlias ? GAlias->getAliasee() : GValue; + MOSymbol = getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast(RealGValue); + IsExternal = GVar && !GVar->hasInitializer(); + IsCommon = GVar && RealGValue->hasCommonLinkage(); + IsFunction = !GVar; + IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); else if (MO.isJTI()) @@ -424,9 +427,13 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } else if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); - MOSymbol = getSymbol(GValue); - if (GValue->isDeclaration() || GValue->hasCommonLinkage() || - GValue->hasAvailableExternallyLinkage() || + const GlobalAlias *GAlias = dyn_cast(GValue); + const GlobalValue *RealGValue = GAlias ? GAlias->getAliasee() : GValue; + MOSymbol = getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast(RealGValue); + + if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || + RealGValue->hasAvailableExternallyLinkage() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); } @@ -453,10 +460,13 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { bool IsFunction = false; if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - MOSymbol = getSymbol(GV); - IsExternal = GV->isDeclaration(); - IsFunction = GV->getType()->getElementType()->isFunctionTy(); + const GlobalValue *GValue = MO.getGlobal(); + const GlobalAlias *GAlias = dyn_cast(GValue); + const GlobalValue *RealGValue = GAlias ? 
GAlias->getAliasee() : GValue; + MOSymbol = getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast(RealGValue); + IsExternal = GVar && !GVar->hasInitializer(); + IsFunction = !GVar; } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 251e8b6246f6..f6e075d27193 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1472,8 +1472,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if (GlobalAddressSDNode *G = dyn_cast(GA)) { const GlobalValue *GValue = G->getGlobal(); - if (GValue->isDeclaration() || GValue->hasCommonLinkage() || - GValue->hasAvailableExternallyLinkage()) + const GlobalAlias *GAlias = dyn_cast(GValue); + const GlobalValue *RealGValue = GAlias ? GAlias->getAliasee() : GValue; + const GlobalVariable *GVar = dyn_cast(RealGValue); + assert((GVar || isa(RealGValue)) && + "Unexpected global value subclass!"); + + // An external variable is one without an initializer. For these, + // for variables with common linkage, and for Functions, generate + // the LDtocL form. + if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || + RealGValue->hasAvailableExternallyLinkage()) return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0)); } diff --git a/test/CodeGen/PowerPC/alias.ll b/test/CodeGen/PowerPC/alias.ll deleted file mode 100644 index 86e41148a0d7..000000000000 --- a/test/CodeGen/PowerPC/alias.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -code-model=medium| FileCheck --check-prefix=CHECK --check-prefix=MEDIUM %s -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK --check-prefix=LARGE %s - -@foo = global i32 42 -@fooa = alias i32* @foo - -@foo2 = global i64 42 -@foo2a = alias i64* @foo2 - -; CHECK-LABEL: bar: -define i32 @bar() { -; MEDIUM: addis 3, 2, fooa@toc@ha -; LARGE: addis 3, 2, .LC1@toc@ha - %a = load i32* @fooa - ret i32 %a -} - -; CHECK-LABEL: bar2: -define i64 @bar2() { -; MEDIUM: addis 3, 2, foo2a@toc@ha -; MEDIUM: addi 3, 3, foo2a@toc@l -; LARGE: addis 3, 2, .LC3@toc@ha - %a = load i64* @foo2a - ret i64 %a -} - -; LARGE: .LC1: -; LARGE-NEXT: .tc fooa[TC],fooa - -; LARGE: .LC3: -; LARGE-NEXT: .tc foo2a[TC],foo2a From e8075c6877d1f2e1be6c2646135912406616249c Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 28 May 2014 15:30:40 +0000 Subject: [PATCH 210/906] InstCombine: Improvement to check if signed addition overflows. This patch implements two things: 1. If we know one number is positive and another is negative, we return true as signed addition of two opposite signed numbers will never overflow. 2. Implemented TODO : If one of the operands only has one non-zero bit, and if the other operand has a known-zero bit in a more significant place than it (not including the sign bit) the ripple may go up to and fill the zero, but won't change the sign. e.x - (x & ~4) + 1 We make sure that we are ignoring 0 at MSB. Patch by Suyog Sarda. 
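As a rough source-level sketch of the second rule (this C is not part of the
patch, only a hypothetical reconstruction of the pattern exercised by the
@ripple test added below):

    short ripple(short x) {
      short t = x & ~4; /* bit 2 of t is known to be zero */
      /* The constant 1 has a single non-zero bit (bit 0). Any carry the
         addition produces can ripple no higher than the known-zero bit 2,
         so the sign bit of t cannot change and the add cannot overflow. */
      return t + 1;
    }

Proving this lets instcombine keep the addition in the narrow type and mark
it nsw (the "add nsw i16" CHECK lines in the new AddOverflow.ll test), while
the ripplenot tests cover the cases where no blocking zero bit exists and the
fold must not fire.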
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209746 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineAddSub.cpp | 50 +++++++++++++++-- test/Transforms/InstCombine/AddOverflow.ll | 56 +++++++++++++++++++ 2 files changed, 100 insertions(+), 6 deletions(-) create mode 100644 test/Transforms/InstCombine/AddOverflow.ll diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c37a9cf2ef9f..eca4e4a78702 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -889,11 +889,34 @@ static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { return nullptr; } +// If one of the operands only has one non-zero bit, and if the other +// operand has a known-zero bit in a more significant place than it (not +// including the sign bit) the ripple may go up to and fill the zero, but +// won't change the sign. For example, (X & ~4) + 1. +// FIXME: Handle case where LHS has a zero before the 1 in the RHS, but also +// has one after. +static bool CheckRippleForAdd(APInt Op0KnownZero, APInt Op0KnownOne, + APInt Op1KnownZero, APInt Op1KnownOne) { + // Make sure that one of the operand has only one bit set to 1 and all other + // bit set to 0. + if ((~Op1KnownZero).countPopulation() == 1) { + int BitWidth = Op0KnownZero.getBitWidth(); + // Ignore Sign Bit. + Op0KnownZero.clearBit(BitWidth - 1); + int Op1OnePosition = BitWidth - Op1KnownOne.countLeadingZeros() - 1; + int Op0ZeroPosition = BitWidth - Op0KnownZero.countLeadingZeros() - 1; + if ((Op0ZeroPosition != (BitWidth - 1)) && + (Op0ZeroPosition >= Op1OnePosition)) + return true; + } + return false; +} /// WillNotOverflowSignedAdd - Return true if we can prove that: /// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) /// This basically requires proving that the add in the original type would not /// overflow to change the sign bit or have a carry out. +/// TODO: Handle this for Vectors. bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { // There are different heuristics we can use for this. Here are some simple // ones. @@ -905,14 +928,29 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) return true; + if (IntegerType *IT = dyn_cast(LHS->getType())) { - // If one of the operands only has one non-zero bit, and if the other operand - // has a known-zero bit in a more significant place than it (not including the - // sign bit) the ripple may go up to and fill the zero, but won't change the - // sign. For example, (X & ~4) + 1. - - // TODO: Implement. + int BitWidth = IT->getBitWidth(); + APInt LHSKnownZero(BitWidth, 0, /*isSigned*/ true); + APInt LHSKnownOne(BitWidth, 0, /*isSigned*/ true); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); + APInt RHSKnownZero(BitWidth, 0, /*isSigned*/ true); + APInt RHSKnownOne(BitWidth, 0, /*isSigned*/ true); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); + + // Addition of two 2's compliment numbers having opposite signs will never + // overflow. + if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) || + (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1])) + return true; + + // Check if carry bit of addition will not cause overflow. 
+ if (CheckRippleForAdd(LHSKnownZero, LHSKnownOne, RHSKnownZero, RHSKnownOne)) + return true; + if (CheckRippleForAdd(RHSKnownZero, RHSKnownOne, LHSKnownZero, LHSKnownOne)) + return true; + } return false; } diff --git a/test/Transforms/InstCombine/AddOverflow.ll b/test/Transforms/InstCombine/AddOverflow.ll new file mode 100644 index 000000000000..1bbd1fc59a5d --- /dev/null +++ b/test/Transforms/InstCombine/AddOverflow.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: @ripple( +; CHECK: add nsw i16 %tmp1, 1 +define i32 @ripple(i16 signext %x) { +bb: + %tmp = sext i16 %x to i32 + %tmp1 = and i32 %tmp, -5 + %tmp2 = trunc i32 %tmp1 to i16 + %tmp3 = sext i16 %tmp2 to i32 + %tmp4 = add i32 %tmp3, 1 + ret i32 %tmp4 +} + +; CHECK-LABEL: @ripplenot( +; CHECK: add i32 %tmp3, 4 +define i32 @ripplenot(i16 signext %x) { +bb: + %tmp = sext i16 %x to i32 + %tmp1 = and i32 %tmp, -3 + %tmp2 = trunc i32 %tmp1 to i16 + %tmp3 = sext i16 %tmp2 to i32 + %tmp4 = add i32 %tmp3, 4 + ret i32 %tmp4 +} + +; CHECK-LABEL: @oppositesign( +; CHECK: add nsw i16 %tmp1, 4 +define i32 @oppositesign(i16 signext %x) { +bb: + %tmp = sext i16 %x to i32 + %tmp1 = or i32 %tmp, 32768 + %tmp2 = trunc i32 %tmp1 to i16 + %tmp3 = sext i16 %tmp2 to i32 + %tmp4 = add i32 %tmp3, 4 + ret i32 %tmp4 +} + +; CHECK-LABEL: @ripplenot_var( +; CHECK: add i32 %tmp6, %tmp7 +define i32 @ripplenot_var(i16 signext %x, i16 signext %y) { +bb: + %tmp = sext i16 %x to i32 + %tmp1 = and i32 %tmp, -5 + %tmp2 = trunc i32 %tmp1 to i16 + %tmp3 = sext i16 %y to i32 + %tmp4 = or i32 %tmp3, 2 + %tmp5 = trunc i32 %tmp4 to i16 + %tmp6 = sext i16 %tmp5 to i32 + %tmp7 = sext i16 %tmp2 to i32 + %tmp8 = add i32 %tmp6, %tmp7 + ret i32 %tmp8 +} From 9b7716192728fbdd8ae2ba9a13dcd65d7fe6509f Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 28 May 2014 15:33:19 +0000 Subject: [PATCH 211/906] Revert "[DAGCombiner] Split up an indexed load if only the base pointer value is live" This reverts r208640 (I've just XFAILed the test) because it broke ppc64/Linux self-hosting. Because nearly every regression test triggers a segfault, I hope this will be easy to fix. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209747 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 37 ++++--------------- .../arm64-dagcombiner-dead-indexed-load.ll | 3 ++ 2 files changed, 10 insertions(+), 30 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2d2fd53447ee..c4089446f08d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -167,7 +167,6 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); - SDValue SplitIndexingFromLoad(LoadSDNode *LD); bool SliceUpLoad(SDNode *N); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); @@ -762,14 +761,10 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // If the operands of this node are only used by the node, they will now // be dead. Make sure to visit them first to delete dead nodes early. - for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) { - SDNode *Op = TLO.Old.getNode()->getOperand(i).getNode(); - // For an operand generating multiple values, one of the values may - // become dead allowing further simplification (e.g. 
split index - // arithmetic from an indexed load). - if (Op->hasOneUse() || Op->getNumValues() > 1) - AddToWorkList(Op); - } + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) + if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) + AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); + DAG.DeleteNode(TLO.Old.getNode()); } } @@ -7849,17 +7844,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { return false; } -/// \brief Return the base-pointer arithmetic from an indexed \p LD. -SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { - ISD::MemIndexedMode AM = LD->getAddressingMode(); - assert(AM != ISD::UNINDEXED); - SDValue BP = LD->getOperand(1); - SDValue Inc = LD->getOperand(2); - unsigned Opc = - (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB); - return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); -} - SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast(N); SDValue Chain = LD->getChain(); @@ -7896,16 +7880,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } else { // Indexed loads. assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); - if (!N->hasAnyUseOfValue(0)) { + if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); - SDValue Index; - if (N->hasAnyUseOfValue(1)) { - Index = SplitIndexingFromLoad(LD); - // Try to fold the base pointer arithmetic into subsequent loads and - // stores. - AddUsersToWorkList(N); - } else - Index = DAG.getUNDEF(N->getValueType(1)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); dbgs() << "\nWith: "; @@ -7913,7 +7889,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { dbgs() << " and 2 other values\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), + DAG.getUNDEF(N->getValueType(1))); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); removeFromWorkList(N); DAG.DeleteNode(N); diff --git a/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll index 2cf01357324b..6eed48bf62e3 100644 --- a/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll +++ b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll @@ -1,5 +1,8 @@ ; RUN: llc -mcpu=cyclone < %s | FileCheck %s +; r208640 broke ppc64/Linux self-hosting; xfailing while this is worked on. +; XFAIL: * + target datalayout = "e-i64:64-n32:64-S128" target triple = "arm64-apple-ios" From a5423f2598c8579fd5438f71048467db9d13abcd Mon Sep 17 00:00:00 2001 From: Louis Gerbarg Date: Wed, 28 May 2014 17:38:31 +0000 Subject: [PATCH 212/906] Add support for combining GEPs across PHI nodes Currently LLVM will generally merge GEPs. This allows backends to use more complex addressing modes. In some cases this is not happening because there is PHI inbetween the two GEPs: GEP1--\ |-->PHI1-->GEP3 GEP2--/ This patch checks to see if GEP1 and GEP2 are similiar enough that they can be cloned (GEP12) in GEP3's BB, allowing GEP->GEP merging (GEP123): GEP1--\ --\ --\ |-->PHI1-->GEP3 ==> |-->PHI2->GEP12->GEP3 == > |-->PHI2->GEP123 GEP2--/ --/ --/ This also breaks certain use chains that are preventing GEP->GEP merges that the the existing instcombine would merge otherwise. Tests included. 
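As a concrete picture of the shape being targeted (a hypothetical C-level
reconstruction, not code from this change; the real coverage is the new
gepphigep.ll test):

    struct inner { int a, b; };
    struct outer { struct inner *p; };

    int f(struct outer *o, int c, long i, long j) {
      struct inner *base = o->p;         /* inner pointer loaded once  */
      struct inner *q;
      if (c) { q = &base[i]; q->a = 0; } /* GEP1 feeding the PHI       */
      else   { q = &base[j]; q->a = 0; } /* GEP2 feeding the PHI       */
      return q->b;                       /* GEP3 indexes off the PHI   */
    }

Here GEP1 and GEP2 share the same base pointer and differ only in their index
operand, so the transform introduces a PHI of the two indices feeding a single
cloned GEP in the final block, which can then be merged with the field access
(compare the PHI of %tmp9 and %tmp19 in the CHECK lines of test1 below).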
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209755 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstructionCombining.cpp | 79 +++++++++++++++++++ test/Transforms/InstCombine/gepphigep.ll | 56 +++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 test/Transforms/InstCombine/gepphigep.ll diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 4c36887f6285..80eec1b311bf 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1220,6 +1220,85 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (MadeChange) return &GEP; } + // Check to see if the inputs to the PHI node are getelementptr instructions. + if (PHINode *PN = dyn_cast(PtrOp)) { + GetElementPtrInst *Op1 = dyn_cast(PN->getOperand(0)); + if (!Op1) + return nullptr; + + signed DI = -1; + + for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { + GetElementPtrInst *Op2 = dyn_cast(*I); + if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands()) + return nullptr; + + for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) { + if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType()) + return nullptr; + + if (Op1->getOperand(J) != Op2->getOperand(J)) { + if (DI == -1) { + // We have not seen any differences yet in the GEPs feeding the + // PHI yet, so we record this one if it is allowed to be a + // variable. + + // The first two arguments can vary for any GEP, the rest have to be + // static for struct slots + if (J > 1) { + SmallVector Idxs(GEP.idx_begin(), GEP.idx_begin()+J); + Type *Ty = + GetElementPtrInst::getIndexedType(Op1->getOperand(0)->getType(), + Idxs); + if (Ty->isStructTy()) + return nullptr; + } + + DI = J; + } else { + // The GEP is different by more than one input. While this could be + // extended to support GEPs that vary by more than one variable it + // doesn't make sense since it greatly increases the complexity and + // would result in an R+R+R addressing mode which no backend + // directly supports and would need to be broken into several + // simpler instructions anyway. + return nullptr; + } + } + } + } + + GetElementPtrInst *NewGEP = cast(Op1->clone()); + + if (DI == -1) { + // All the GEPs feeding the PHI are identical. Clone one down into our + // BB so that it can be merged with the current GEP. + GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(), + NewGEP); + } else { + // All the GEPs feeding the PHI differ at a single offset. Clone a GEP + // into the current block so it can be merged, and create a new PHI to + // set that index. + Instruction *InsertPt = Builder->GetInsertPoint(); + Builder->SetInsertPoint(PN); + PHINode *NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(), + PN->getNumOperands()); + Builder->SetInsertPoint(InsertPt); + + for (auto &I : PN->operands()) + NewPN->addIncoming(cast(I)->getOperand(DI), + PN->getIncomingBlock(I)); + + NewGEP->setOperand(DI, NewPN); + GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(), + NewGEP); + NewGEP->setOperand(DI, NewPN); + } + + GEP.setOperand(0, NewGEP); + PtrOp = NewGEP; + } + // Combine Indices - If the source pointer to this getelementptr instruction // is a getelementptr instruction, combine the indices of the two // getelementptr instructions into a single instruction. 
diff --git a/test/Transforms/InstCombine/gepphigep.ll b/test/Transforms/InstCombine/gepphigep.ll new file mode 100644 index 000000000000..9aab609901e2 --- /dev/null +++ b/test/Transforms/InstCombine/gepphigep.ll @@ -0,0 +1,56 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +%struct1 = type { %struct2*, i32, i32, i32 } +%struct2 = type { i32, i32 } + +define i32 @test1(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) { +bb: + %tmp = getelementptr inbounds %struct1* %dm, i64 0, i32 0 + %tmp1 = load %struct2** %tmp, align 8 + br i1 %tmp4, label %bb1, label %bb2 + +bb1: + %tmp10 = getelementptr inbounds %struct2* %tmp1, i64 %tmp9 + %tmp11 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 0 + store i32 0, i32* %tmp11, align 4 + br label %bb3 + +bb2: + %tmp20 = getelementptr inbounds %struct2* %tmp1, i64 %tmp19 + %tmp21 = getelementptr inbounds %struct2* %tmp20, i64 0, i32 0 + store i32 0, i32* %tmp21, align 4 + br label %bb3 + +bb3: + %phi = phi %struct2* [ %tmp10, %bb1 ], [ %tmp20, %bb2 ] + %tmp24 = getelementptr inbounds %struct2* %phi, i64 0, i32 1 + %tmp25 = load i32* %tmp24, align 4 + ret i32 %tmp25 + +; CHECK-LABEL: @test1( +; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 0 +; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp19, i32 0 +; CHECK: %[[PHI:[0-9A-Za-z]+]] = phi i64 [ %tmp9, %bb1 ], [ %tmp19, %bb2 ] +; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %[[PHI]], i32 1 + +} + +define i32 @test2(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) { +bb: + %tmp = getelementptr inbounds %struct1* %dm, i64 0, i32 0 + %tmp1 = load %struct2** %tmp, align 8 + %tmp10 = getelementptr inbounds %struct2* %tmp1, i64 %tmp9 + %tmp11 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 0 + store i32 0, i32* %tmp11, align 4 + %tmp20 = getelementptr inbounds %struct2* %tmp1, i64 %tmp19 + %tmp21 = getelementptr inbounds %struct2* %tmp20, i64 0, i32 0 + store i32 0, i32* %tmp21, align 4 + %tmp24 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 1 + %tmp25 = load i32* %tmp24, align 4 + ret i32 %tmp25 + +; CHECK-LABEL: @test2( +; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 0 +; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp19, i32 0 +; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 1 +} From 665d42accf60bba6444ef7be8cd9e89d7aac177a Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 28 May 2014 18:15:43 +0000 Subject: [PATCH 213/906] [pr19844] Add thread local mode to aliases. This matches gcc's behavior. It also seems natural given that aliases contain other properties that govern how it is accessed (linkage, visibility, dll storage). Clang still has to be updated to expose this feature to C. 
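A short IR sketch of the syntax this enables, modeled on the tests touched below (test/CodeGen/X86/aliases.ll and test/Feature/alias2.ll); the names here are invented:

@tls_var   = thread_local global i32 42, align 4
@tls_alias = hidden thread_local(initialexec) alias i32* @tls_var

; The model written on the alias only governs how accesses through @tls_alias
; are lowered; @tls_var itself keeps the default general-dynamic model.
define i32* @get_alias_addr() {
entry:
  ret i32* @tls_alias
}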
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209759 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 51 ++++++++++++------- include/llvm/IR/GlobalValue.h | 33 ++++++++++-- include/llvm/IR/GlobalVariable.h | 21 -------- lib/AsmParser/LLParser.cpp | 48 +++++++++++------ lib/AsmParser/LLParser.h | 6 ++- lib/Bitcode/Reader/BitcodeReader.cpp | 2 + lib/Bitcode/Writer/BitcodeWriter.cpp | 4 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 9 +--- lib/IR/AsmWriter.cpp | 1 + lib/IR/Globals.cpp | 6 ++- lib/Target/AArch64/AArch64FastISel.cpp | 12 ++--- lib/Target/PowerPC/PPCFastISel.cpp | 8 +-- lib/Target/TargetMachine.cpp | 15 +++--- lib/Target/X86/X86FastISel.cpp | 13 +---- test/CodeGen/Mips/tls-alias.ll | 2 +- .../X86/2008-03-12-ThreadLocalAlias.ll | 2 +- test/CodeGen/X86/aliases.ll | 18 ++++++- test/Feature/alias2.ll | 3 ++ 18 files changed, 143 insertions(+), 111 deletions(-) diff --git a/docs/LangRef.rst b/docs/LangRef.rst index fa40363a7548..650012e7f96f 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -464,6 +464,34 @@ DLL storage class: exists for defining a dll interface, the compiler, assembler and linker know it is externally referenced and must refrain from deleting the symbol. +.. _tls_model: + +Thread Local Storage Models +--------------------------- + +A variable may be defined as ``thread_local``, which means that it will +not be shared by threads (each thread will have a separated copy of the +variable). Not all targets support thread-local variables. Optionally, a +TLS model may be specified: + +``localdynamic`` + For variables that are only used within the current shared library. +``initialexec`` + For variables in modules that will not be loaded dynamically. +``localexec`` + For variables defined in the executable and only used within it. + +If no explicit model is given, the "general dynamic" model is used. + +The models correspond to the ELF TLS models; see `ELF Handling For +Thread-Local Storage `_ for +more information on under which circumstances the different models may +be used. The target may choose a different TLS model if the specified +model is not supported, or if a better choice of model can be made. + +A model can also be specified in a alias, but then it only governs how +the alias is accessed. It will not have any effect in the aliasee. + .. _namedtypes: Structure Types @@ -497,24 +525,6 @@ to be placed in, and may have an optional explicit alignment specified. Global variables in other translation units can also be declared, in which case they don't have an initializer. -A variable may be defined as ``thread_local``, which means that it will -not be shared by threads (each thread will have a separated copy of the -variable). Not all targets support thread-local variables. Optionally, a -TLS model may be specified: - -``localdynamic`` - For variables that are only used within the current shared library. -``initialexec`` - For variables in modules that will not be loaded dynamically. -``localexec`` - For variables defined in the executable and only used within it. - -The models correspond to the ELF TLS models; see `ELF Handling For -Thread-Local Storage `_ for -more information on under which circumstances the different models may -be used. The target may choose a different TLS model if the specified -model is not supported, or if a better choice of model can be made. 
- A variable may be defined as a global ``constant``, which indicates that the contents of the variable will **never** be modified (enabling better optimization, allowing the global data to be placed in the read-only @@ -572,6 +582,9 @@ iteration. Globals can also have a :ref:`DLL storage class `. +Variables and aliasaes can have a +:ref:`Thread Local Storage Model `. + Syntax:: [@ =] [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] @@ -674,7 +687,7 @@ Aliases may have an optional :ref:`linkage type `, an optional Syntax:: - @ = [Visibility] [DLLStorageClass] alias [Linkage] @ + @ = [Visibility] [DLLStorageClass] [ThreadLocal] alias [Linkage] @ The linkage must be one of ``private``, ``internal``, ``linkonce``, ``weak``, ``linkonce_odr``, ``weak_odr``, ``external``. Note that some system linkers diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h index 10df372945a9..04c97a01d667 100644 --- a/include/llvm/IR/GlobalValue.h +++ b/include/llvm/IR/GlobalValue.h @@ -63,7 +63,8 @@ class GlobalValue : public Constant { LinkageTypes Linkage, const Twine &Name) : Constant(Ty, VTy, Ops, NumOps), Linkage(Linkage), Visibility(DefaultVisibility), UnnamedAddr(0), - DllStorageClass(DefaultStorageClass), Parent(nullptr) { + DllStorageClass(DefaultStorageClass), + ThreadLocal(NotThreadLocal), Parent(nullptr) { setName(Name); } @@ -74,21 +75,32 @@ class GlobalValue : public Constant { unsigned UnnamedAddr : 1; // This value's address is not significant unsigned DllStorageClass : 2; // DLL storage class + unsigned ThreadLocal : 3; // Is this symbol "Thread Local", if so, what is + // the desired model? + private: // Give subclasses access to what otherwise would be wasted padding. - // (22 + 2 + 1 + 2 + 5) == 32. - unsigned SubClassData : 22; + // (19 + 3 + 2 + 1 + 2 + 5) == 32. + unsigned SubClassData : 19; protected: unsigned getGlobalValueSubClassData() const { return SubClassData; } void setGlobalValueSubClassData(unsigned V) { - assert(V < (1 << 22) && "It will not fit"); + assert(V < (1 << 19) && "It will not fit"); SubClassData = V; } Module *Parent; // The containing module. public: + enum ThreadLocalMode { + NotThreadLocal = 0, + GeneralDynamicTLSModel, + LocalDynamicTLSModel, + InitialExecTLSModel, + LocalExecTLSModel + }; + ~GlobalValue() { removeDeadConstantUsers(); // remove any dead constants using this. } @@ -110,6 +122,19 @@ class GlobalValue : public Constant { Visibility = V; } + /// If the value is "Thread Local", its value isn't shared by the threads. + bool isThreadLocal() const { return getThreadLocalMode() != NotThreadLocal; } + void setThreadLocal(bool Val) { + setThreadLocalMode(Val ? GeneralDynamicTLSModel : NotThreadLocal); + } + void setThreadLocalMode(ThreadLocalMode Val) { + assert(Val == NotThreadLocal || getValueID() != Value::FunctionVal); + ThreadLocal = Val; + } + ThreadLocalMode getThreadLocalMode() const { + return static_cast(ThreadLocal); + } + DLLStorageClassTypes getDLLStorageClass() const { return DLLStorageClassTypes(DllStorageClass); } diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h index 8cd4332b1ad8..4189ccb90a54 100644 --- a/include/llvm/IR/GlobalVariable.h +++ b/include/llvm/IR/GlobalVariable.h @@ -41,9 +41,6 @@ class GlobalVariable : public GlobalObject, public ilist_node { void setParent(Module *parent); bool isConstantGlobal : 1; // Is this a global constant? - unsigned threadLocalMode : 3; // Is this symbol "Thread Local", - // if so, what is the desired - // model? 
bool isExternallyInitializedConstant : 1; // Is this a global whose value // can change from its initial // value before global @@ -55,14 +52,6 @@ class GlobalVariable : public GlobalObject, public ilist_node { return User::operator new(s, 1); } - enum ThreadLocalMode { - NotThreadLocal = 0, - GeneralDynamicTLSModel, - LocalDynamicTLSModel, - InitialExecTLSModel, - LocalExecTLSModel - }; - /// GlobalVariable ctor - If a parent module is specified, the global is /// automatically inserted into the end of the specified modules global list. GlobalVariable(Type *Ty, bool isConstant, LinkageTypes Linkage, @@ -155,16 +144,6 @@ class GlobalVariable : public GlobalObject, public ilist_node { bool isConstant() const { return isConstantGlobal; } void setConstant(bool Val) { isConstantGlobal = Val; } - /// If the value is "Thread Local", its value isn't shared by the threads. - bool isThreadLocal() const { return threadLocalMode != NotThreadLocal; } - void setThreadLocal(bool Val) { - threadLocalMode = Val ? GeneralDynamicTLSModel : NotThreadLocal; - } - void setThreadLocalMode(ThreadLocalMode Val) { threadLocalMode = Val; } - ThreadLocalMode getThreadLocalMode() const { - return static_cast(threadLocalMode); - } - bool isExternallyInitialized() const { return isExternallyInitializedConstant; } diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 3282e8a23ba7..f0efa9414d6a 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -259,10 +259,13 @@ bool LLParser::ParseTopLevelEntities() { case lltok::kw_extern_weak: // OptionalLinkage case lltok::kw_external: { // OptionalLinkage unsigned Linkage, Visibility, DLLStorageClass; + GlobalVariable::ThreadLocalMode TLM; if (ParseOptionalLinkage(Linkage) || ParseOptionalVisibility(Visibility) || ParseOptionalDLLStorageClass(DLLStorageClass) || - ParseGlobal("", SMLoc(), Linkage, true, Visibility, DLLStorageClass)) + ParseOptionalThreadLocal(TLM) || + ParseGlobal("", SMLoc(), Linkage, true, Visibility, DLLStorageClass, + TLM)) return true; break; } @@ -270,18 +273,28 @@ bool LLParser::ParseTopLevelEntities() { case lltok::kw_hidden: // OptionalVisibility case lltok::kw_protected: { // OptionalVisibility unsigned Visibility, DLLStorageClass; + GlobalVariable::ThreadLocalMode TLM; if (ParseOptionalVisibility(Visibility) || ParseOptionalDLLStorageClass(DLLStorageClass) || - ParseGlobal("", SMLoc(), 0, false, Visibility, DLLStorageClass)) + ParseOptionalThreadLocal(TLM) || + ParseGlobal("", SMLoc(), 0, false, Visibility, DLLStorageClass, TLM)) + return true; + break; + } + + case lltok::kw_thread_local: { // OptionalThreadLocal + GlobalVariable::ThreadLocalMode TLM; + if (ParseOptionalThreadLocal(TLM) || + ParseGlobal("", SMLoc(), 0, false, 0, 0, TLM)) return true; break; } - case lltok::kw_thread_local: // OptionalThreadLocal case lltok::kw_addrspace: // OptionalAddrSpace case lltok::kw_constant: // GlobalType case lltok::kw_global: // GlobalType - if (ParseGlobal("", SMLoc(), 0, false, 0, 0)) return true; + if (ParseGlobal("", SMLoc(), 0, false, 0, 0, GlobalValue::NotThreadLocal)) + return true; break; case lltok::kw_attributes: if (ParseUnnamedAttrGrp()) return true; break; @@ -470,15 +483,17 @@ bool LLParser::ParseUnnamedGlobal() { bool HasLinkage; unsigned Linkage, Visibility, DLLStorageClass; + GlobalVariable::ThreadLocalMode TLM; if (ParseOptionalLinkage(Linkage, HasLinkage) || ParseOptionalVisibility(Visibility) || - ParseOptionalDLLStorageClass(DLLStorageClass)) + ParseOptionalDLLStorageClass(DLLStorageClass) || + 
ParseOptionalThreadLocal(TLM)) return true; if (HasLinkage || Lex.getKind() != lltok::kw_alias) return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility, - DLLStorageClass); - return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass); + DLLStorageClass, TLM); + return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass, TLM); } /// ParseNamedGlobal: @@ -493,16 +508,18 @@ bool LLParser::ParseNamedGlobal() { bool HasLinkage; unsigned Linkage, Visibility, DLLStorageClass; + GlobalVariable::ThreadLocalMode TLM; if (ParseToken(lltok::equal, "expected '=' in global variable") || ParseOptionalLinkage(Linkage, HasLinkage) || ParseOptionalVisibility(Visibility) || - ParseOptionalDLLStorageClass(DLLStorageClass)) + ParseOptionalDLLStorageClass(DLLStorageClass) || + ParseOptionalThreadLocal(TLM)) return true; if (HasLinkage || Lex.getKind() != lltok::kw_alias) return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility, - DLLStorageClass); - return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass); + DLLStorageClass, TLM); + return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass, TLM); } // MDString: @@ -639,7 +656,8 @@ static bool isValidVisibilityForLinkage(unsigned V, unsigned L) { /// Everything through DLL storage class has already been parsed. /// bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, - unsigned Visibility, unsigned DLLStorageClass) { + unsigned Visibility, unsigned DLLStorageClass, + GlobalVariable::ThreadLocalMode TLM) { assert(Lex.getKind() == lltok::kw_alias); Lex.Lex(); LocTy LinkageLoc = Lex.getLoc(); @@ -699,6 +717,7 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, std::unique_ptr GA( GlobalAlias::create(Ty, AddrSpace, (GlobalValue::LinkageTypes)Linkage, Name, Aliasee, /*Parent*/ nullptr)); + GA->setThreadLocalMode(TLM); GA->setVisibility((GlobalValue::VisibilityTypes)Visibility); GA->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass); @@ -753,21 +772,20 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, /// bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, unsigned Linkage, bool HasLinkage, - unsigned Visibility, unsigned DLLStorageClass) { + unsigned Visibility, unsigned DLLStorageClass, + GlobalVariable::ThreadLocalMode TLM) { if (!isValidVisibilityForLinkage(Visibility, Linkage)) return Error(NameLoc, "symbol with local linkage must have default visibility"); unsigned AddrSpace; bool IsConstant, UnnamedAddr, IsExternallyInitialized; - GlobalVariable::ThreadLocalMode TLM; LocTy UnnamedAddrLoc; LocTy IsExternallyInitializedLoc; LocTy TyLoc; Type *Ty = nullptr; - if (ParseOptionalThreadLocal(TLM) || - ParseOptionalAddrSpace(AddrSpace) || + if (ParseOptionalAddrSpace(AddrSpace) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, &UnnamedAddrLoc) || ParseOptionalToken(lltok::kw_externally_initialized, diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index e2bf46290b38..1257b0aadc4a 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -239,9 +239,11 @@ namespace llvm { bool ParseNamedGlobal(); bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage, bool HasLinkage, unsigned Visibility, - unsigned DLLStorageClass); + unsigned DLLStorageClass, + GlobalVariable::ThreadLocalMode TLM); bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility, - unsigned DLLStorageClass); + unsigned DLLStorageClass, + GlobalVariable::ThreadLocalMode TLM); bool ParseStandaloneMetadata(); bool 
ParseNamedMetadata(); bool ParseMDString(MDString *&Result); diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 4170f98567c2..a8fd8fab5072 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2017,6 +2017,8 @@ error_code BitcodeReader::ParseModule(bool Resume) { NewGA->setDLLStorageClass(GetDecodedDLLStorageClass(Record[4])); else UpgradeDLLImportExportLinkage(NewGA, Record[2]); + if (Record.size() > 5) + NewGA->setThreadLocalMode(GetDecodedThreadLocalMode(Record[5])); ValueList.push_back(NewGA); AliasInits.push_back(std::make_pair(NewGA, Record[1])); break; diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index cc73b842e338..dddcbc6f7e08 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -511,7 +511,7 @@ static unsigned getEncodedDLLStorageClass(const GlobalValue &GV) { llvm_unreachable("Invalid DLL storage class"); } -static unsigned getEncodedThreadLocalMode(const GlobalVariable &GV) { +static unsigned getEncodedThreadLocalMode(const GlobalValue &GV) { switch (GV.getThreadLocalMode()) { case GlobalVariable::NotThreadLocal: return 0; case GlobalVariable::GeneralDynamicTLSModel: return 1; @@ -668,6 +668,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, Vals.push_back(getEncodedLinkage(A)); Vals.push_back(getEncodedVisibility(A)); Vals.push_back(getEncodedDLLStorageClass(A)); + if (A.isThreadLocal()) + Vals.push_back(getEncodedThreadLocalMode(A)); unsigned AbbrevToUse = 0; Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse); Vals.clear(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b1b8035a7d9a..51ae11dea21a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1190,15 +1190,8 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, if (BitWidth < 64) Offset = SignExtend64(Offset, BitWidth); - const GlobalVariable *GVar = dyn_cast(GV); - if (!GVar) { - // If GV is an alias then use the aliasee for determining thread-localness. - if (const GlobalAlias *GA = dyn_cast(GV)) - GVar = dyn_cast_or_null(GA->getAliasee()); - } - unsigned Opc; - if (GVar && GVar->isThreadLocal()) + if (GV->isThreadLocal()) Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; else Opc = isTargetGA ? 
ISD::TargetGlobalAddress : ISD::GlobalAddress; diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 0fef0d0a188f..8aee77ac0717 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -1488,6 +1488,7 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) { } PrintVisibility(GA->getVisibility(), Out); PrintDLLStorageClass(GA->getDLLStorageClass(), Out); + PrintThreadLocalModel(GA->getThreadLocalMode(), Out); Out << "alias "; diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index c905cfe31e2d..344a08d8f338 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -113,8 +113,9 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, : GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, OperandTraits::op_begin(this), InitVal != nullptr, Link, Name), - isConstantGlobal(constant), threadLocalMode(TLMode), + isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { + setThreadLocalMode(TLMode); if (InitVal) { assert(InitVal->getType() == Ty && "Initializer should be the same type as the GlobalVariable!"); @@ -132,8 +133,9 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant, : GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, OperandTraits::op_begin(this), InitVal != nullptr, Link, Name), - isConstantGlobal(constant), threadLocalMode(TLMode), + isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { + setThreadLocalMode(TLMode); if (InitVal) { assert(InitVal->getType() == Ty && "Initializer should be the same type as the GlobalVariable!"); diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index c3b53692fb2a..f97cfb943d7d 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -240,21 +240,15 @@ unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) { } unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) { - // We can't handle thread-local variables quickly yet. Unfortunately we have - // to peer through any aliases to find out if that rule applies. - const GlobalValue *TLSGV = GV; - if (const GlobalAlias *GA = dyn_cast(GV)) - TLSGV = GA->getAliasee(); + // We can't handle thread-local variables quickly yet. + if (GV->isThreadLocal()) + return 0; // MachO still uses GOT for large code-model accesses, but ELF requires // movz/movk sequences, which FastISel doesn't handle yet. if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO()) return 0; - if (const GlobalVariable *GVar = dyn_cast(TLSGV)) - if (GVar->isThreadLocal()) - return 0; - unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); EVT DestEVT = TLI.getValueType(GV->getType(), true); diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index ed3cb4d3293d..f55984ea9dd3 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -1859,15 +1859,9 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // handle switches; if that changes, we need them as well. For now, // what follows assumes everything's a generic (or TLS) global address. const GlobalVariable *GVar = dyn_cast(GV); - if (!GVar) { - // If GV is an alias, use the aliasee for determining thread-locality. - if (const GlobalAlias *GA = dyn_cast(GV)) - GVar = dyn_cast_or_null(GA->getAliasee()); - } // FIXME: We don't yet handle the complexity of TLS. 
- bool IsTLS = GVar && GVar->isThreadLocal(); - if (IsTLS) + if (GV->isThreadLocal()) return 0; // For small code model, generate a simple TOC load. diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 8365f64dc54a..95c8cb66f402 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -88,8 +88,8 @@ CodeModel::Model TargetMachine::getCodeModel() const { } /// Get the IR-specified TLS model for Var. -static TLSModel::Model getSelectedTLSModel(const GlobalVariable *Var) { - switch (Var->getThreadLocalMode()) { +static TLSModel::Model getSelectedTLSModel(const GlobalValue *GV) { + switch (GV->getThreadLocalMode()) { case GlobalVariable::NotThreadLocal: llvm_unreachable("getSelectedTLSModel for non-TLS variable"); break; @@ -127,13 +127,10 @@ TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const { Model = TLSModel::InitialExec; } - const GlobalVariable *Var = dyn_cast(GV); - if (Var) { - // If the user specified a more specific model, use that. - TLSModel::Model SelectedModel = getSelectedTLSModel(Var); - if (SelectedModel > Model) - return SelectedModel; - } + // If the user specified a more specific model, use that. + TLSModel::Model SelectedModel = getSelectedTLSModel(GV); + if (SelectedModel > Model) + return SelectedModel; return Model; } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 56bcfa30ff90..2ef4bf29ccd1 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -355,17 +355,8 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { return false; // Can't handle TLS yet. - if (const GlobalVariable *GVar = dyn_cast(GV)) - if (GVar->isThreadLocal()) - return false; - - // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how - // it works...). 
- if (const GlobalAlias *GA = dyn_cast(GV)) - if (const GlobalVariable *GVar = - dyn_cast_or_null(GA->getAliasee())) - if (GVar->isThreadLocal()) - return false; + if (GV->isThreadLocal()) + return false; // RIP-relative addresses can't have additional register operands, so if // we've already folded stuff into the addressing mode, just force the diff --git a/test/CodeGen/Mips/tls-alias.ll b/test/CodeGen/Mips/tls-alias.ll index 80fbe87a8d61..b61f84e03761 100644 --- a/test/CodeGen/Mips/tls-alias.ll +++ b/test/CodeGen/Mips/tls-alias.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=mipsel -relocation-model=pic -disable-mips-delay-filler < %s | FileCheck %s @foo = thread_local global i32 42 -@bar = hidden alias i32* @foo +@bar = hidden thread_local alias i32* @foo define i32* @zed() { ; CHECK-DAG: __tls_get_addr diff --git a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll index e64375a2b361..a0106d7798d5 100644 --- a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll +++ b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll @@ -8,7 +8,7 @@ target triple = "i386-pc-linux-gnu" @__resp = thread_local global %struct.__res_state* @_res ; <%struct.__res_state**> [#uses=1] @_res = global %struct.__res_state zeroinitializer, section ".bss" ; <%struct.__res_state*> [#uses=1] -@__libc_resp = hidden alias %struct.__res_state** @__resp ; <%struct.__res_state**> [#uses=2] +@__libc_resp = hidden thread_local alias %struct.__res_state** @__resp ; <%struct.__res_state**> [#uses=2] define i32 @foo() { ; CHECK-LABEL: foo: diff --git a/test/CodeGen/X86/aliases.ll b/test/CodeGen/X86/aliases.ll index 8487c6082b72..d207880d6603 100644 --- a/test/CodeGen/X86/aliases.ll +++ b/test/CodeGen/X86/aliases.ll @@ -1,4 +1,20 @@ -; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=false | FileCheck %s +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=false \ +; RUN: -relocation-model=pic | FileCheck %s + +@thread_var = thread_local global i32 42, align 4 +@thread_alias = thread_local(localdynamic) alias i32* @thread_var + +; CHECK-LABEL: get_thread_var +define i32* @get_thread_var() { +; CHECK: leal thread_var@TLSGD + ret i32* @thread_var +} + +; CHECK-LABEL: get_thread_alias +define i32* @get_thread_alias() { +; CHECK: leal thread_alias@TLSLD + ret i32* @thread_alias +} @bar = global i32 42 diff --git a/test/Feature/alias2.ll b/test/Feature/alias2.ll index 693ef7c9bef9..4334a6201710 100644 --- a/test/Feature/alias2.ll +++ b/test/Feature/alias2.ll @@ -17,3 +17,6 @@ @v6 = alias i16, i32* @v1 ; CHECK: @v6 = alias i16, i32* @v1 + +@v7 = thread_local(localdynamic) alias i32* @v1 +; CHECK: @v7 = thread_local(localdynamic) alias i32* @v1 From 296f55f618491316b36eba75e5127b00e56e0cd1 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Wed, 28 May 2014 18:19:55 +0000 Subject: [PATCH 214/906] Fix standard integer definitions for MSVC in DataTypes.h Previously, DataTypes.h would #define a variety of symbols any time they weren't already defined. However, some versions of Visual Studio do provide the appropriate headers, so if those headers are included after DataTypes.h, it can lead to macro redefinition warnings. The fix is to include the appropriate headers if they exist, and only #define the symbols if the required header does not exist. Patch by Zachary Turner! --- The big change here is that we no longer have our own stdint.h typedefs because now all supported toolchains have stdint.h. Hooray! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209760 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/DataTypes.h.cmake | 121 +++++-------------------- include/llvm/Support/DataTypes.h.in | 119 +++++------------------- 2 files changed, 46 insertions(+), 194 deletions(-) diff --git a/include/llvm/Support/DataTypes.h.cmake b/include/llvm/Support/DataTypes.h.cmake index a26070cdc9fe..1f0c8eba5e11 100644 --- a/include/llvm/Support/DataTypes.h.cmake +++ b/include/llvm/Support/DataTypes.h.cmake @@ -37,6 +37,16 @@ #include #endif +#ifdef HAVE_INTTYPES_H +#include +#endif + +#ifdef HAVE_STDINT_H +#include +#else +#error "Compiler must provide an implementation of stdint.h" +#endif + #ifndef _MSC_VER /* Note that this header's correct operation depends on __STDC_LIMIT_MACROS @@ -55,14 +65,6 @@ /* Note that includes , if this is a C99 system. */ #include -#ifdef HAVE_INTTYPES_H -#include -#endif - -#ifdef HAVE_STDINT_H -#include -#endif - #ifdef _AIX #include "llvm/Support/AIXDataTypesFix.h" #endif @@ -77,11 +79,6 @@ typedef u_int64_t uint64_t; #endif #else /* _MSC_VER */ -/* Visual C++ doesn't provide standard integer headers, but it does provide - built-in data types. */ -#ifdef HAVE_STDINT_H -#include -#endif #include #include #include @@ -90,93 +87,21 @@ typedef u_int64_t uint64_t; #else #include #endif -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -typedef signed int int32_t; -typedef unsigned int uint32_t; -typedef short int16_t; -typedef unsigned short uint16_t; -typedef signed char int8_t; -typedef unsigned char uint8_t; + #if defined(_WIN64) - typedef signed __int64 ssize_t; +typedef signed __int64 ssize_t; #else - typedef signed int ssize_t; -#endif -#ifndef INT8_MAX -# define INT8_MAX 127 -#endif -#ifndef INT8_MIN -# define INT8_MIN -128 -#endif -#ifndef UINT8_MAX -# define UINT8_MAX 255 -#endif -#ifndef INT16_MAX -# define INT16_MAX 32767 -#endif -#ifndef INT16_MIN -# define INT16_MIN -32768 -#endif -#ifndef UINT16_MAX -# define UINT16_MAX 65535 -#endif -#ifndef INT32_MAX -# define INT32_MAX 2147483647 -#endif -#ifndef INT32_MIN -/* MSC treats -2147483648 as -(2147483648U). */ -# define INT32_MIN (-INT32_MAX - 1) -#endif -#ifndef UINT32_MAX -# define UINT32_MAX 4294967295U -#endif -/* Certain compatibility updates to VC++ introduce the `cstdint' - * header, which defines the INT*_C macros. On default installs they - * are absent. 
*/ -#ifndef INT8_C -# define INT8_C(C) C##i8 -#endif -#ifndef UINT8_C -# define UINT8_C(C) C##ui8 -#endif -#ifndef INT16_C -# define INT16_C(C) C##i16 -#endif -#ifndef UINT16_C -# define UINT16_C(C) C##ui16 -#endif -#ifndef INT32_C -# define INT32_C(C) C##i32 -#endif -#ifndef UINT32_C -# define UINT32_C(C) C##ui32 -#endif -#ifndef INT64_C -# define INT64_C(C) C##i64 -#endif -#ifndef UINT64_C -# define UINT64_C(C) C##ui64 -#endif - -#ifndef PRId64 -# define PRId64 "I64d" -#endif -#ifndef PRIi64 -# define PRIi64 "I64i" -#endif -#ifndef PRIo64 -# define PRIo64 "I64o" -#endif -#ifndef PRIu64 -# define PRIu64 "I64u" -#endif -#ifndef PRIx64 -# define PRIx64 "I64x" -#endif -#ifndef PRIX64 -# define PRIX64 "I64X" -#endif +typedef signed int ssize_t; +#endif /* _WIN64 */ + +#ifndef HAVE_INTTYPES_H +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#endif /* HAVE_INTTYPES_H */ #endif /* _MSC_VER */ diff --git a/include/llvm/Support/DataTypes.h.in b/include/llvm/Support/DataTypes.h.in index 7fc9b725244e..09cfcdf3b56b 100644 --- a/include/llvm/Support/DataTypes.h.in +++ b/include/llvm/Support/DataTypes.h.in @@ -37,6 +37,16 @@ #include #endif +#ifdef HAVE_INTTYPES_H +#include +#endif + +#ifdef HAVE_STDINT_H +#include +#else +#error "Compiler must provide an implementation of stdint.h" +#endif + #ifndef _MSC_VER /* Note that this header's correct operation depends on __STDC_LIMIT_MACROS @@ -55,14 +65,6 @@ /* Note that includes , if this is a C99 system. */ #include -#ifdef HAVE_INTTYPES_H -#include -#endif - -#ifdef HAVE_STDINT_H -#include -#endif - #ifdef _AIX #include "llvm/Support/AIXDataTypesFix.h" #endif @@ -77,8 +79,6 @@ typedef u_int64_t uint64_t; #endif #else /* _MSC_VER */ -/* Visual C++ doesn't provide standard integer headers, but it does provide - built-in data types. */ #include #include #include @@ -87,94 +87,21 @@ typedef u_int64_t uint64_t; #else #include #endif -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -typedef signed int int32_t; -typedef unsigned int uint32_t; -typedef short int16_t; -typedef unsigned short uint16_t; -typedef signed char int8_t; -typedef unsigned char uint8_t; + #if defined(_WIN64) - typedef signed __int64 ssize_t; +typedef signed __int64 ssize_t; #else - typedef signed int ssize_t; -#endif - -#ifndef INT8_MAX -# define INT8_MAX 127 -#endif -#ifndef INT8_MIN -# define INT8_MIN -128 -#endif -#ifndef UINT8_MAX -# define UINT8_MAX 255 -#endif -#ifndef INT16_MAX -# define INT16_MAX 32767 -#endif -#ifndef INT16_MIN -# define INT16_MIN -32768 -#endif -#ifndef UINT16_MAX -# define UINT16_MAX 65535 -#endif -#ifndef INT32_MAX -# define INT32_MAX 2147483647 -#endif -#ifndef INT32_MIN -/* MSC treats -2147483648 as -(2147483648U). */ -# define INT32_MIN (-INT32_MAX - 1) -#endif -#ifndef UINT32_MAX -# define UINT32_MAX 4294967295U -#endif -/* Certain compatibility updates to VC++ introduce the `cstdint' - * header, which defines the INT*_C macros. On default installs they - * are absent. 
*/ -#ifndef INT8_C -# define INT8_C(C) C##i8 -#endif -#ifndef UINT8_C -# define UINT8_C(C) C##ui8 -#endif -#ifndef INT16_C -# define INT16_C(C) C##i16 -#endif -#ifndef UINT16_C -# define UINT16_C(C) C##ui16 -#endif -#ifndef INT32_C -# define INT32_C(C) C##i32 -#endif -#ifndef UINT32_C -# define UINT32_C(C) C##ui32 -#endif -#ifndef INT64_C -# define INT64_C(C) C##i64 -#endif -#ifndef UINT64_C -# define UINT64_C(C) C##ui64 -#endif - -#ifndef PRId64 -# define PRId64 "I64d" -#endif -#ifndef PRIi64 -# define PRIi64 "I64i" -#endif -#ifndef PRIo64 -# define PRIo64 "I64o" -#endif -#ifndef PRIu64 -# define PRIu64 "I64u" -#endif -#ifndef PRIx64 -# define PRIx64 "I64x" -#endif -#ifndef PRIX64 -# define PRIX64 "I64X" -#endif +typedef signed int ssize_t; +#endif /* _WIN64 */ + +#ifndef HAVE_INTTYPES_H +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#endif /* HAVE_INTTYPES_H */ #endif /* _MSC_VER */ From e04c0e3f8d7ae06db85079948745a855f299ba14 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 28 May 2014 18:48:10 +0000 Subject: [PATCH 215/906] Revert "InstCombine: Improvement to check if signed addition overflows." This reverts commit r209746. It looks it is causing a crash while building libcxx. I am trying to get a reduced testcase. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209762 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineAddSub.cpp | 50 ++--------------- test/Transforms/InstCombine/AddOverflow.ll | 56 ------------------- 2 files changed, 6 insertions(+), 100 deletions(-) delete mode 100644 test/Transforms/InstCombine/AddOverflow.ll diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index eca4e4a78702..c37a9cf2ef9f 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -889,34 +889,11 @@ static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { return nullptr; } -// If one of the operands only has one non-zero bit, and if the other -// operand has a known-zero bit in a more significant place than it (not -// including the sign bit) the ripple may go up to and fill the zero, but -// won't change the sign. For example, (X & ~4) + 1. -// FIXME: Handle case where LHS has a zero before the 1 in the RHS, but also -// has one after. -static bool CheckRippleForAdd(APInt Op0KnownZero, APInt Op0KnownOne, - APInt Op1KnownZero, APInt Op1KnownOne) { - // Make sure that one of the operand has only one bit set to 1 and all other - // bit set to 0. - if ((~Op1KnownZero).countPopulation() == 1) { - int BitWidth = Op0KnownZero.getBitWidth(); - // Ignore Sign Bit. - Op0KnownZero.clearBit(BitWidth - 1); - int Op1OnePosition = BitWidth - Op1KnownOne.countLeadingZeros() - 1; - int Op0ZeroPosition = BitWidth - Op0KnownZero.countLeadingZeros() - 1; - if ((Op0ZeroPosition != (BitWidth - 1)) && - (Op0ZeroPosition >= Op1OnePosition)) - return true; - } - return false; -} /// WillNotOverflowSignedAdd - Return true if we can prove that: /// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) /// This basically requires proving that the add in the original type would not /// overflow to change the sign bit or have a carry out. -/// TODO: Handle this for Vectors. bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { // There are different heuristics we can use for this. Here are some simple // ones. 
@@ -928,29 +905,14 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) return true; - if (IntegerType *IT = dyn_cast(LHS->getType())) { - int BitWidth = IT->getBitWidth(); - APInt LHSKnownZero(BitWidth, 0, /*isSigned*/ true); - APInt LHSKnownOne(BitWidth, 0, /*isSigned*/ true); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); + // If one of the operands only has one non-zero bit, and if the other operand + // has a known-zero bit in a more significant place than it (not including the + // sign bit) the ripple may go up to and fill the zero, but won't change the + // sign. For example, (X & ~4) + 1. + + // TODO: Implement. - APInt RHSKnownZero(BitWidth, 0, /*isSigned*/ true); - APInt RHSKnownOne(BitWidth, 0, /*isSigned*/ true); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); - - // Addition of two 2's compliment numbers having opposite signs will never - // overflow. - if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) || - (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1])) - return true; - - // Check if carry bit of addition will not cause overflow. - if (CheckRippleForAdd(LHSKnownZero, LHSKnownOne, RHSKnownZero, RHSKnownOne)) - return true; - if (CheckRippleForAdd(RHSKnownZero, RHSKnownOne, LHSKnownZero, LHSKnownOne)) - return true; - } return false; } diff --git a/test/Transforms/InstCombine/AddOverflow.ll b/test/Transforms/InstCombine/AddOverflow.ll deleted file mode 100644 index 1bbd1fc59a5d..000000000000 --- a/test/Transforms/InstCombine/AddOverflow.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: opt < %s -instcombine -S | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; CHECK-LABEL: @ripple( -; CHECK: add nsw i16 %tmp1, 1 -define i32 @ripple(i16 signext %x) { -bb: - %tmp = sext i16 %x to i32 - %tmp1 = and i32 %tmp, -5 - %tmp2 = trunc i32 %tmp1 to i16 - %tmp3 = sext i16 %tmp2 to i32 - %tmp4 = add i32 %tmp3, 1 - ret i32 %tmp4 -} - -; CHECK-LABEL: @ripplenot( -; CHECK: add i32 %tmp3, 4 -define i32 @ripplenot(i16 signext %x) { -bb: - %tmp = sext i16 %x to i32 - %tmp1 = and i32 %tmp, -3 - %tmp2 = trunc i32 %tmp1 to i16 - %tmp3 = sext i16 %tmp2 to i32 - %tmp4 = add i32 %tmp3, 4 - ret i32 %tmp4 -} - -; CHECK-LABEL: @oppositesign( -; CHECK: add nsw i16 %tmp1, 4 -define i32 @oppositesign(i16 signext %x) { -bb: - %tmp = sext i16 %x to i32 - %tmp1 = or i32 %tmp, 32768 - %tmp2 = trunc i32 %tmp1 to i16 - %tmp3 = sext i16 %tmp2 to i32 - %tmp4 = add i32 %tmp3, 4 - ret i32 %tmp4 -} - -; CHECK-LABEL: @ripplenot_var( -; CHECK: add i32 %tmp6, %tmp7 -define i32 @ripplenot_var(i16 signext %x, i16 signext %y) { -bb: - %tmp = sext i16 %x to i32 - %tmp1 = and i32 %tmp, -5 - %tmp2 = trunc i32 %tmp1 to i16 - %tmp3 = sext i16 %y to i32 - %tmp4 = or i32 %tmp3, 2 - %tmp5 = trunc i32 %tmp4 to i16 - %tmp6 = sext i16 %tmp5 to i32 - %tmp7 = sext i16 %tmp2 to i32 - %tmp8 = add i32 %tmp6, %tmp7 - ret i32 %tmp8 -} From f558122fe5461652e960403592aa25c4cb9def7f Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 28 May 2014 19:03:33 +0000 Subject: [PATCH 216/906] test check-in: added missing parenthesis in comment git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209763 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolutionNormalization.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index 
e9db295a4acf..3ccefb01101d 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -241,7 +241,7 @@ TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) { } /// Top level driver for transforming an expression DAG into its requested -/// post-inc form (either "Normalized" or "Denormalized". +/// post-inc form (either "Normalized" or "Denormalized"). const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, const SCEV *S, Instruction *User, From 913982386604ffdf70efa903b57588b505a8b269 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 28 May 2014 20:07:37 +0000 Subject: [PATCH 217/906] fixed a few typos git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209768 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/CMake.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/CMake.rst b/docs/CMake.rst index fed283d3995a..988e12b73502 100644 --- a/docs/CMake.rst +++ b/docs/CMake.rst @@ -132,7 +132,7 @@ write the variable and the type on the CMake command line: Frequently-used CMake variables ------------------------------- -Here are listed some of the CMake variables that are used often, along with a +Here are some of the CMake variables that are used often, along with a brief explanation and LLVM-specific notes. For full documentation, check the CMake docs or execute ``cmake --help-variable VARIABLE_NAME``. @@ -157,8 +157,8 @@ CMake docs or execute ``cmake --help-variable VARIABLE_NAME``. Extra flags to use when compiling C++ source files. **BUILD_SHARED_LIBS**:BOOL - Flag indicating is shared libraries will be built. Its default value is - OFF. Shared libraries are not supported on Windows and not recommended in the + Flag indicating if shared libraries will be built. Its default value is + OFF. Shared libraries are not supported on Windows and not recommended on the other OSes. .. _LLVM-specific variables: From ca9f4b06350615e96b42c64007dbd29c15894083 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 28 May 2014 20:31:52 +0000 Subject: [PATCH 218/906] Update CREDITS to be at least moderately more current. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209770 91177308-0d34-0410-b5e6-96231b3b80d8 --- CREDITS.TXT | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CREDITS.TXT b/CREDITS.TXT index 311a661a7546..2b8b115516a9 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -162,10 +162,12 @@ D: Improvements for space efficiency N: James Grosbach E: grosbach@apple.com +I: grosbach D: SjLj exception handling support D: General fixes and improvements for the ARM back-end D: MCJIT D: ARM integrated assembler and assembly parser +D: Led effort for the backend formerly known as ARM64 N: Lang Hames E: lhames@gmail.com From e8d18694cbee58ffd7137014e89200fc424e3b78 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 28 May 2014 20:51:42 +0000 Subject: [PATCH 219/906] Fix wrong setcc result type when legalizing uaddo/usubo No test because no in-tree targets change the bitwidth of the setcc type depending on the bitwidth of the compared type. 
Patch by Ke Bai git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209771 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index a59e8954eb7c..accdd0a5e800 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3653,7 +3653,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); - EVT OType = Node->getValueType(1); + EVT ResultType = Node->getValueType(1); + EVT OType = getSetCCResultType(Node->getValueType(0)); SDValue Zero = DAG.getConstant(0, LHS.getValueType()); @@ -3676,7 +3677,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); - Results.push_back(Cmp); + Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType)); break; } case ISD::UADDO: @@ -3687,9 +3688,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); - Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS, - Node->getOpcode () == ISD::UADDO ? - ISD::SETULT : ISD::SETUGT)); + + EVT ResultType = Node->getValueType(1); + EVT SetCCType = getSetCCResultType(Node->getValueType(0)); + ISD::CondCode CC + = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; + SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); + + Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType)); break; } case ISD::UMULO: From 836475b2e2b45d34bcc558497f47a87585157254 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 28 May 2014 21:41:21 +0000 Subject: [PATCH 220/906] Revert "Add support for combining GEPs across PHI nodes" This reverts commit r209755. it was the real cause of the libc++ build failure. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209775 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstructionCombining.cpp | 79 ------------------- test/Transforms/InstCombine/gepphigep.ll | 56 ------------- 2 files changed, 135 deletions(-) delete mode 100644 test/Transforms/InstCombine/gepphigep.ll diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 80eec1b311bf..4c36887f6285 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1220,85 +1220,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (MadeChange) return &GEP; } - // Check to see if the inputs to the PHI node are getelementptr instructions. 
- if (PHINode *PN = dyn_cast(PtrOp)) { - GetElementPtrInst *Op1 = dyn_cast(PN->getOperand(0)); - if (!Op1) - return nullptr; - - signed DI = -1; - - for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { - GetElementPtrInst *Op2 = dyn_cast(*I); - if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands()) - return nullptr; - - for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) { - if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType()) - return nullptr; - - if (Op1->getOperand(J) != Op2->getOperand(J)) { - if (DI == -1) { - // We have not seen any differences yet in the GEPs feeding the - // PHI yet, so we record this one if it is allowed to be a - // variable. - - // The first two arguments can vary for any GEP, the rest have to be - // static for struct slots - if (J > 1) { - SmallVector Idxs(GEP.idx_begin(), GEP.idx_begin()+J); - Type *Ty = - GetElementPtrInst::getIndexedType(Op1->getOperand(0)->getType(), - Idxs); - if (Ty->isStructTy()) - return nullptr; - } - - DI = J; - } else { - // The GEP is different by more than one input. While this could be - // extended to support GEPs that vary by more than one variable it - // doesn't make sense since it greatly increases the complexity and - // would result in an R+R+R addressing mode which no backend - // directly supports and would need to be broken into several - // simpler instructions anyway. - return nullptr; - } - } - } - } - - GetElementPtrInst *NewGEP = cast(Op1->clone()); - - if (DI == -1) { - // All the GEPs feeding the PHI are identical. Clone one down into our - // BB so that it can be merged with the current GEP. - GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(), - NewGEP); - } else { - // All the GEPs feeding the PHI differ at a single offset. Clone a GEP - // into the current block so it can be merged, and create a new PHI to - // set that index. - Instruction *InsertPt = Builder->GetInsertPoint(); - Builder->SetInsertPoint(PN); - PHINode *NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(), - PN->getNumOperands()); - Builder->SetInsertPoint(InsertPt); - - for (auto &I : PN->operands()) - NewPN->addIncoming(cast(I)->getOperand(DI), - PN->getIncomingBlock(I)); - - NewGEP->setOperand(DI, NewPN); - GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(), - NewGEP); - NewGEP->setOperand(DI, NewPN); - } - - GEP.setOperand(0, NewGEP); - PtrOp = NewGEP; - } - // Combine Indices - If the source pointer to this getelementptr instruction // is a getelementptr instruction, combine the indices of the two // getelementptr instructions into a single instruction. 
diff --git a/test/Transforms/InstCombine/gepphigep.ll b/test/Transforms/InstCombine/gepphigep.ll deleted file mode 100644 index 9aab609901e2..000000000000 --- a/test/Transforms/InstCombine/gepphigep.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: opt -instcombine -S < %s | FileCheck %s - -%struct1 = type { %struct2*, i32, i32, i32 } -%struct2 = type { i32, i32 } - -define i32 @test1(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) { -bb: - %tmp = getelementptr inbounds %struct1* %dm, i64 0, i32 0 - %tmp1 = load %struct2** %tmp, align 8 - br i1 %tmp4, label %bb1, label %bb2 - -bb1: - %tmp10 = getelementptr inbounds %struct2* %tmp1, i64 %tmp9 - %tmp11 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 0 - store i32 0, i32* %tmp11, align 4 - br label %bb3 - -bb2: - %tmp20 = getelementptr inbounds %struct2* %tmp1, i64 %tmp19 - %tmp21 = getelementptr inbounds %struct2* %tmp20, i64 0, i32 0 - store i32 0, i32* %tmp21, align 4 - br label %bb3 - -bb3: - %phi = phi %struct2* [ %tmp10, %bb1 ], [ %tmp20, %bb2 ] - %tmp24 = getelementptr inbounds %struct2* %phi, i64 0, i32 1 - %tmp25 = load i32* %tmp24, align 4 - ret i32 %tmp25 - -; CHECK-LABEL: @test1( -; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 0 -; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp19, i32 0 -; CHECK: %[[PHI:[0-9A-Za-z]+]] = phi i64 [ %tmp9, %bb1 ], [ %tmp19, %bb2 ] -; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %[[PHI]], i32 1 - -} - -define i32 @test2(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) { -bb: - %tmp = getelementptr inbounds %struct1* %dm, i64 0, i32 0 - %tmp1 = load %struct2** %tmp, align 8 - %tmp10 = getelementptr inbounds %struct2* %tmp1, i64 %tmp9 - %tmp11 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 0 - store i32 0, i32* %tmp11, align 4 - %tmp20 = getelementptr inbounds %struct2* %tmp1, i64 %tmp19 - %tmp21 = getelementptr inbounds %struct2* %tmp20, i64 0, i32 0 - store i32 0, i32* %tmp21, align 4 - %tmp24 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 1 - %tmp25 = load i32* %tmp24, align 4 - ret i32 %tmp25 - -; CHECK-LABEL: @test2( -; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 0 -; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp19, i32 0 -; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 1 -} From c81cf72ef34bfc846bc9f271f590e8e02e38061b Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 28 May 2014 21:43:52 +0000 Subject: [PATCH 221/906] Revert "Revert "InstCombine: Improvement to check if signed addition overflows."" This reverts commit r209762, bringing back r209746. 
It was not responsible for the libc++ build failure git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209776 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineAddSub.cpp | 50 +++++++++++++++-- test/Transforms/InstCombine/AddOverflow.ll | 56 +++++++++++++++++++ 2 files changed, 100 insertions(+), 6 deletions(-) create mode 100644 test/Transforms/InstCombine/AddOverflow.ll diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c37a9cf2ef9f..eca4e4a78702 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -889,11 +889,34 @@ static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { return nullptr; } +// If one of the operands only has one non-zero bit, and if the other +// operand has a known-zero bit in a more significant place than it (not +// including the sign bit) the ripple may go up to and fill the zero, but +// won't change the sign. For example, (X & ~4) + 1. +// FIXME: Handle case where LHS has a zero before the 1 in the RHS, but also +// has one after. +static bool CheckRippleForAdd(APInt Op0KnownZero, APInt Op0KnownOne, + APInt Op1KnownZero, APInt Op1KnownOne) { + // Make sure that one of the operand has only one bit set to 1 and all other + // bit set to 0. + if ((~Op1KnownZero).countPopulation() == 1) { + int BitWidth = Op0KnownZero.getBitWidth(); + // Ignore Sign Bit. + Op0KnownZero.clearBit(BitWidth - 1); + int Op1OnePosition = BitWidth - Op1KnownOne.countLeadingZeros() - 1; + int Op0ZeroPosition = BitWidth - Op0KnownZero.countLeadingZeros() - 1; + if ((Op0ZeroPosition != (BitWidth - 1)) && + (Op0ZeroPosition >= Op1OnePosition)) + return true; + } + return false; +} /// WillNotOverflowSignedAdd - Return true if we can prove that: /// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) /// This basically requires proving that the add in the original type would not /// overflow to change the sign bit or have a carry out. +/// TODO: Handle this for Vectors. bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { // There are different heuristics we can use for this. Here are some simple // ones. @@ -905,14 +928,29 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) return true; + if (IntegerType *IT = dyn_cast(LHS->getType())) { - // If one of the operands only has one non-zero bit, and if the other operand - // has a known-zero bit in a more significant place than it (not including the - // sign bit) the ripple may go up to and fill the zero, but won't change the - // sign. For example, (X & ~4) + 1. - - // TODO: Implement. + int BitWidth = IT->getBitWidth(); + APInt LHSKnownZero(BitWidth, 0, /*isSigned*/ true); + APInt LHSKnownOne(BitWidth, 0, /*isSigned*/ true); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); + APInt RHSKnownZero(BitWidth, 0, /*isSigned*/ true); + APInt RHSKnownOne(BitWidth, 0, /*isSigned*/ true); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); + + // Addition of two 2's compliment numbers having opposite signs will never + // overflow. + if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) || + (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1])) + return true; + + // Check if carry bit of addition will not cause overflow. 
+ if (CheckRippleForAdd(LHSKnownZero, LHSKnownOne, RHSKnownZero, RHSKnownOne)) + return true; + if (CheckRippleForAdd(RHSKnownZero, RHSKnownOne, LHSKnownZero, LHSKnownOne)) + return true; + } return false; } diff --git a/test/Transforms/InstCombine/AddOverflow.ll b/test/Transforms/InstCombine/AddOverflow.ll new file mode 100644 index 000000000000..1bbd1fc59a5d --- /dev/null +++ b/test/Transforms/InstCombine/AddOverflow.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: @ripple( +; CHECK: add nsw i16 %tmp1, 1 +define i32 @ripple(i16 signext %x) { +bb: + %tmp = sext i16 %x to i32 + %tmp1 = and i32 %tmp, -5 + %tmp2 = trunc i32 %tmp1 to i16 + %tmp3 = sext i16 %tmp2 to i32 + %tmp4 = add i32 %tmp3, 1 + ret i32 %tmp4 +} + +; CHECK-LABEL: @ripplenot( +; CHECK: add i32 %tmp3, 4 +define i32 @ripplenot(i16 signext %x) { +bb: + %tmp = sext i16 %x to i32 + %tmp1 = and i32 %tmp, -3 + %tmp2 = trunc i32 %tmp1 to i16 + %tmp3 = sext i16 %tmp2 to i32 + %tmp4 = add i32 %tmp3, 4 + ret i32 %tmp4 +} + +; CHECK-LABEL: @oppositesign( +; CHECK: add nsw i16 %tmp1, 4 +define i32 @oppositesign(i16 signext %x) { +bb: + %tmp = sext i16 %x to i32 + %tmp1 = or i32 %tmp, 32768 + %tmp2 = trunc i32 %tmp1 to i16 + %tmp3 = sext i16 %tmp2 to i32 + %tmp4 = add i32 %tmp3, 4 + ret i32 %tmp4 +} + +; CHECK-LABEL: @ripplenot_var( +; CHECK: add i32 %tmp6, %tmp7 +define i32 @ripplenot_var(i16 signext %x, i16 signext %y) { +bb: + %tmp = sext i16 %x to i32 + %tmp1 = and i32 %tmp, -5 + %tmp2 = trunc i32 %tmp1 to i16 + %tmp3 = sext i16 %y to i32 + %tmp4 = or i32 %tmp3, 2 + %tmp5 = trunc i32 %tmp4 to i16 + %tmp6 = sext i16 %tmp5 to i32 + %tmp7 = sext i16 %tmp2 to i32 + %tmp8 = add i32 %tmp6, %tmp7 + ret i32 %tmp8 +} From 1d4e8baa9c7f9dfc14156a762d2cd0016ceca034 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Wed, 28 May 2014 22:49:12 +0000 Subject: [PATCH 222/906] Add a simple helper function to create a 64-bit integer. Add a function to combine two 32-bit integers into a 64-bit integer. There are no calls to this function yet, although a subsequent change will add some in LLDB. Reviewers: rnk Differential Revision: http://reviews.llvm.org/D3941 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209777 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/MathExtras.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h index f1f7b4feb580..6965faf8df8b 100644 --- a/include/llvm/Support/MathExtras.h +++ b/include/llvm/Support/MathExtras.h @@ -258,6 +258,12 @@ inline uint32_t Lo_32(uint64_t Value) { return static_cast(Value); } +/// Make_64 - This functions makes a 64-bit integer from a high / low pair of +/// 32-bit integers. +inline uint64_t Make_64(uint32_t High, uint32_t Low) { + return ((uint64_t)High << 32) | (uint64_t)Low; +} + /// isInt - Checks if an integer fits into the given bit width. template inline bool isInt(int64_t x) { From b18eabd2d88e95746c83a88d76ab31fed1e2903c Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Thu, 29 May 2014 00:51:15 +0000 Subject: [PATCH 223/906] [ASan] Use llvm.global_ctors to insert init-order checking calls into ASan runtime. Don't assume that dynamically initialized globals are all initialized from _GLOBAL__I_ function. Instead, scan the llvm.global_ctors and insert poison/unpoison calls to each function there. Patch by Nico Weber! 
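As an illustrative sketch (names invented, not taken from the patch's tests): with this change a dynamic initializer only needs to be listed in llvm.global_ctors to receive the init-order poison/unpoison calls (when init-order checking is in effect); it no longer has to be named _GLOBAL__(sub_)?I_*, and the entry for asan.module_ctor itself is skipped by name.

%struct.A = type { i32 }
@a = global %struct.A zeroinitializer, align 4

; The pass now walks this appending array instead of matching function names.
@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @my_custom_init }]

; Listed above, so the poison call is inserted at its entry and the unpoison
; call before its return, even though the name is not _GLOBAL__I_*.
define internal void @my_custom_init() sanitize_address section ".text.startup" {
entry:
  store i32 1, i32* getelementptr inbounds (%struct.A* @a, i64 0, i32 0), align 4
  ret void
}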
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209780 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 57 ++++++++----------- .../instrument_initializer_metadata.ll | 2 + 2 files changed, 25 insertions(+), 34 deletions(-) diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index f8cdb9f8f3d8..94050e47da9d 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -391,6 +391,7 @@ class AddressSanitizerModule : public ModulePass { void initializeCallbacks(Module &M); bool ShouldInstrumentGlobal(GlobalVariable *G); + void poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName); void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName); size_t MinRedzoneSizeForGlobal() const { return RedzoneSizeForScale(Mapping.Scale); @@ -851,48 +852,36 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Crash->setDebugLoc(OrigIns->getDebugLoc()); } -void AddressSanitizerModule::createInitializerPoisonCalls( - Module &M, GlobalValue *ModuleName) { - // We do all of our poisoning and unpoisoning within a global constructor. - // These are called _GLOBAL__(sub_)?I_.*. - // TODO: Consider looking through the functions in - // M.getGlobalVariable("llvm.global_ctors") instead of using this stringly - // typed approach. - Function *GlobalInit = nullptr; - for (auto &F : M.getFunctionList()) { - StringRef FName = F.getName(); - - const char kGlobalPrefix[] = "_GLOBAL__"; - if (!FName.startswith(kGlobalPrefix)) - continue; - FName = FName.substr(strlen(kGlobalPrefix)); - - const char kOptionalSub[] = "sub_"; - if (FName.startswith(kOptionalSub)) - FName = FName.substr(strlen(kOptionalSub)); - - if (FName.startswith("I_")) { - GlobalInit = &F; - break; - } - } - // If that function is not present, this TU contains no globals, or they have - // all been optimized away - if (!GlobalInit) - return; - +void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit, + GlobalValue *ModuleName) { // Set up the arguments to our poison/unpoison functions. - IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt()); + IRBuilder<> IRB(GlobalInit.begin()->getFirstInsertionPt()); // Add a call to poison all external globals before the given function starts. Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy); IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr); // Add calls to unpoison all globals before each return instruction. - for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end(); - I != E; ++I) { - if (ReturnInst *RI = dyn_cast(I->getTerminator())) { + for (auto &BB : GlobalInit.getBasicBlockList()) + if (ReturnInst *RI = dyn_cast(BB.getTerminator())) CallInst::Create(AsanUnpoisonGlobals, "", RI); +} + +void AddressSanitizerModule::createInitializerPoisonCalls( + Module &M, GlobalValue *ModuleName) { + GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + + ConstantArray *CA = cast(GV->getInitializer()); + for (Use &OP : CA->operands()) { + if (isa(OP)) + continue; + ConstantStruct *CS = cast(OP); + + // Must have a function or null ptr. + // (CS->getOperand(0) is the init priority.) 
+ if (Function* F = dyn_cast(CS->getOperand(1))) { + if (F->getName() != kAsanModuleCtorName) + poisonOneInitializer(*F, ModuleName); } } } diff --git a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll index 1d00cfacafe4..05e18b5a01bd 100644 --- a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll +++ b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll @@ -23,6 +23,8 @@ entry: ret void } +@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }] + define internal void @_GLOBAL__I_a() sanitize_address section ".text.startup" { entry: call void @__cxx_global_var_init() From 257670a79d2e7ab8c90e497bec6c4fa180611468 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Thu, 29 May 2014 01:10:14 +0000 Subject: [PATCH 224/906] Fix typo in variable name git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209784 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 94050e47da9d..ae51179a3005 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -70,7 +70,7 @@ static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E; static const char *const kAsanModuleCtorName = "asan.module_ctor"; static const char *const kAsanModuleDtorName = "asan.module_dtor"; -static const int kAsanCtorAndCtorPriority = 1; +static const int kAsanCtorAndDtorPriority = 1; static const char *const kAsanReportErrorTemplate = "__asan_report_"; static const char *const kAsanReportLoadN = "__asan_report_load_n"; static const char *const kAsanReportStoreN = "__asan_report_store_n"; @@ -1138,7 +1138,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { IRB_Dtor.CreateCall2(AsanUnregisterGlobals, IRB.CreatePointerCast(AllGlobals, IntptrTy), ConstantInt::get(IntptrTy, n)); - appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndCtorPriority); + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); DEBUG(dbgs() << M); return true; @@ -1227,7 +1227,7 @@ bool AddressSanitizer::doInitialization(Module &M) { Mapping = getShadowMapping(M, LongSize); - appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority); + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); return true; } From bf933548ba94e8029aa3c0d12b8e6a551fc32322 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 29 May 2014 01:18:01 +0000 Subject: [PATCH 225/906] R600/SI: Fix pattern variable names. These are confusing enough since the order swaps, so give them more useful names. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209787 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 500fa7894c29..e64a1b548ea9 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -2139,8 +2139,8 @@ multiclass DSWritePat { >; def : Pat < - (frag vt:$src1, i32:$src0), - (inst 0, $src0, $src1, 0) + (frag vt:$val, i32:$ptr), + (inst 0, $ptr, $val, 0) >; } From 11ef9456a8062ba1db499f916661dca7c7dd6529 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Spencer" Date: Thu, 29 May 2014 01:42:45 +0000 Subject: [PATCH 226/906] [x86] Fold extract_vector_elt of a load into the Load's address computation. An address only use of an extract element of a load can be simplified to a load. Without this the result of the extract element is spilled to the stack so that an address is available. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209788 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 214 +++++++++++++---------- test/CodeGen/X86/vec_splat.ll | 20 ++- 2 files changed, 143 insertions(+), 91 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c4089446f08d..f346dcfcddd9 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -169,6 +169,16 @@ namespace { bool CombineToPostIndexedLoadStore(SDNode *N); bool SliceUpLoad(SDNode *N); + /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed + /// load. + /// + /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. + /// \param InVecVT type of the input vector to EVE with bitcasts resolved. + /// \param EltNo index of the vector element to load. + /// \param OriginalLoad load that EVE came from to be replaced. + /// \returns EVE on success SDValue() on failure. + SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad( + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); SDValue SExtPromoteOperand(SDValue Op, EVT PVT); @@ -9675,6 +9685,86 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } +SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { + EVT ResultVT = EVE->getValueType(0); + EVT VecEltVT = InVecVT.getVectorElementType(); + unsigned Align = OriginalLoad->getAlignment(); + unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment( + VecEltVT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) + return SDValue(); + + Align = NewAlign; + + SDValue NewPtr = OriginalLoad->getBasePtr(); + SDValue Offset; + EVT PtrType = NewPtr.getValueType(); + MachinePointerInfo MPI; + if (auto *ConstEltNo = dyn_cast(EltNo)) { + int Elt = ConstEltNo->getZExtValue(); + unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; + if (TLI.isBigEndian()) + PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff; + Offset = DAG.getConstant(PtrOff, PtrType); + MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); + } else { + Offset = DAG.getNode( + ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo, + DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType())); + if (TLI.isBigEndian()) + Offset = DAG.getNode( + ISD::SUB, SDLoc(EVE), EltNo.getValueType(), + DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset); + MPI = OriginalLoad->getPointerInfo(); + } + NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset); + + // The replacement we need to do here is a little tricky: we need to + // replace an extractelement of a load with a load. + // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. 
+ SDValue Load; + SDValue Chain; + if (ResultVT.bitsGT(VecEltVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT) + ? ISD::ZEXTLOAD + : ISD::EXTLOAD; + Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), + NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(), + OriginalLoad->isNonTemporal(), Align, + OriginalLoad->getTBAAInfo()); + Chain = Load.getValue(1); + } else { + Load = DAG.getLoad( + VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, + OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), + OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo()); + Chain = Load.getValue(1); + if (ResultVT.bitsLT(VecEltVT)) + Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); + else + Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load); + } + WorkListRemover DeadNodes(*this); + SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; + SDValue To[] = { Load, Chain }; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + // Since we're explicitly calling ReplaceAllUses, add the new node to the + // worklist explicitly as well. + AddToWorkList(Load.getNode()); + AddUsersToWorkList(Load.getNode()); // Add users too + // Make sure to revisit this node to clean it up; it will usually be dead. + AddToWorkList(EVE); + ++OpsNarrowed; + return SDValue(EVE, 0); +} + SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); @@ -9743,6 +9833,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } + bool BCNumEltsChanged = false; + EVT ExtVT = VT.getVectorElementType(); + EVT LVT = ExtVT; + + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + return SDValue(); + + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + EVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) + return SDValue(); + if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) + BCNumEltsChanged = true; + InVec = InVec.getOperand(0); + ExtVT = BCVT.getVectorElementType(); + } + + // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) + if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && + ISD::isNormalLoad(InVec.getNode())) { + SDValue Index = N->getOperand(1); + if (LoadSDNode *OrigLoad = dyn_cast(InVec)) + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, + OrigLoad); + } + // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. if (!LegalOperations) return SDValue(); @@ -9753,30 +9875,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ConstEltNo) { int Elt = cast(EltNo)->getZExtValue(); - bool NewLoad = false; - bool BCNumEltsChanged = false; - EVT ExtVT = VT.getVectorElementType(); - EVT LVT = ExtVT; - - // If the result of load has to be truncated, then it's not necessarily - // profitable. - if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) - return SDValue(); - - if (InVec.getOpcode() == ISD::BITCAST) { - // Don't duplicate a load with other uses. 
- if (!InVec.hasOneUse()) - return SDValue(); - - EVT BCVT = InVec.getOperand(0).getValueType(); - if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) - return SDValue(); - if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) - BCNumEltsChanged = true; - InVec = InVec.getOperand(0); - ExtVT = BCVT.getVectorElementType(); - NewLoad = true; - } LoadSDNode *LN0 = nullptr; const ShuffleVectorSDNode *SVN = nullptr; @@ -9819,6 +9917,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; + EltNo = DAG.getConstant(Elt, EltNo.getValueType()); } } @@ -9831,72 +9930,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (Elt == -1) return DAG.getUNDEF(LVT); - unsigned Align = LN0->getAlignment(); - if (NewLoad) { - // Check the resultant load doesn't need a higher alignment than the - // original load. - unsigned NewAlign = - TLI.getDataLayout() - ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) - return SDValue(); - - Align = NewAlign; - } - - SDValue NewPtr = LN0->getBasePtr(); - unsigned PtrOff = 0; - - if (Elt) { - PtrOff = LVT.getSizeInBits() * Elt / 8; - EVT PtrType = NewPtr.getValueType(); - if (TLI.isBigEndian()) - PtrOff = VT.getSizeInBits() / 8 - PtrOff; - NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr, - DAG.getConstant(PtrOff, PtrType)); - } - - // The replacement we need to do here is a little tricky: we need to - // replace an extractelement of a load with a load. - // Use ReplaceAllUsesOfValuesWith to do the replacement. - // Note that this replacement assumes that the extractvalue is the only - // use of the load; that's okay because we don't want to perform this - // transformation in other cases anyway. - SDValue Load; - SDValue Chain; - if (NVT.bitsGT(LVT)) { - // If the result type of vextract is wider than the load, then issue an - // extending load instead. - ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) - ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(), - NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LVT, LN0->isVolatile(), LN0->isNonTemporal(), - Align, LN0->getTBAAInfo()); - Chain = Load.getValue(1); - } else { - Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align, LN0->getTBAAInfo()); - Chain = Load.getValue(1); - if (NVT.bitsLT(LVT)) - Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load); - else - Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load); - } - WorkListRemover DeadNodes(*this); - SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; - SDValue To[] = { Load, Chain }; - DAG.ReplaceAllUsesOfValuesWith(From, To, 2); - // Since we're explcitly calling ReplaceAllUses, add the new node to the - // worklist explicitly as well. - AddToWorkList(Load.getNode()); - AddUsersToWorkList(Load.getNode()); // Add users too - // Make sure to revisit this node to clean it up; it will usually be dead. 
- AddToWorkList(N); - return SDValue(N, 0); + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0); } return SDValue(); diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll index a02e3836078c..28f2a9074cb8 100644 --- a/test/CodeGen/X86/vec_splat.ll +++ b/test/CodeGen/X86/vec_splat.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s -check-prefix=SSE2 ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse3 | FileCheck %s -check-prefix=SSE3 +; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind { %tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1] @@ -35,6 +36,23 @@ define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind { ; Fold extract of a load into the load's address computation. This avoids spilling to the stack. define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind { + %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i + %2 = load <4 x float>* %1, align 16 + %3 = trunc i64 %j to i32 + %4 = extractelement <4 x float> %2, i32 %3 + %5 = insertelement <4 x float> undef, float %4, i32 0 + %6 = insertelement <4 x float> %5, float %4, i32 1 + %7 = insertelement <4 x float> %6, float %4, i32 2 + %8 = insertelement <4 x float> %7, float %4, i32 3 + ret <4 x float> %8 + +; AVX-LABEL: load_extract_splat +; AVX-NOT: rsp +; AVX: vbroadcastss +} + +; Fold extract of a load into the load's address computation. This avoids spilling to the stack. +define <4 x float> @load_extract_splat1(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind { %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i %2 = load <4 x float>* %1, align 16 %3 = extractelement <4 x float> %2, i64 %j @@ -44,7 +62,7 @@ define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly %ptr, i64 %7 = insertelement <4 x float> %6, float %3, i32 3 ret <4 x float> %7 -; AVX-LABEL: load_extract_splat +; AVX-LABEL: load_extract_splat1 ; AVX-NOT: movs ; AVX: vbroadcastss } From 7728610d24763bd948fea59bfbce21e2085e6879 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Thu, 29 May 2014 01:44:13 +0000 Subject: [PATCH 227/906] [ASan] Hoist blacklisting globals from init-order checking to Clang. Clang knows about the sanitizer blacklist and it makes no sense to add global to the list of llvm.asan.dynamically_initialized_globals if it will be blacklisted in the instrumentation pass anyway. Instead, we should do as much blacklisting as possible (if not all) in the frontend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209790 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index ae51179a3005..1730cff325a2 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1069,8 +1069,6 @@ bool AddressSanitizerModule::runOnModule(Module &M) { // Determine whether this global should be poisoned in initialization. bool GlobalHasDynamicInitializer = DynamicallyInitializedGlobals.Contains(G); - // Don't check initialization order if this global is blacklisted. 
- GlobalHasDynamicInitializer &= !BL->isIn(*G, "init"); StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL); Constant *NewInitializer = ConstantStruct::get( From 8bfb46e790b8dbe3ccb7a79c530712fc3404fb50 Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Thu, 29 May 2014 01:55:07 +0000 Subject: [PATCH 228/906] Add LoadCombine pass. This pass is disabled by default. Use -combine-loads to enable in -O[1-3] Differential revision: http://reviews.llvm.org/D3580 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209791 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IRBuilder.h | 24 ++ include/llvm/InitializePasses.h | 1 + include/llvm/Transforms/Scalar.h | 7 + lib/Transforms/IPO/PassManagerBuilder.cpp | 11 + lib/Transforms/Scalar/CMakeLists.txt | 1 + lib/Transforms/Scalar/LoadCombine.cpp | 268 ++++++++++++++++++++ lib/Transforms/Scalar/Scalar.cpp | 1 + test/Transforms/LoadCombine/load-combine.ll | 190 ++++++++++++++ 8 files changed, 503 insertions(+) create mode 100644 lib/Transforms/Scalar/LoadCombine.cpp create mode 100644 test/Transforms/LoadCombine/load-combine.ll diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h index 580d33368337..cedb87cdb7cc 100644 --- a/include/llvm/IR/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -1464,6 +1464,30 @@ class IRBuilder : public IRBuilderBase, public Inserter { Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32Ty, NumElts)); return CreateShuffleVector(V, Undef, Zeros, Name + ".splat"); } + + /// \brief Return a value that has been extracted from a larger integer type. + Value *CreateExtractInteger(const DataLayout &DL, Value *From, + IntegerType *ExtractedTy, uint64_t Offset, + const Twine &Name) { + IntegerType *IntTy = cast(From->getType()); + assert(DL.getTypeStoreSize(ExtractedTy) + Offset <= + DL.getTypeStoreSize(IntTy) && + "Element extends past full value"); + uint64_t ShAmt = 8 * Offset; + Value *V = From; + if (DL.isBigEndian()) + ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - + DL.getTypeStoreSize(ExtractedTy) - Offset); + if (ShAmt) { + V = CreateLShr(V, ShAmt, Name + ".shift"); + } + assert(ExtractedTy->getBitWidth() <= IntTy->getBitWidth() && + "Cannot extract to a larger integer!"); + if (ExtractedTy != IntTy) { + V = CreateTrunc(V, ExtractedTy, Name + ".trunc"); + } + return V; + } }; // Create wrappers for C Binding types (see CBindingWrapping.h). diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 8e536159db12..0466d11e35a8 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -272,6 +272,7 @@ void initializeSLPVectorizerPass(PassRegistry&); void initializeBBVectorizePass(PassRegistry&); void initializeMachineFunctionPrinterPassPass(PassRegistry&); void initializeStackMapLivenessPass(PassRegistry&); +void initializeLoadCombinePass(PassRegistry&); } #endif diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index cf1d65544075..8ecfd801d0d8 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -19,6 +19,7 @@ namespace llvm { +class BasicBlockPass; class FunctionPass; class Pass; class GetElementPtrInst; @@ -381,6 +382,12 @@ FunctionPass *createAddDiscriminatorsPass(); // FunctionPass *createSeparateConstOffsetFromGEPPass(); +//===----------------------------------------------------------------------===// +// +// LoadCombine - Combine loads into bigger loads. 
+// +BasicBlockPass *createLoadCombinePass(); + } // End llvm namespace #endif diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 38e1b8e16667..c20c717de5e7 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -53,6 +53,10 @@ static cl::opt RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); +static cl::opt RunLoadCombine("combine-loads", cl::init(false), + cl::Hidden, + cl::desc("Run the load combining pass")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -65,6 +69,7 @@ PassManagerBuilder::PassManagerBuilder() { SLPVectorize = RunSLPVectorization; LoopVectorize = RunLoopVectorization; RerollLoops = RunLoopRerolling; + LoadCombine = RunLoadCombine; } PassManagerBuilder::~PassManagerBuilder() { @@ -236,6 +241,9 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createLoopUnrollPass()); } + if (LoadCombine) + MPM.add(createLoadCombinePass()); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Clean up after everything. @@ -352,6 +360,9 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, // More scalar chains could be vectorized due to more alias information PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + if (LoadCombine) + PM.add(createLoadCombinePass()); + // Cleanup and simplify the code after the scalar optimizations. PM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, PM); diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 3ad1488d00a3..b2461fc627b2 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -12,6 +12,7 @@ add_llvm_library(LLVMScalarOpts IndVarSimplify.cpp JumpThreading.cpp LICM.cpp + LoadCombine.cpp LoopDeletion.cpp LoopIdiomRecognize.cpp LoopInstSimplify.cpp diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp new file mode 100644 index 000000000000..846aa703c9c3 --- /dev/null +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -0,0 +1,268 @@ +//===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This transformation combines adjacent loads. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetFolder.h" +#include "llvm/Pass.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "load-combine" + +STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining"); +STATISTIC(NumLoadsCombined, "Number of loads combined"); + +namespace { +struct PointerOffsetPair { + Value *Pointer; + uint64_t Offset; +}; + +struct LoadPOPPair { + LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O) + : Load(L), POP(P), InsertOrder(O) {} + LoadPOPPair() {} + LoadInst *Load; + PointerOffsetPair POP; + /// \brief The new load needs to be created before the first load in IR order. + unsigned InsertOrder; +}; + +class LoadCombine : public BasicBlockPass { + LLVMContext *C; + const DataLayout *DL; + +public: + LoadCombine() + : BasicBlockPass(ID), + C(nullptr), DL(nullptr) { + initializeSROAPass(*PassRegistry::getPassRegistry()); + } + bool doInitialization(Function &) override; + bool runOnBasicBlock(BasicBlock &BB) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + const char *getPassName() const override { return "LoadCombine"; } + static char ID; + + typedef IRBuilder BuilderTy; + +private: + BuilderTy *Builder; + + PointerOffsetPair getPointerOffsetPair(LoadInst &); + bool combineLoads(DenseMap> &); + bool aggregateLoads(SmallVectorImpl &); + bool combineLoads(SmallVectorImpl &); +}; +} + +bool LoadCombine::doInitialization(Function &F) { + DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n"); + C = &F.getContext(); + DataLayoutPass *DLP = getAnalysisIfAvailable(); + if (!DLP) { + DEBUG(dbgs() << " Skipping LoadCombine -- no target data!\n"); + return false; + } + DL = &DLP->getDataLayout(); + return true; +} + +PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) { + PointerOffsetPair POP; + POP.Pointer = LI.getPointerOperand(); + POP.Offset = 0; + while (isa(POP.Pointer) || isa(POP.Pointer)) { + if (auto *GEP = dyn_cast(POP.Pointer)) { + unsigned BitWidth = DL->getPointerTypeSizeInBits(GEP->getType()); + APInt Offset(BitWidth, 0); + if (GEP->accumulateConstantOffset(*DL, Offset)) + POP.Offset += Offset.getZExtValue(); + else + // Can't handle GEPs with variable indices. + return POP; + POP.Pointer = GEP->getPointerOperand(); + } else if (auto *BC = dyn_cast(POP.Pointer)) + POP.Pointer = BC->getOperand(0); + } + return POP; +} + +bool LoadCombine::combineLoads( + DenseMap> &LoadMap) { + bool Combined = false; + for (auto &Loads : LoadMap) { + if (Loads.second.size() < 2) + continue; + std::sort(Loads.second.begin(), Loads.second.end(), + [](const LoadPOPPair &A, const LoadPOPPair &B) { + return A.POP.Offset < B.POP.Offset; + }); + if (aggregateLoads(Loads.second)) + Combined = true; + } + return Combined; +} + +/// \brief Try to aggregate loads from a sorted list of loads to be combined. +/// +/// It is guaranteed that no writes occur between any of the loads. All loads +/// have the same base pointer. There are at least two loads. 
+bool LoadCombine::aggregateLoads(SmallVectorImpl &Loads) { + assert(Loads.size() >= 2 && "Insufficient loads!"); + LoadInst *BaseLoad = nullptr; + SmallVector AggregateLoads; + bool Combined = false; + uint64_t PrevOffset = -1ull; + uint64_t PrevSize = 0; + for (auto &L : Loads) { + if (PrevOffset == -1ull) { + BaseLoad = L.Load; + PrevOffset = L.POP.Offset; + PrevSize = DL->getTypeStoreSize(L.Load->getType()); + AggregateLoads.push_back(L); + continue; + } + if (L.Load->getAlignment() > BaseLoad->getAlignment()) + continue; + if (L.POP.Offset > PrevOffset + PrevSize) { + // No other load will be combinable + if (combineLoads(AggregateLoads)) + Combined = true; + AggregateLoads.clear(); + PrevOffset = -1; + continue; + } + if (L.POP.Offset != PrevOffset + PrevSize) + // This load is offset less than the size of the last load. + // FIXME: We may want to handle this case. + continue; + PrevOffset = L.POP.Offset; + PrevSize = DL->getTypeStoreSize(L.Load->getType()); + AggregateLoads.push_back(L); + } + if (combineLoads(AggregateLoads)) + Combined = true; + return Combined; +} + +/// \brief Given a list of combinable load. Combine the maximum number of them. +bool LoadCombine::combineLoads(SmallVectorImpl &Loads) { + // Remove loads from the end while the size is not a power of 2. + unsigned TotalSize = 0; + for (const auto &L : Loads) + TotalSize += L.Load->getType()->getPrimitiveSizeInBits(); + while (TotalSize != 0 && !isPowerOf2_32(TotalSize)) + TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits(); + if (Loads.size() < 2) + return false; + + DEBUG({ + dbgs() << "***** Combining Loads ******\n"; + for (const auto &L : Loads) { + dbgs() << L.POP.Offset << ": " << *L.Load << "\n"; + } + }); + + // Find first load. This is where we put the new load. 
+ LoadPOPPair FirstLP; + FirstLP.InsertOrder = -1u; + for (const auto &L : Loads) + if (L.InsertOrder < FirstLP.InsertOrder) + FirstLP = L; + + unsigned AddressSpace = + FirstLP.POP.Pointer->getType()->getPointerAddressSpace(); + + Builder->SetInsertPoint(FirstLP.Load); + Value *Ptr = Builder->CreateConstGEP1_64( + Builder->CreatePointerCast(Loads[0].POP.Pointer, + Builder->getInt8PtrTy(AddressSpace)), + Loads[0].POP.Offset); + LoadInst *NewLoad = new LoadInst( + Builder->CreatePointerCast( + Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize), + Ptr->getType()->getPointerAddressSpace())), + Twine(Loads[0].Load->getName()) + ".combined", false, + Loads[0].Load->getAlignment(), FirstLP.Load); + + for (const auto &L : Loads) { + Builder->SetInsertPoint(L.Load); + Value *V = Builder->CreateExtractInteger( + *DL, NewLoad, cast(L.Load->getType()), + L.POP.Offset - Loads[0].POP.Offset, "combine.extract"); + L.Load->replaceAllUsesWith(V); + } + + NumLoadsCombined = NumLoadsCombined + Loads.size(); + return true; +} + +bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { + if (skipOptnoneFunction(BB) || !DL) + return false; + + IRBuilder + TheBuilder(BB.getContext(), TargetFolder(DL)); + Builder = &TheBuilder; + + DenseMap> LoadMap; + + bool Combined = false; + unsigned Index = 0; + for (auto &I : BB) { + if (I.mayWriteToMemory() || I.mayThrow()) { + if (combineLoads(LoadMap)) + Combined = true; + LoadMap.clear(); + continue; + } + LoadInst *LI = dyn_cast(&I); + if (!LI) + continue; + ++NumLoadsAnalyzed; + if (!LI->isSimple() || !LI->getType()->isIntegerTy()) + continue; + auto POP = getPointerOffsetPair(*LI); + if (!POP.Pointer) + continue; + LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++)); + } + if (combineLoads(LoadMap)) + Combined = true; + return Combined; +} + +void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); +} + +char LoadCombine::ID = 0; + +BasicBlockPass *llvm::createLoadCombinePass() { + return new LoadCombine(); +} + +INITIALIZE_PASS(LoadCombine, "load-combine", "Combine Adjacent Loads", false, + false) diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index f8f828c84057..edf012d81171 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -65,6 +65,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeSinkingPass(Registry); initializeTailCallElimPass(Registry); initializeSeparateConstOffsetFromGEPPass(Registry); + initializeLoadCombinePass(Registry); } void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { diff --git a/test/Transforms/LoadCombine/load-combine.ll b/test/Transforms/LoadCombine/load-combine.ll new file mode 100644 index 000000000000..c4d9241764d9 --- /dev/null +++ b/test/Transforms/LoadCombine/load-combine.ll @@ -0,0 +1,190 @@ +; RUN: opt < %s -load-combine -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Combine read from char* idiom. 
+define i64 @LoadU64_x64_0(i64* %pData) { + %1 = bitcast i64* %pData to i8* + %2 = load i8* %1, align 1 + %3 = zext i8 %2 to i64 + %4 = shl nuw i64 %3, 56 + %5 = getelementptr inbounds i8* %1, i64 1 + %6 = load i8* %5, align 1 + %7 = zext i8 %6 to i64 + %8 = shl nuw nsw i64 %7, 48 + %9 = or i64 %8, %4 + %10 = getelementptr inbounds i8* %1, i64 2 + %11 = load i8* %10, align 1 + %12 = zext i8 %11 to i64 + %13 = shl nuw nsw i64 %12, 40 + %14 = or i64 %9, %13 + %15 = getelementptr inbounds i8* %1, i64 3 + %16 = load i8* %15, align 1 + %17 = zext i8 %16 to i64 + %18 = shl nuw nsw i64 %17, 32 + %19 = or i64 %14, %18 + %20 = getelementptr inbounds i8* %1, i64 4 + %21 = load i8* %20, align 1 + %22 = zext i8 %21 to i64 + %23 = shl nuw nsw i64 %22, 24 + %24 = or i64 %19, %23 + %25 = getelementptr inbounds i8* %1, i64 5 + %26 = load i8* %25, align 1 + %27 = zext i8 %26 to i64 + %28 = shl nuw nsw i64 %27, 16 + %29 = or i64 %24, %28 + %30 = getelementptr inbounds i8* %1, i64 6 + %31 = load i8* %30, align 1 + %32 = zext i8 %31 to i64 + %33 = shl nuw nsw i64 %32, 8 + %34 = or i64 %29, %33 + %35 = getelementptr inbounds i8* %1, i64 7 + %36 = load i8* %35, align 1 + %37 = zext i8 %36 to i64 + %38 = or i64 %34, %37 + ret i64 %38 +; CHECK-LABEL: @LoadU64_x64_0( +; CHECK: load i64* %{{.*}}, align 1 +; CHECK-NOT: load +} + +; Combine simple adjacent loads. +define i32 @"2xi16_i32"(i16* %x) { + %1 = load i16* %x, align 2 + %2 = getelementptr inbounds i16* %x, i64 1 + %3 = load i16* %2, align 2 + %4 = zext i16 %3 to i32 + %5 = shl nuw i32 %4, 16 + %6 = zext i16 %1 to i32 + %7 = or i32 %5, %6 + ret i32 %7 +; CHECK-LABEL: @"2xi16_i32"( +; CHECK: load i32* %{{.*}}, align 2 +; CHECK-NOT: load +} + +; Don't combine loads across stores. +define i32 @"2xi16_i32_store"(i16* %x, i16* %y) { + %1 = load i16* %x, align 2 + store i16 0, i16* %y, align 2 + %2 = getelementptr inbounds i16* %x, i64 1 + %3 = load i16* %2, align 2 + %4 = zext i16 %3 to i32 + %5 = shl nuw i32 %4, 16 + %6 = zext i16 %1 to i32 + %7 = or i32 %5, %6 + ret i32 %7 +; CHECK-LABEL: @"2xi16_i32_store"( +; CHECK: load i16* %{{.*}}, align 2 +; CHECK: store +; CHECK: load i16* %{{.*}}, align 2 +} + +; Don't combine loads with a gap. +define i32 @"2xi16_i32_gap"(i16* %x) { + %1 = load i16* %x, align 2 + %2 = getelementptr inbounds i16* %x, i64 2 + %3 = load i16* %2, align 2 + %4 = zext i16 %3 to i32 + %5 = shl nuw i32 %4, 16 + %6 = zext i16 %1 to i32 + %7 = or i32 %5, %6 + ret i32 %7 +; CHECK-LABEL: @"2xi16_i32_gap"( +; CHECK: load i16* %{{.*}}, align 2 +; CHECK: load i16* %{{.*}}, align 2 +} + +; Combine out of order loads. +define i32 @"2xi16_i32_order"(i16* %x) { + %1 = getelementptr inbounds i16* %x, i64 1 + %2 = load i16* %1, align 2 + %3 = zext i16 %2 to i32 + %4 = load i16* %x, align 2 + %5 = shl nuw i32 %3, 16 + %6 = zext i16 %4 to i32 + %7 = or i32 %5, %6 + ret i32 %7 +; CHECK-LABEL: @"2xi16_i32_order"( +; CHECK: load i32* %{{.*}}, align 2 +; CHECK-NOT: load +} + +; Overlapping loads. +define i32 @"2xi16_i32_overlap"(i8* %x) { + %1 = bitcast i8* %x to i16* + %2 = load i16* %1, align 2 + %3 = getelementptr inbounds i8* %x, i64 1 + %4 = bitcast i8* %3 to i16* + %5 = load i16* %4, align 2 + %6 = zext i16 %5 to i32 + %7 = shl nuw i32 %6, 16 + %8 = zext i16 %2 to i32 + %9 = or i32 %7, %8 + ret i32 %9 +; CHECK-LABEL: @"2xi16_i32_overlap"( +; CHECK: load i16* %{{.*}}, align 2 +; CHECK: load i16* %{{.*}}, align 2 +} + +; Combine valid alignments. 
+define i64 @"2xi16_i64_align"(i8* %x) { + %1 = bitcast i8* %x to i32* + %2 = load i32* %1, align 4 + %3 = getelementptr inbounds i8* %x, i64 4 + %4 = bitcast i8* %3 to i16* + %5 = load i16* %4, align 2 + %6 = getelementptr inbounds i8* %x, i64 6 + %7 = bitcast i8* %6 to i16* + %8 = load i16* %7, align 2 + %9 = zext i16 %8 to i64 + %10 = shl nuw i64 %9, 48 + %11 = zext i16 %5 to i64 + %12 = shl nuw nsw i64 %11, 32 + %13 = zext i32 %2 to i64 + %14 = or i64 %12, %13 + %15 = or i64 %14, %10 + ret i64 %15 +; CHECK-LABEL: @"2xi16_i64_align"( +; CHECK: load i64* %{{.*}}, align 4 +} + +; Non power of two. +define i64 @"2xi16_i64_npo2"(i8* %x) { + %1 = load i8* %x, align 1 + %2 = zext i8 %1 to i64 + %3 = getelementptr inbounds i8* %x, i64 1 + %4 = load i8* %3, align 1 + %5 = zext i8 %4 to i64 + %6 = shl nuw nsw i64 %5, 8 + %7 = or i64 %6, %2 + %8 = getelementptr inbounds i8* %x, i64 2 + %9 = load i8* %8, align 1 + %10 = zext i8 %9 to i64 + %11 = shl nuw nsw i64 %10, 16 + %12 = or i64 %11, %7 + %13 = getelementptr inbounds i8* %x, i64 3 + %14 = load i8* %13, align 1 + %15 = zext i8 %14 to i64 + %16 = shl nuw nsw i64 %15, 24 + %17 = or i64 %16, %12 + %18 = getelementptr inbounds i8* %x, i64 4 + %19 = load i8* %18, align 1 + %20 = zext i8 %19 to i64 + %21 = shl nuw nsw i64 %20, 32 + %22 = or i64 %21, %17 + %23 = getelementptr inbounds i8* %x, i64 5 + %24 = load i8* %23, align 1 + %25 = zext i8 %24 to i64 + %26 = shl nuw nsw i64 %25, 40 + %27 = or i64 %26, %22 + %28 = getelementptr inbounds i8* %x, i64 6 + %29 = load i8* %28, align 1 + %30 = zext i8 %29 to i64 + %31 = shl nuw nsw i64 %30, 48 + %32 = or i64 %31, %27 + ret i64 %32 +; CHECK-LABEL: @"2xi16_i64_npo2"( +; CHECK: load i32* %{{.*}}, align 1 +} From ed0e90576d3e0b4796aa7825320cf4d41745f81e Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Thu, 29 May 2014 02:05:37 +0000 Subject: [PATCH 229/906] [LoadCombine] Missed a file. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209792 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/IPO/PassManagerBuilder.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h index 023de0863e3b..50877d013702 100644 --- a/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -117,6 +117,7 @@ class PassManagerBuilder { bool SLPVectorize; bool LoopVectorize; bool RerollLoops; + bool LoadCombine; private: /// ExtensionList - This is list of all of the extensions that are registered. From 2a747bf1c566cbe9739f2a687bc093fdd5cfeda0 Mon Sep 17 00:00:00 2001 From: Dinesh Dwivedi Date: Thu, 29 May 2014 06:47:23 +0000 Subject: [PATCH 230/906] LCSSA should be performed on the outermost affected loop while unrolling loop. During loop-unroll, loop exits from the current loop may end up in in different outer loop. This requires to re-form LCSSA recursively for one level down from the outer most loop where loop exits are landed during unroll. This fixes PR18861. 
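To make the scenario concrete, the following hypothetical C++ source has the same overall shape as the pr18861.ll test added below (a reconstruction for illustration, not the original reproducer): the exit edge out of the innermost loop, the one that gets unrolled, bypasses the enclosing loops and uses a value defined in the outermost loop, so LCSSA has to be re-formed starting from the outermost affected loop rather than only the unrolled loop's immediate parent.

extern int b;
bool step();     // assumed helpers, only here to give the loops bodies
bool check(int);

void fn1() {
  for (int i = 0;; ++i) {            // outermost loop
    while (step()) {                 // middle loop
      for (int j = 0; j < 1; ++j) {  // innermost loop, the one unrolled
        if (check(j)) {
          b = i;                     // outer-loop value used on the exit path
          return;                    // exit that skips the enclosing loops
        }
      }
    }
  }
}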
Differential Revision: http://reviews.llvm.org/D2976 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209796 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/LoopUnroll.cpp | 9 ++++++ test/Transforms/LoopUnroll/pr18861.ll | 43 +++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 test/Transforms/LoopUnroll/pr18861.ll diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index d953e3073109..16975b9e6374 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -487,6 +487,15 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (OuterL) { ScalarEvolution *SE = PP->getAnalysisIfAvailable(); simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE); + + // LCSSA must be performed on the outermost affected loop. The unrolled + // loop's last loop latch is guaranteed to be in the outermost loop after + // deleteLoopFromQueue updates LoopInfo. + Loop *LatchLoop = LI->getLoopFor(Latches.back()); + if (!OuterL->contains(LatchLoop)) + while (OuterL->getParentLoop() != LatchLoop) + OuterL = OuterL->getParentLoop(); + formLCSSARecursively(*OuterL, *DT, SE); } } diff --git a/test/Transforms/LoopUnroll/pr18861.ll b/test/Transforms/LoopUnroll/pr18861.ll new file mode 100644 index 000000000000..62f26106afb2 --- /dev/null +++ b/test/Transforms/LoopUnroll/pr18861.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -loop-unroll -indvars -disable-output + +@b = external global i32, align 4 + +; Function Attrs: nounwind uwtable +define void @fn1() #0 { +entry: + br label %for.cond1thread-pre-split + +for.cond1thread-pre-split: ; preds = %for.inc8, %entry + %storemerge1 = phi i32 [ 0, %entry ], [ %inc9, %for.inc8 ] + br i1 undef, label %for.inc8, label %for.cond2.preheader.lr.ph + +for.cond2.preheader.lr.ph: ; preds = %for.cond1thread-pre-split + br label %for.cond2.preheader + +for.cond2.preheader: ; preds = %for.inc5, %for.cond2.preheader.lr.ph + br label %for.cond2 + +for.cond2: ; preds = %for.body3, %for.cond2.preheader + %storemerge = phi i32 [ %add, %for.body3 ], [ 0, %for.cond2.preheader ] + %cmp = icmp slt i32 %storemerge, 1 + br i1 %cmp, label %for.body3, label %for.inc5 + +for.body3: ; preds = %for.cond2 + %tobool4 = icmp eq i32 %storemerge, 0 + %add = add nsw i32 %storemerge, 1 + br i1 %tobool4, label %for.cond2, label %if.then + +if.then: ; preds = %for.body3 + store i32 %storemerge1, i32* @b, align 4 + ret void + +for.inc5: ; preds = %for.cond2 + br i1 undef, label %for.cond1.for.inc8_crit_edge, label %for.cond2.preheader + +for.cond1.for.inc8_crit_edge: ; preds = %for.inc5 + br label %for.inc8 + +for.inc8: ; preds = %for.cond1.for.inc8_crit_edge, %for.cond1thread-pre-split + %inc9 = add nsw i32 %storemerge1, 1 + br label %for.cond1thread-pre-split +} From bb7f18abf8b3c526a8f82fcc1d8caed419f907fc Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Thu, 29 May 2014 09:19:07 +0000 Subject: [PATCH 231/906] Fix an assertion failure caused by v1i64 in DAGCombiner Shrink. 
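The combine at fault narrows an operation when only the low bits of its result are demanded. The scalar intuition, written as ordinary C++ rather than SelectionDAG code, is roughly:

#include <cstdint>

// Illustration only: if users demand just the low 32 bits of a 64-bit add,
// the add can be done in 32 bits and widened afterwards.
uint64_t addDemandLow32(uint64_t x, uint64_t y) {
  return static_cast<uint64_t>(static_cast<uint32_t>(x) +
                               static_cast<uint32_t>(y));
}

The fix below makes ShrinkDemandedOp return early for vector value types such as v1i64, where generating the narrowed node would mix vector and scalar operands and trip an assertion, and it tightens the AArch64 free-truncate/free-zext hooks to reject vector types as well.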
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209798 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 4 +++ lib/Target/AArch64/AArch64ISelLowering.cpp | 25 +++++++------------ .../AArch64/2014-05-16-shrink-v1i64.ll | 14 +++++++++++ 3 files changed, 27 insertions(+), 16 deletions(-) create mode 100644 test/CodeGen/AArch64/2014-05-16-shrink-v1i64.ll diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b75d80541ea2..3731aeae3748 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -327,6 +327,10 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, assert(Op.getNode()->getNumValues() == 1 && "ShrinkDemandedOp only supports nodes with one result!"); + // Early return, as this function cannot handle vector types. + if (Op.getValueType().isVector()) + return false; + // Don't do this if the node has another user, which may require the // full value. if (!Op.getNode()->hasOneUse()) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 80d6669cbf3d..f77a21ac9a21 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6047,18 +6047,14 @@ bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; + return NumBits1 > NumBits2; } bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) + if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; + return NumBits1 > NumBits2; } // All 32-bit GPR operations implicitly zero the high-half of the corresponding @@ -6068,18 +6064,14 @@ bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 == 32 && NumBits2 == 64) - return true; - return false; + return NumBits1 == 32 && NumBits2 == 64; } bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) + if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 == 32 && NumBits2 == 64) - return true; - return false; + return NumBits1 == 32 && NumBits2 == 64; } bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { @@ -6092,8 +6084,9 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { return false; // 8-, 16-, and 32-bit integer loads all implicitly zero-extend. 
- return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() && - VT2.isInteger() && VT1.getSizeInBits() <= 32); + return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() && + VT2.isSimple() && !VT2.isVector() && VT2.isInteger() && + VT1.getSizeInBits() <= 32); } bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType, diff --git a/test/CodeGen/AArch64/2014-05-16-shrink-v1i64.ll b/test/CodeGen/AArch64/2014-05-16-shrink-v1i64.ll new file mode 100644 index 000000000000..f31a5702761c --- /dev/null +++ b/test/CodeGen/AArch64/2014-05-16-shrink-v1i64.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=arm64 < %s + +; The DAGCombiner tries to do following shrink: +; Convert x+y to (VT)((SmallVT)x+(SmallVT)y) +; But currently it can't handle vector type and will trigger an assertion failure +; when it tries to generate an add mixed using vector type and scaler type. +; This test checks that such assertion failur should not happen. +define <1 x i64> @dotest(<1 x i64> %in0) { +entry: + %0 = add <1 x i64> %in0, %in0 + %vshl_n = shl <1 x i64> %0, + %vsra_n = ashr <1 x i64> %vshl_n, + ret <1 x i64> %vsra_n +} From 086a708135db02636c15e8cecb53ba757b487c18 Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Thu, 29 May 2014 09:21:23 +0000 Subject: [PATCH 232/906] Rename a test case to contain correct date info. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209799 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../{2014-05-16-shrink-v1i64.ll => 2014-05-29-shrink-v1i64.ll} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/CodeGen/AArch64/{2014-05-16-shrink-v1i64.ll => 2014-05-29-shrink-v1i64.ll} (100%) diff --git a/test/CodeGen/AArch64/2014-05-16-shrink-v1i64.ll b/test/CodeGen/AArch64/2014-05-29-shrink-v1i64.ll similarity index 100% rename from test/CodeGen/AArch64/2014-05-16-shrink-v1i64.ll rename to test/CodeGen/AArch64/2014-05-29-shrink-v1i64.ll From ce538c783448d6b8d848212286b5a4e500bc9956 Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Thu, 29 May 2014 11:05:31 +0000 Subject: [PATCH 233/906] [elf2yaml][ELF] Move Info field to the RelocationSection structure. This field represents ELF section header sh_info field and does not have any sense for regular sections. Its interpretation depends on section type. 
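As background (not part of the patch): sh_info is one of the section header fields whose meaning depends entirely on sh_type, which is why it fits better on the type-specific RelocationSection mapping than on the generic Section. A small sketch of the common interpretations, using the constants from llvm/Support/ELF.h; the helper itself is made up:

#include "llvm/Support/ELF.h"

// Hypothetical summary of what sh_info means for a few section types; for
// ordinary progbits/nobits sections it has no defined meaning.
static const char *shInfoMeaning(unsigned SectionType) {
  switch (SectionType) {
  case llvm::ELF::SHT_REL:
  case llvm::ELF::SHT_RELA:
    return "index of the section the relocations apply to";
  case llvm::ELF::SHT_SYMTAB:
  case llvm::ELF::SHT_DYNSYM:
    return "index of the first non-local symbol";
  case llvm::ELF::SHT_GROUP:
    return "symbol table index of the group's signature symbol";
  default:
    return "no defined meaning for regular sections";
  }
}

The SHT_GROUP case is also why the dumper below gains a FIXME about group sections: their sh_info is likewise type-specific and is not handled yet.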
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209801 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Object/ELFYAML.h | 2 +- lib/Object/ELFYAML.cpp | 2 +- test/Object/obj2yaml.test | 6 +++--- tools/obj2yaml/elf2yaml.cpp | 31 ++++++++++++++++++++++--------- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/llvm/Object/ELFYAML.h b/include/llvm/Object/ELFYAML.h index 524e55b07e1c..699a38671bb6 100644 --- a/include/llvm/Object/ELFYAML.h +++ b/include/llvm/Object/ELFYAML.h @@ -76,7 +76,6 @@ struct Section { ELF_SHF Flags; llvm::yaml::Hex64 Address; StringRef Link; - StringRef Info; llvm::yaml::Hex64 AddressAlign; Section(SectionKind Kind) : Kind(Kind) {} virtual ~Section(); @@ -96,6 +95,7 @@ struct Relocation { StringRef Symbol; }; struct RelocationSection : Section { + StringRef Info; std::vector Relocations; RelocationSection() : Section(SectionKind::Relocation) {} static bool classof(const Section *S) { diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp index 7d50f23417b2..4c52f9c0be07 100644 --- a/lib/Object/ELFYAML.cpp +++ b/lib/Object/ELFYAML.cpp @@ -664,7 +664,6 @@ static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) { IO.mapOptional("Flags", Section.Flags, ELFYAML::ELF_SHF(0)); IO.mapOptional("Address", Section.Address, Hex64(0)); IO.mapOptional("Link", Section.Link, StringRef()); - IO.mapOptional("Info", Section.Info, StringRef()); IO.mapOptional("AddressAlign", Section.AddressAlign, Hex64(0)); } @@ -676,6 +675,7 @@ static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) { static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) { commonSectionMapping(IO, Section); + IO.mapOptional("Info", Section.Info, StringRef()); IO.mapOptional("Relocations", Section.Relocations); } diff --git a/test/Object/obj2yaml.test b/test/Object/obj2yaml.test index 1c1526349fd5..98b40d5cdab8 100644 --- a/test/Object/obj2yaml.test +++ b/test/Object/obj2yaml.test @@ -201,8 +201,8 @@ ELF-MIPSEL-NEXT: Content: 0000023C00004224E8FFBD271400BFAF1000B0AF21 ELF-MIPSEL-NEXT: - Name: .rel.text ELF-MIPSEL-NEXT: Type: SHT_REL ELF-MIPSEL-NEXT: Link: .symtab -ELF-MIPSEL-NEXT: Info: .text ELF-MIPSEL-NEXT: AddressAlign: 0x0000000000000004 +ELF-MIPSEL-NEXT: Info: .text ELF-MIPSEL-NEXT: Relocations: ELF-MIPSEL-NEXT: - Offset: 0 ELF-MIPSEL-NEXT: Symbol: _gp_disp @@ -300,8 +300,8 @@ ELF-MIPS64EL-NEXT: Content: '00000000000000000000000000000000' ELF-MIPS64EL-NEXT: - Name: .rela.data ELF-MIPS64EL-NEXT: Type: SHT_RELA ELF-MIPS64EL-NEXT: Link: .symtab -ELF-MIPS64EL-NEXT: Info: .data ELF-MIPS64EL-NEXT: AddressAlign: 0x0000000000000008 +ELF-MIPS64EL-NEXT: Info: .data ELF-MIPS64EL-NEXT: Relocations: ELF-MIPS64EL-NEXT: - Offset: 0 ELF-MIPS64EL-NEXT: Symbol: zed @@ -370,8 +370,8 @@ ELF-X86-64-NEXT: - Name: .rela.text ELF-X86-64-NEXT: Type: SHT_RELA ELF-X86-64-NEXT: Address: 0x0000000000000038 ELF-X86-64-NEXT: Link: .symtab -ELF-X86-64-NEXT: Info: .text ELF-X86-64-NEXT: AddressAlign: 0x0000000000000008 +ELF-X86-64-NEXT: Info: .text ELF-X86-64-NEXT: Relocations: ELF-X86-64-NEXT: - Offset: 0x000000000000000D ELF-X86-64-NEXT: Symbol: .rodata.str1.1 diff --git a/tools/obj2yaml/elf2yaml.cpp b/tools/obj2yaml/elf2yaml.cpp index 7642921b4855..5d19f9c7e6a1 100644 --- a/tools/obj2yaml/elf2yaml.cpp +++ b/tools/obj2yaml/elf2yaml.cpp @@ -28,6 +28,8 @@ class ELFDumper { error_code dumpSymbol(Elf_Sym_Iter Sym, ELFYAML::Symbol &S); error_code dumpCommonSection(const Elf_Shdr *Shdr, ELFYAML::Section &S); + error_code dumpCommonRelocationSection(const Elf_Shdr 
*Shdr, + ELFYAML::RelocationSection &S); template error_code dumpRelocation(const Elf_Shdr *Shdr, const RelT *Rel, ELFYAML::Relocation &R); @@ -84,6 +86,7 @@ ErrorOr ELFDumper::dump() { Y->Sections.push_back(std::unique_ptr(S.get())); break; } + // FIXME: Support SHT_GROUP section format. default: { ErrorOr S = dumpContentSection(&Sec); if (error_code EC = S.getError()) @@ -190,14 +193,24 @@ error_code ELFDumper::dumpCommonSection(const Elf_Shdr *Shdr, S.Link = NameOrErr.get(); } } - if (Shdr->sh_info != ELF::SHN_UNDEF) { - if (const Elf_Shdr *InfoSection = Obj.getSection(Shdr->sh_info)) { - NameOrErr = Obj.getSectionName(InfoSection); - if (error_code EC = NameOrErr.getError()) - return EC; - S.Info = NameOrErr.get(); - } + + return obj2yaml_error::success; +} + +template +error_code +ELFDumper::dumpCommonRelocationSection(const Elf_Shdr *Shdr, + ELFYAML::RelocationSection &S) { + if (error_code EC = dumpCommonSection(Shdr, S)) + return EC; + + if (const Elf_Shdr *InfoSection = Obj.getSection(Shdr->sh_info)) { + ErrorOr NameOrErr = Obj.getSectionName(InfoSection); + if (error_code EC = NameOrErr.getError()) + return EC; + S.Info = NameOrErr.get(); } + return obj2yaml_error::success; } @@ -207,7 +220,7 @@ ELFDumper::dumpRelSection(const Elf_Shdr *Shdr) { assert(Shdr->sh_type == ELF::SHT_REL && "Section type is not SHT_REL"); auto S = make_unique(); - if (error_code EC = dumpCommonSection(Shdr, *S)) + if (error_code EC = dumpCommonRelocationSection(Shdr, *S)) return EC; for (auto RI = Obj.begin_rel(Shdr), RE = Obj.end_rel(Shdr); RI != RE; @@ -227,7 +240,7 @@ ELFDumper::dumpRelaSection(const Elf_Shdr *Shdr) { assert(Shdr->sh_type == ELF::SHT_RELA && "Section type is not SHT_RELA"); auto S = make_unique(); - if (error_code EC = dumpCommonSection(Shdr, *S)) + if (error_code EC = dumpCommonRelocationSection(Shdr, *S)) return EC; for (auto RI = Obj.begin_rela(Shdr), RE = Obj.end_rela(Shdr); RI != RE; From 684122e84ed10cde3e462ea073345f2d1b165e07 Mon Sep 17 00:00:00 2001 From: Artyom Skrobov Date: Thu, 29 May 2014 11:26:15 +0000 Subject: [PATCH 234/906] Add missing check when MatchInstructionImpl() reports failure git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209802 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 2 ++ test/MC/AArch64/basic-a64-diagnostics.s | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 65b77c547dc9..33371189644a 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3794,6 +3794,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidLabel: case Match_MSR: case Match_MRS: { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); // Any time we get here, there's nothing fancy to do. Just get the // operand SMLoc and display the diagnostic. 
SMLoc ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(); diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s index a4a3b1379c9b..118df368fd1d 100644 --- a/test/MC/AArch64/basic-a64-diagnostics.s +++ b/test/MC/AArch64/basic-a64-diagnostics.s @@ -395,6 +395,7 @@ cmn w11, w12, lsr #-1 cmn w11, w12, lsr #32 cmn w19, wzr, asr #-1 + cmn wsp, w0 cmn wzr, wzr, asr #32 cmn x9, x10, lsl #-1 cmn x9, x10, lsl #64 @@ -417,6 +418,9 @@ // CHECK-ERROR-NEXT: error: expected integer shift amount // CHECK-ERROR-NEXT: cmn w19, wzr, asr #-1 // CHECK-ERROR-NEXT: ^ +// CHECK-ERROR-NEXT: error: too few operands for instruction +// CHECK-ERROR-NEXT: cmn wsp, w0 +// CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31] // CHECK-ERROR-NEXT: cmn wzr, wzr, asr #32 // CHECK-ERROR-NEXT: ^ From 897fd5f0edd15ddfc569d3f4ff2a4bcbbb644af2 Mon Sep 17 00:00:00 2001 From: Artyom Skrobov Date: Thu, 29 May 2014 11:34:50 +0000 Subject: [PATCH 235/906] Restore getInvertedCondCode() from the phased-out backend, fixing disassembly for NV git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209803 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/Utils/AArch64BaseInfo.h | 20 +++---------------- .../AArch64/basic-a64-instructions.txt | 6 ++++-- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 9e4c389cc2e9..9d2ce21c9626 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -233,23 +233,9 @@ inline static const char *getCondCodeName(CondCode Code) { } inline static CondCode getInvertedCondCode(CondCode Code) { - switch (Code) { - default: llvm_unreachable("Unknown condition code"); - case EQ: return NE; - case NE: return EQ; - case HS: return LO; - case LO: return HS; - case MI: return PL; - case PL: return MI; - case VS: return VC; - case VC: return VS; - case HI: return LS; - case LS: return HI; - case GE: return LT; - case LT: return GE; - case GT: return LE; - case LE: return GT; - } + // To reverse a condition it's necessary to only invert the low bit: + + return static_cast(static_cast(Code) ^ 0x1); } /// Given a condition code, return NZCV flags that would satisfy that condition. diff --git a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt index 397a39eb6cf0..70c45c851330 100644 --- a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt +++ b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt @@ -965,16 +965,18 @@ # CHECK: cinv w3, w5, gt # CHECK: cinv wzr, w4, le -# CHECK: csetm w9, lt +# CHECK: csetm w9, lt # CHECK: cinv x3, x5, gt # CHECK: cinv xzr, x4, le -# CHECK: csetm x9, lt +# CHECK: csetm x9, lt +# CHECK: cinv x0, x0, nv 0xa3 0xd0 0x85 0x5a 0x9f 0xc0 0x84 0x5a 0xe9 0xa3 0x9f 0x5a 0xa3 0xd0 0x85 0xda 0x9f 0xc0 0x84 0xda 0xe9 0xa3 0x9f 0xda +0x00 0xe0 0x80 0xda # CHECK: cneg w3, w5, gt # CHECK: cneg wzr, w4, le From 843ac4747a6e02884a93e04a5463de340d044a0f Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 29 May 2014 14:39:16 +0000 Subject: [PATCH 236/906] Revert "Revert "Revert "InstCombine: Improvement to check if signed addition overflows.""" This reverts commit r209776. It was miscompiling llvm::SelectionDAGISel::MorphNode. 
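For reference, the property WillNotOverflowSignedAdd is supposed to establish can be stated (and brute-force checked for narrow types) in plain C++; this is only an illustration of the claim whose proof is being reverted, not compiler code:

#include <cstdint>

// True iff adding a and b as 16-bit signed integers overflows, i.e. the
// widened sum does not round-trip through int16_t. A transform may only
// claim the add cannot wrap when this is false for every operand pair the
// analysis allows.
bool signedAdd16Overflows(int16_t a, int16_t b) {
  int32_t wide = static_cast<int32_t>(a) + static_cast<int32_t>(b);
  return wide != static_cast<int16_t>(wide);
}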
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209817 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineAddSub.cpp | 50 ++--------------- test/Transforms/InstCombine/AddOverflow.ll | 56 ------------------- 2 files changed, 6 insertions(+), 100 deletions(-) delete mode 100644 test/Transforms/InstCombine/AddOverflow.ll diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index eca4e4a78702..c37a9cf2ef9f 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -889,34 +889,11 @@ static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { return nullptr; } -// If one of the operands only has one non-zero bit, and if the other -// operand has a known-zero bit in a more significant place than it (not -// including the sign bit) the ripple may go up to and fill the zero, but -// won't change the sign. For example, (X & ~4) + 1. -// FIXME: Handle case where LHS has a zero before the 1 in the RHS, but also -// has one after. -static bool CheckRippleForAdd(APInt Op0KnownZero, APInt Op0KnownOne, - APInt Op1KnownZero, APInt Op1KnownOne) { - // Make sure that one of the operand has only one bit set to 1 and all other - // bit set to 0. - if ((~Op1KnownZero).countPopulation() == 1) { - int BitWidth = Op0KnownZero.getBitWidth(); - // Ignore Sign Bit. - Op0KnownZero.clearBit(BitWidth - 1); - int Op1OnePosition = BitWidth - Op1KnownOne.countLeadingZeros() - 1; - int Op0ZeroPosition = BitWidth - Op0KnownZero.countLeadingZeros() - 1; - if ((Op0ZeroPosition != (BitWidth - 1)) && - (Op0ZeroPosition >= Op1OnePosition)) - return true; - } - return false; -} /// WillNotOverflowSignedAdd - Return true if we can prove that: /// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) /// This basically requires proving that the add in the original type would not /// overflow to change the sign bit or have a carry out. -/// TODO: Handle this for Vectors. bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { // There are different heuristics we can use for this. Here are some simple // ones. @@ -928,29 +905,14 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) return true; - if (IntegerType *IT = dyn_cast(LHS->getType())) { - int BitWidth = IT->getBitWidth(); - APInt LHSKnownZero(BitWidth, 0, /*isSigned*/ true); - APInt LHSKnownOne(BitWidth, 0, /*isSigned*/ true); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); + // If one of the operands only has one non-zero bit, and if the other operand + // has a known-zero bit in a more significant place than it (not including the + // sign bit) the ripple may go up to and fill the zero, but won't change the + // sign. For example, (X & ~4) + 1. + + // TODO: Implement. - APInt RHSKnownZero(BitWidth, 0, /*isSigned*/ true); - APInt RHSKnownOne(BitWidth, 0, /*isSigned*/ true); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); - - // Addition of two 2's compliment numbers having opposite signs will never - // overflow. - if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) || - (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1])) - return true; - - // Check if carry bit of addition will not cause overflow. 
- if (CheckRippleForAdd(LHSKnownZero, LHSKnownOne, RHSKnownZero, RHSKnownOne)) - return true; - if (CheckRippleForAdd(RHSKnownZero, RHSKnownOne, LHSKnownZero, LHSKnownOne)) - return true; - } return false; } diff --git a/test/Transforms/InstCombine/AddOverflow.ll b/test/Transforms/InstCombine/AddOverflow.ll deleted file mode 100644 index 1bbd1fc59a5d..000000000000 --- a/test/Transforms/InstCombine/AddOverflow.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: opt < %s -instcombine -S | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; CHECK-LABEL: @ripple( -; CHECK: add nsw i16 %tmp1, 1 -define i32 @ripple(i16 signext %x) { -bb: - %tmp = sext i16 %x to i32 - %tmp1 = and i32 %tmp, -5 - %tmp2 = trunc i32 %tmp1 to i16 - %tmp3 = sext i16 %tmp2 to i32 - %tmp4 = add i32 %tmp3, 1 - ret i32 %tmp4 -} - -; CHECK-LABEL: @ripplenot( -; CHECK: add i32 %tmp3, 4 -define i32 @ripplenot(i16 signext %x) { -bb: - %tmp = sext i16 %x to i32 - %tmp1 = and i32 %tmp, -3 - %tmp2 = trunc i32 %tmp1 to i16 - %tmp3 = sext i16 %tmp2 to i32 - %tmp4 = add i32 %tmp3, 4 - ret i32 %tmp4 -} - -; CHECK-LABEL: @oppositesign( -; CHECK: add nsw i16 %tmp1, 4 -define i32 @oppositesign(i16 signext %x) { -bb: - %tmp = sext i16 %x to i32 - %tmp1 = or i32 %tmp, 32768 - %tmp2 = trunc i32 %tmp1 to i16 - %tmp3 = sext i16 %tmp2 to i32 - %tmp4 = add i32 %tmp3, 4 - ret i32 %tmp4 -} - -; CHECK-LABEL: @ripplenot_var( -; CHECK: add i32 %tmp6, %tmp7 -define i32 @ripplenot_var(i16 signext %x, i16 signext %y) { -bb: - %tmp = sext i16 %x to i32 - %tmp1 = and i32 %tmp, -5 - %tmp2 = trunc i32 %tmp1 to i16 - %tmp3 = sext i16 %y to i32 - %tmp4 = or i32 %tmp3, 2 - %tmp5 = trunc i32 %tmp4 to i16 - %tmp6 = sext i16 %tmp5 to i32 - %tmp7 = sext i16 %tmp2 to i32 - %tmp8 = add i32 %tmp6, %tmp7 - ret i32 %tmp8 -} From f5df18bff0f888d4772ad80d137d2d591bb99b5e Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 29 May 2014 15:13:23 +0000 Subject: [PATCH 237/906] Add a test showing the ppc code sequence for getting a function pointer. This would have found the miscompile in r209638. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209820 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/PowerPC/func-addr.ll | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 test/CodeGen/PowerPC/func-addr.ll diff --git a/test/CodeGen/PowerPC/func-addr.ll b/test/CodeGen/PowerPC/func-addr.ll new file mode 100644 index 000000000000..0cce863992b6 --- /dev/null +++ b/test/CodeGen/PowerPC/func-addr.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple powerpc64-linux < %s | FileCheck %s + +define void @foo() { + ret void +} +declare i32 @bar(i8*) + +; CHECK-LABEL: {{^}}zed: +; CHECK: addis 3, 2, .LC1@toc@ha +; CHECK-NEXT: ld 3, .LC1@toc@l(3) +; CHECK-NEXT: bl bar + + +; CHECK-LABEL: .section .toc,"aw",@progbits +; CHECK: .LC1: +; CHECK-NEXT: .tc foo[TC],foo + +define void @zed() { + call i32 @bar(i8* bitcast (void ()* @foo to i8*)) + ret void +} From 289a9d75de2da5fbfc5e20947ce3afef435ca70c Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 29 May 2014 15:41:38 +0000 Subject: [PATCH 238/906] [PPC] Use alias symbols in address computation. This seems to match what gcc does for ppc and what every other llvm backend does. This is a fixed version of r209638. The difference is to avoid any change in behavior for functions. The logic for using constant pools for function addresseses is spread over a few places and we have to keep them in sync. 
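Concretely, the rule this patch applies in both PPCAsmPrinter.cpp and PPCISelDAGToDAG.cpp is the same: a global's address has to be loaded from a TOC entry (LDtocL) if the global is a function, a declaration, has common linkage, or has available_externally linkage (the AsmPrinter path additionally forces the TOC form under the large code model). A shared helper would express it in one place; the name and placement here are hypothetical, since the patch itself keeps the checks duplicated:

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/GlobalValue.h"

  // Hypothetical helper mirroring the predicate this patch repeats in the
  // asm printer and in instruction selection.
  static bool needsTOCEntry(const llvm::GlobalValue *GV) {
    return GV->getType()->getElementType()->isFunctionTy() ||
           GV->isDeclaration() || GV->hasCommonLinkage() ||
           GV->hasAvailableExternallyLinkage();
  }

Until something like that exists, the two copies below have to be updated together.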
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209821 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCAsmPrinter.cpp | 37 ++++++++++---------------- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 14 +++------- test/CodeGen/PowerPC/alias.ll | 31 +++++++++++++++++++++ 3 files changed, 48 insertions(+), 34 deletions(-) create mode 100644 test/CodeGen/PowerPC/alias.ll diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 2174b18715f1..ee5e406c6f8d 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -380,15 +380,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { bool IsAvailExt = false; if (MO.isGlobal()) { - const GlobalValue *GValue = MO.getGlobal(); - const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = GAlias ? GAlias->getAliasee() : GValue; - MOSymbol = getSymbol(RealGValue); - const GlobalVariable *GVar = dyn_cast(RealGValue); - IsExternal = GVar && !GVar->hasInitializer(); - IsCommon = GVar && RealGValue->hasCommonLinkage(); - IsFunction = !GVar; - IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage(); + const GlobalValue *GV = MO.getGlobal(); + MOSymbol = getSymbol(GV); + IsExternal = GV->isDeclaration(); + IsCommon = GV->hasCommonLinkage(); + IsFunction = GV->getType()->getElementType()->isFunctionTy(); + IsAvailExt = GV->hasAvailableExternallyLinkage(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); else if (MO.isJTI()) @@ -427,13 +424,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } else if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); - const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = GAlias ? GAlias->getAliasee() : GValue; - MOSymbol = getSymbol(RealGValue); - const GlobalVariable *GVar = dyn_cast(RealGValue); - - if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || - RealGValue->hasAvailableExternallyLinkage() || + MOSymbol = getSymbol(GValue); + if (GValue->getType()->getElementType()->isFunctionTy() || + GValue->isDeclaration() || GValue->hasCommonLinkage() || + GValue->hasAvailableExternallyLinkage() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); } @@ -460,13 +454,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { bool IsFunction = false; if (MO.isGlobal()) { - const GlobalValue *GValue = MO.getGlobal(); - const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = GAlias ? GAlias->getAliasee() : GValue; - MOSymbol = getSymbol(RealGValue); - const GlobalVariable *GVar = dyn_cast(RealGValue); - IsExternal = GVar && !GVar->hasInitializer(); - IsFunction = !GVar; + const GlobalValue *GV = MO.getGlobal(); + MOSymbol = getSymbol(GV); + IsExternal = GV->isDeclaration(); + IsFunction = GV->getType()->getElementType()->isFunctionTy(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index f6e075d27193..b8f59570020c 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1472,17 +1472,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if (GlobalAddressSDNode *G = dyn_cast(GA)) { const GlobalValue *GValue = G->getGlobal(); - const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = GAlias ? 
GAlias->getAliasee() : GValue; - const GlobalVariable *GVar = dyn_cast(RealGValue); - assert((GVar || isa(RealGValue)) && - "Unexpected global value subclass!"); - - // An external variable is one without an initializer. For these, - // for variables with common linkage, and for Functions, generate - // the LDtocL form. - if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || - RealGValue->hasAvailableExternallyLinkage()) + if (GValue->getType()->getElementType()->isFunctionTy() || + GValue->isDeclaration() || GValue->hasCommonLinkage() || + GValue->hasAvailableExternallyLinkage()) return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0)); } diff --git a/test/CodeGen/PowerPC/alias.ll b/test/CodeGen/PowerPC/alias.ll new file mode 100644 index 000000000000..86e41148a0d7 --- /dev/null +++ b/test/CodeGen/PowerPC/alias.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -code-model=medium| FileCheck --check-prefix=CHECK --check-prefix=MEDIUM %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK --check-prefix=LARGE %s + +@foo = global i32 42 +@fooa = alias i32* @foo + +@foo2 = global i64 42 +@foo2a = alias i64* @foo2 + +; CHECK-LABEL: bar: +define i32 @bar() { +; MEDIUM: addis 3, 2, fooa@toc@ha +; LARGE: addis 3, 2, .LC1@toc@ha + %a = load i32* @fooa + ret i32 %a +} + +; CHECK-LABEL: bar2: +define i64 @bar2() { +; MEDIUM: addis 3, 2, foo2a@toc@ha +; MEDIUM: addi 3, 3, foo2a@toc@l +; LARGE: addis 3, 2, .LC3@toc@ha + %a = load i64* @foo2a + ret i64 %a +} + +; LARGE: .LC1: +; LARGE-NEXT: .tc fooa[TC],fooa + +; LARGE: .LC3: +; LARGE-NEXT: .tc foo2a[TC],foo2a From d4aff6892b8ee0880f9e25960e96111e04018342 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 29 May 2014 16:16:12 +0000 Subject: [PATCH 239/906] Rename alias variables to make it easier to add new tests to the file. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209822 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Feature/alias2.ll | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/Feature/alias2.ll b/test/Feature/alias2.ll index 4334a6201710..bc542ffd7701 100644 --- a/test/Feature/alias2.ll +++ b/test/Feature/alias2.ll @@ -6,17 +6,17 @@ @v2 = global [1 x i32] zeroinitializer ; CHECK: @v2 = global [1 x i32] zeroinitializer -@v3 = alias i16, i32* @v1 -; CHECK: @v3 = alias i16, i32* @v1 +@a1 = alias i16, i32* @v1 +; CHECK: @a1 = alias i16, i32* @v1 -@v4 = alias i32, [1 x i32]* @v2 -; CHECK: @v4 = alias i32, [1 x i32]* @v2 +@a2 = alias i32, [1 x i32]* @v2 +; CHECK: @a2 = alias i32, [1 x i32]* @v2 -@v5 = alias addrspace(2) i32, i32* @v1 -; CHECK: @v5 = alias addrspace(2) i32, i32* @v1 +@a3 = alias addrspace(2) i32, i32* @v1 +; CHECK: @a3 = alias addrspace(2) i32, i32* @v1 -@v6 = alias i16, i32* @v1 -; CHECK: @v6 = alias i16, i32* @v1 +@a4 = alias i16, i32* @v1 +; CHECK: @a4 = alias i16, i32* @v1 -@v7 = thread_local(localdynamic) alias i32* @v1 -; CHECK: @v7 = thread_local(localdynamic) alias i32* @v1 +@a5 = thread_local(localdynamic) alias i32* @v1 +; CHECK: @a5 = thread_local(localdynamic) alias i32* @v1 From 1d7b7c5ba04ac6f6eecf12192816e14ba2850099 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Thu, 29 May 2014 16:56:48 +0000 Subject: [PATCH 240/906] Debug Info: Remove unused code. The MInsn of an _abstract_ variable is never used again and updating the abstract variable for each inlined instance of it was questionable in the first place. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209829 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 2a0615d74f61..66ba9298c403 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1179,8 +1179,6 @@ DwarfDebug::collectVariableInfo(SmallPtrSet &Processed) { DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this); if (!addCurrentFnArgument(RegVar, Scope)) addScopeVariable(Scope, RegVar); - if (AbsVar) - AbsVar->setMInsn(MInsn); // Check if the first DBG_VALUE is valid for the rest of the function. if (Ranges.size() == 1 && Ranges.front().second == nullptr) { From fa680483220afac65be435ccf3b70051eb7815d3 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Thu, 29 May 2014 17:12:05 +0000 Subject: [PATCH 241/906] [ADT] Delete the Twine assignment operator This makes it slightly harder to misuse Twines. It is still possible to refer to destroyed temporaries with the regular constructors, though. Patch by Marco Alesiani! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209832 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/Twine.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h index a54fd743ad68..4be3ee6f82db 100644 --- a/include/llvm/ADT/Twine.h +++ b/include/llvm/ADT/Twine.h @@ -182,6 +182,10 @@ namespace llvm { assert(isValid() && "Invalid twine!"); } + /// Since the intended use of twines is as temporary objects, assignments + /// when concatenating might cause undefined behavior or stack corruptions + Twine &operator=(const Twine &Other) LLVM_DELETED_FUNCTION; + /// isNull - Check for the null twine. 
bool isNull() const { return getLHSKind() == NullKind; From d4d04199acab16a7bb9ca186bd95770ced578395 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Thu, 29 May 2014 18:40:48 +0000 Subject: [PATCH 242/906] Use range-based for loops in ASan, TSan and MSan git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209834 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 79 ++++++++----------- .../Instrumentation/MemorySanitizer.cpp | 58 +++++++------- .../Instrumentation/ThreadSanitizer.cpp | 41 +++++----- 3 files changed, 81 insertions(+), 97 deletions(-) diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 1730cff325a2..ede32fe57f7e 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -225,8 +225,7 @@ class SetOfDynamicallyInitializedGlobals { M.getNamedMetadata("llvm.asan.dynamically_initialized_globals"); if (!DynamicGlobals) return; - for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) { - MDNode *MDN = DynamicGlobals->getOperand(i); + for (const auto MDN : DynamicGlobals->operands()) { assert(MDN->getNumOperands() == 1); Value *VG = MDN->getOperand(0); // The optimizer may optimize away a global entirely, in which case we @@ -1009,10 +1008,9 @@ bool AddressSanitizerModule::runOnModule(Module &M) { SmallVector GlobalsToChange; - for (Module::GlobalListType::iterator G = M.global_begin(), - E = M.global_end(); G != E; ++G) { - if (ShouldInstrumentGlobal(G)) - GlobalsToChange.push_back(G); + for (auto &G : M.globals()) { + if (ShouldInstrumentGlobal(&G)) + GlobalsToChange.push_back(&G); } Function *CtorFunc = M.getFunction(kAsanModuleCtorName); @@ -1305,8 +1303,8 @@ bool AddressSanitizer::InjectCoverage(Function &F, (unsigned)ClCoverageBlockThreshold < AllBlocks.size()) { InjectCoverageAtBlock(F, F.getEntryBlock()); } else { - for (size_t i = 0, n = AllBlocks.size(); i < n; i++) - InjectCoverageAtBlock(F, *AllBlocks[i]); + for (auto BB : AllBlocks) + InjectCoverageAtBlock(F, *BB); } return true; } @@ -1339,29 +1337,28 @@ bool AddressSanitizer::runOnFunction(Function &F) { unsigned Alignment; // Fill the set of memory operations to instrument. - for (Function::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) { - AllBlocks.push_back(FI); + for (auto &BB : F) { + AllBlocks.push_back(&BB); TempsToInstrument.clear(); int NumInsnsPerBB = 0; - for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); - BI != BE; ++BI) { - if (LooksLikeCodeInBug11395(BI)) return false; - if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite, &Alignment)) { + for (auto &Inst : BB) { + if (LooksLikeCodeInBug11395(&Inst)) return false; + if (Value *Addr = + isInterestingMemoryAccess(&Inst, &IsWrite, &Alignment)) { if (ClOpt && ClOptSameTemp) { if (!TempsToInstrument.insert(Addr)) continue; // We've seen this temp in the current BB. } } else if (ClInvalidPointerPairs && - isInterestingPointerComparisonOrSubtraction(BI)) { - PointerComparisonsOrSubtracts.push_back(BI); + isInterestingPointerComparisonOrSubtraction(&Inst)) { + PointerComparisonsOrSubtracts.push_back(&Inst); continue; - } else if (isa(BI)) { + } else if (isa(Inst)) { // ok, take it. } else { - if (isa(BI)) + if (isa(Inst)) NumAllocas++; - CallSite CS(BI); + CallSite CS(&Inst); if (CS) { // A call inside BB. 
TempsToInstrument.clear(); @@ -1370,7 +1367,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { } continue; } - ToInstrument.push_back(BI); + ToInstrument.push_back(&Inst); NumInsnsPerBB++; if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break; @@ -1395,8 +1392,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { // Instrument. int NumInstrumented = 0; - for (size_t i = 0, n = ToInstrument.size(); i != n; i++) { - Instruction *Inst = ToInstrument[i]; + for (auto Inst : ToInstrument) { if (ClDebugMin < 0 || ClDebugMax < 0 || (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { if (isInterestingMemoryAccess(Inst, &IsWrite, &Alignment)) @@ -1412,14 +1408,13 @@ bool AddressSanitizer::runOnFunction(Function &F) { // We must unpoison the stack before every NoReturn call (throw, _exit, etc). // See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37 - for (size_t i = 0, n = NoReturnCalls.size(); i != n; i++) { - Instruction *CI = NoReturnCalls[i]; + for (auto CI : NoReturnCalls) { IRBuilder<> IRB(CI); IRB.CreateCall(AsanHandleNoReturnFunc); } - for (size_t i = 0, n = PointerComparisonsOrSubtracts.size(); i != n; i++) { - instrumentPointerComparisonOrSubtraction(PointerComparisonsOrSubtracts[i]); + for (auto Inst : PointerComparisonsOrSubtracts) { + instrumentPointerComparisonOrSubtraction(Inst); NumInstrumented++; } @@ -1532,12 +1527,10 @@ void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined( } static DebugLoc getFunctionEntryDebugLocation(Function &F) { - BasicBlock::iterator I = F.getEntryBlock().begin(), - E = F.getEntryBlock().end(); - for (; I != E; ++I) - if (!isa(I)) - break; - return I->getDebugLoc(); + for (const auto &Inst : F.getEntryBlock()) + if (!isa(Inst)) + return Inst.getDebugLoc(); + return DebugLoc(); } void FunctionStackPoisoner::poisonStack() { @@ -1551,8 +1544,7 @@ void FunctionStackPoisoner::poisonStack() { SmallVector SVD; SVD.reserve(AllocaVec.size()); - for (size_t i = 0, n = AllocaVec.size(); i < n; i++) { - AllocaInst *AI = AllocaVec[i]; + for (AllocaInst *AI : AllocaVec) { ASanStackVariableDescription D = { AI->getName().data(), getAllocaSizeInBytes(AI), AI->getAlignment(), AI, 0}; @@ -1607,8 +1599,7 @@ void FunctionStackPoisoner::poisonStack() { // Insert poison calls for lifetime intrinsics for alloca. bool HavePoisonedAllocas = false; - for (size_t i = 0, n = AllocaPoisonCallVec.size(); i < n; i++) { - const AllocaPoisonCall &APC = AllocaPoisonCallVec[i]; + for (const auto &APC : AllocaPoisonCallVec) { assert(APC.InsBefore); assert(APC.AI); IRBuilder<> IRB(APC.InsBefore); @@ -1617,11 +1608,10 @@ void FunctionStackPoisoner::poisonStack() { } // Replace Alloca instructions with base+offset. - for (size_t i = 0, n = SVD.size(); i < n; i++) { - AllocaInst *AI = SVD[i].AI; + for (const auto &Desc : SVD) { + AllocaInst *AI = Desc.AI; Value *NewAllocaPtr = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, SVD[i].Offset)), + IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), AI->getType()); replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB); AI->replaceAllUsesWith(NewAllocaPtr); @@ -1654,8 +1644,7 @@ void FunctionStackPoisoner::poisonStack() { poisonRedZones(L.ShadowBytes, IRB, ShadowBase, true); // (Un)poison the stack before all ret instructions. - for (size_t i = 0, n = RetVec.size(); i < n; i++) { - Instruction *Ret = RetVec[i]; + for (auto Ret : RetVec) { IRBuilder<> IRBRet(Ret); // Mark the current frame as retired. 
IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic), @@ -1709,8 +1698,8 @@ void FunctionStackPoisoner::poisonStack() { } // We are done. Remove the old unused alloca instructions. - for (size_t i = 0, n = AllocaVec.size(); i < n; i++) - AllocaVec[i]->eraseFromParent(); + for (auto AI : AllocaVec) + AI->eraseFromParent(); } void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index b8e632ead10a..2323a8c57552 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -599,26 +599,26 @@ struct MemorySanitizerVisitor : public InstVisitor { } void materializeStores(bool InstrumentWithCalls) { - for (size_t i = 0, n = StoreList.size(); i < n; i++) { - StoreInst &I = *dyn_cast(StoreList[i]); + for (auto Inst : StoreList) { + StoreInst &SI = *dyn_cast(Inst); - IRBuilder<> IRB(&I); - Value *Val = I.getValueOperand(); - Value *Addr = I.getPointerOperand(); - Value *Shadow = I.isAtomic() ? getCleanShadow(Val) : getShadow(Val); + IRBuilder<> IRB(&SI); + Value *Val = SI.getValueOperand(); + Value *Addr = SI.getPointerOperand(); + Value *Shadow = SI.isAtomic() ? getCleanShadow(Val) : getShadow(Val); Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB); StoreInst *NewSI = - IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment()); + IRB.CreateAlignedStore(Shadow, ShadowPtr, SI.getAlignment()); DEBUG(dbgs() << " STORE: " << *NewSI << "\n"); (void)NewSI; - if (ClCheckAccessAddress) insertShadowCheck(Addr, &I); + if (ClCheckAccessAddress) insertShadowCheck(Addr, &SI); - if (I.isAtomic()) I.setOrdering(addReleaseOrdering(I.getOrdering())); + if (SI.isAtomic()) SI.setOrdering(addReleaseOrdering(SI.getOrdering())); if (MS.TrackOrigins) { - unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment()); + unsigned Alignment = std::max(kMinOriginAlignment, SI.getAlignment()); storeOrigin(IRB, Addr, Shadow, getOrigin(Val), Alignment, InstrumentWithCalls); } @@ -662,18 +662,17 @@ struct MemorySanitizerVisitor : public InstVisitor { } void materializeChecks(bool InstrumentWithCalls) { - for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) { - Instruction *OrigIns = InstrumentationList[i].OrigIns; - Value *Shadow = InstrumentationList[i].Shadow; - Value *Origin = InstrumentationList[i].Origin; + for (const auto &ShadowData : InstrumentationList) { + Instruction *OrigIns = ShadowData.OrigIns; + Value *Shadow = ShadowData.Shadow; + Value *Origin = ShadowData.Origin; materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls); } DEBUG(dbgs() << "DONE:\n" << F); } void materializeIndirectCalls() { - for (size_t i = 0, n = IndirectCallList.size(); i < n; i++) { - CallSite CS = IndirectCallList[i]; + for (auto &CS : IndirectCallList) { Instruction *I = CS.getInstruction(); BasicBlock *B = I->getParent(); IRBuilder<> IRB(I); @@ -732,8 +731,7 @@ struct MemorySanitizerVisitor : public InstVisitor { // Finalize PHI nodes. - for (size_t i = 0, n = ShadowPHINodes.size(); i < n; i++) { - PHINode *PN = ShadowPHINodes[i]; + for (PHINode *PN : ShadowPHINodes) { PHINode *PNS = cast(getShadow(PN)); PHINode *PNO = MS.TrackOrigins ? 
cast(getOrigin(PN)) : nullptr; size_t NumValues = PN->getNumIncomingValues(); @@ -950,22 +948,21 @@ struct MemorySanitizerVisitor : public InstVisitor { Function *F = A->getParent(); IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI()); unsigned ArgOffset = 0; - for (Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI) { - if (!AI->getType()->isSized()) { + for (auto &FArg : F->args()) { + if (!FArg.getType()->isSized()) { DEBUG(dbgs() << "Arg is not sized\n"); continue; } - unsigned Size = AI->hasByValAttr() - ? MS.DL->getTypeAllocSize(AI->getType()->getPointerElementType()) - : MS.DL->getTypeAllocSize(AI->getType()); - if (A == AI) { - Value *Base = getShadowPtrForArgument(AI, EntryIRB, ArgOffset); - if (AI->hasByValAttr()) { + unsigned Size = FArg.hasByValAttr() + ? MS.DL->getTypeAllocSize(FArg.getType()->getPointerElementType()) + : MS.DL->getTypeAllocSize(FArg.getType()); + if (A == &FArg) { + Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset); + if (FArg.hasByValAttr()) { // ByVal pointer itself has clean shadow. We copy the actual // argument shadow to the underlying memory. // Figure out maximal valid memcpy alignment. - unsigned ArgAlign = AI->getParamAlignment(); + unsigned ArgAlign = FArg.getParamAlignment(); if (ArgAlign == 0) { Type *EltType = A->getType()->getPointerElementType(); ArgAlign = MS.DL->getABITypeAlignment(EltType); @@ -980,10 +977,11 @@ struct MemorySanitizerVisitor : public InstVisitor { } else { *ShadowPtr = EntryIRB.CreateAlignedLoad(Base, kShadowTLSAlignment); } - DEBUG(dbgs() << " ARG: " << *AI << " ==> " << + DEBUG(dbgs() << " ARG: " << FArg << " ==> " << **ShadowPtr << "\n"); if (MS.TrackOrigins) { - Value* OriginPtr = getOriginPtrForArgument(AI, EntryIRB, ArgOffset); + Value *OriginPtr = + getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset); setOrigin(A, EntryIRB.CreateLoad(OriginPtr)); } } diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 8fe9bcae69dc..345b212d140f 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -333,20 +333,17 @@ bool ThreadSanitizer::runOnFunction(Function &F) { bool HasCalls = false; // Traverse all instructions, collect loads/stores/returns, check for calls. - for (Function::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) { - BasicBlock &BB = *FI; - for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); - BI != BE; ++BI) { - if (isAtomic(BI)) - AtomicAccesses.push_back(BI); - else if (isa(BI) || isa(BI)) - LocalLoadsAndStores.push_back(BI); - else if (isa(BI)) - RetVec.push_back(BI); - else if (isa(BI) || isa(BI)) { - if (isa(BI)) - MemIntrinCalls.push_back(BI); + for (auto &BB : F) { + for (auto &Inst : BB) { + if (isAtomic(&Inst)) + AtomicAccesses.push_back(&Inst); + else if (isa(Inst) || isa(Inst)) + LocalLoadsAndStores.push_back(&Inst); + else if (isa(Inst)) + RetVec.push_back(&Inst); + else if (isa(Inst) || isa(Inst)) { + if (isa(Inst)) + MemIntrinCalls.push_back(&Inst); HasCalls = true; chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); } @@ -360,19 +357,19 @@ bool ThreadSanitizer::runOnFunction(Function &F) { // Instrument memory accesses. 
if (ClInstrumentMemoryAccesses && F.hasFnAttribute(Attribute::SanitizeThread)) - for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) { - Res |= instrumentLoadOrStore(AllLoadsAndStores[i]); + for (auto Inst : AllLoadsAndStores) { + Res |= instrumentLoadOrStore(Inst); } // Instrument atomic memory accesses. if (ClInstrumentAtomics) - for (size_t i = 0, n = AtomicAccesses.size(); i < n; ++i) { - Res |= instrumentAtomic(AtomicAccesses[i]); + for (auto Inst : AtomicAccesses) { + Res |= instrumentAtomic(Inst); } if (ClInstrumentMemIntrinsics) - for (size_t i = 0, n = MemIntrinCalls.size(); i < n; ++i) { - Res |= instrumentMemIntrinsic(MemIntrinCalls[i]); + for (auto Inst : MemIntrinCalls) { + Res |= instrumentMemIntrinsic(Inst); } // Instrument function entry/exit points if there were instrumented accesses. @@ -382,8 +379,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) { Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress), IRB.getInt32(0)); IRB.CreateCall(TsanFuncEntry, ReturnAddress); - for (size_t i = 0, n = RetVec.size(); i < n; ++i) { - IRBuilder<> IRBRet(RetVec[i]); + for (auto RetInst : RetVec) { + IRBuilder<> IRBRet(RetInst); IRBRet.CreateCall(TsanFuncExit); } Res = true; From e741924230245250448a41d54adc7238e0eac716 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Thu, 29 May 2014 19:44:05 +0000 Subject: [PATCH 243/906] fail to find dimensions when ElementSize is nullptr when ScalarEvolution::getElementSize returns nullptr it is safe to early return in ScalarEvolution::findArrayDimensions such that we avoid later problems when we try to divide the terms by ElementSize. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209837 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 42a7aa238969..935d4158c395 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -7370,7 +7370,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl &Terms, SmallVectorImpl &Sizes, const SCEV *ElementSize) const { - if (Terms.size() < 1) + if (Terms.size() < 1 || !ElementSize) return; // Early return when Terms do not contain parameters: we do not delinearize From 20b6ed3c9c58104d76c523bdd7a5b7b6c1feb729 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Thu, 29 May 2014 19:44:09 +0000 Subject: [PATCH 244/906] implement missing SCEVDivision case without this case we would end on an infinite recursion: the remainder is zero, so Numerator - Remainder is equal to Numerator and so we would recursively ask for the division of Numerator by Denominator. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209838 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 935d4158c395..bc9f45b20430 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -7216,6 +7216,15 @@ struct SCEVDivision : public SCEVVisitor { cast(Zero)->getValue(); Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); + if (Remainder->isZero()) { + // The Quotient is obtained by replacing Denominator by 1 in Numerator. 
+ RewriteMap[cast(Denominator)->getValue()] = + cast(One)->getValue(); + Quotient = + SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); + return; + } + // Quotient is (Numerator - Remainder) divided by Denominator. const SCEV *Q, *R; const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); From 960fc3503fed972763cbd21ffd03b85ef2e58c82 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 29 May 2014 19:59:58 +0000 Subject: [PATCH 245/906] Update Credits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209840 91177308-0d34-0410-b5e6-96231b3b80d8 --- CREDITS.TXT | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/CREDITS.TXT b/CREDITS.TXT index 2b8b115516a9..0447c40e381b 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -107,6 +107,10 @@ N: Rafael Avila de Espindola E: rafael.espindola@gmail.com D: The ARM backend +N: Dave Estes +E: cestes@codeaurora.org +D: AArch64 machine description for Cortex-A53 + N: Alkis Evlogimenos E: alkis@evlogimenos.com D: Linear scan register allocator, many codegen improvements, Java frontend @@ -341,6 +345,10 @@ D: LTO tool, PassManager rewrite, Loop Pass Manager, Loop Rotate D: GCC PCH Integration (llvm-gcc), llvm-gcc improvements D: Optimizer improvements, Loop Index Split +N: Ana Pazos +E: apazos@codeaurora.org +D: Fixes and improvements to the AArch64 backend + N: Wesley Peck E: peckw@wesleypeck.com W: http://wesleypeck.com/ @@ -370,8 +378,10 @@ D: ARM calling conventions rewrite, hard float support N: Chad Rosier E: mcrosier@codeaurora.org -D: ARM fast-isel improvements -D: Performance monitoring +I: mcrosier +D: AArch64 fast instruction selection pass +D: Fixes and improvements to the ARM fast-isel pass +D: Fixes and improvements to the AArch64 backend N: Nadav Rotem E: nrotem@apple.com From 78874456fc58ca993635e31ef5ab5517769efb93 Mon Sep 17 00:00:00 2001 From: Louis Gerbarg Date: Thu, 29 May 2014 20:29:47 +0000 Subject: [PATCH 246/906] Add support for combining GEPs across PHI nodes Currently LLVM will generally merge GEPs. This allows backends to use more complex addressing modes. In some cases this is not happening because there is PHI inbetween the two GEPs: GEP1--\ |-->PHI1-->GEP3 GEP2--/ This patch checks to see if GEP1 and GEP2 are similiar enough that they can be cloned (GEP12) in GEP3's BB, allowing GEP->GEP merging (GEP123): GEP1--\ --\ --\ |-->PHI1-->GEP3 ==> |-->PHI2->GEP12->GEP3 == > |-->PHI2->GEP123 GEP2--/ --/ --/ This also breaks certain use chains that are preventing GEP->GEP merges that the the existing instcombine would merge otherwise. Tests included. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209843 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstructionCombining.cpp | 79 +++++++++++++++++++ test/Transforms/InstCombine/gepphigep.ll | 56 +++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 test/Transforms/InstCombine/gepphigep.ll diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 4c36887f6285..c72d099d9f0a 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1220,6 +1220,85 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (MadeChange) return &GEP; } + // Check to see if the inputs to the PHI node are getelementptr instructions. 
+ if (PHINode *PN = dyn_cast(PtrOp)) { + GetElementPtrInst *Op1 = dyn_cast(PN->getOperand(0)); + if (!Op1) + return nullptr; + + signed DI = -1; + + for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { + GetElementPtrInst *Op2 = dyn_cast(*I); + if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands()) + return nullptr; + + for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) { + if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType()) + return nullptr; + + if (Op1->getOperand(J) != Op2->getOperand(J)) { + if (DI == -1) { + // We have not seen any differences yet in the GEPs feeding the + // PHI yet, so we record this one if it is allowed to be a + // variable. + + // The first two arguments can vary for any GEP, the rest have to be + // static for struct slots + if (J > 1) { + SmallVector Idxs(GEP.idx_begin(), GEP.idx_begin()+J-1); + Type *Ty = + GetElementPtrInst::getIndexedType(Op1->getOperand(0)->getType(), + Idxs); + if (Ty->isStructTy()) + return nullptr; + } + + DI = J; + } else { + // The GEP is different by more than one input. While this could be + // extended to support GEPs that vary by more than one variable it + // doesn't make sense since it greatly increases the complexity and + // would result in an R+R+R addressing mode which no backend + // directly supports and would need to be broken into several + // simpler instructions anyway. + return nullptr; + } + } + } + } + + GetElementPtrInst *NewGEP = cast(Op1->clone()); + + if (DI == -1) { + // All the GEPs feeding the PHI are identical. Clone one down into our + // BB so that it can be merged with the current GEP. + GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(), + NewGEP); + } else { + // All the GEPs feeding the PHI differ at a single offset. Clone a GEP + // into the current block so it can be merged, and create a new PHI to + // set that index. + Instruction *InsertPt = Builder->GetInsertPoint(); + Builder->SetInsertPoint(PN); + PHINode *NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(), + PN->getNumOperands()); + Builder->SetInsertPoint(InsertPt); + + for (auto &I : PN->operands()) + NewPN->addIncoming(cast(I)->getOperand(DI), + PN->getIncomingBlock(I)); + + NewGEP->setOperand(DI, NewPN); + GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(), + NewGEP); + NewGEP->setOperand(DI, NewPN); + } + + GEP.setOperand(0, NewGEP); + PtrOp = NewGEP; + } + // Combine Indices - If the source pointer to this getelementptr instruction // is a getelementptr instruction, combine the indices of the two // getelementptr instructions into a single instruction. 
diff --git a/test/Transforms/InstCombine/gepphigep.ll b/test/Transforms/InstCombine/gepphigep.ll new file mode 100644 index 000000000000..9aab609901e2 --- /dev/null +++ b/test/Transforms/InstCombine/gepphigep.ll @@ -0,0 +1,56 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +%struct1 = type { %struct2*, i32, i32, i32 } +%struct2 = type { i32, i32 } + +define i32 @test1(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) { +bb: + %tmp = getelementptr inbounds %struct1* %dm, i64 0, i32 0 + %tmp1 = load %struct2** %tmp, align 8 + br i1 %tmp4, label %bb1, label %bb2 + +bb1: + %tmp10 = getelementptr inbounds %struct2* %tmp1, i64 %tmp9 + %tmp11 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 0 + store i32 0, i32* %tmp11, align 4 + br label %bb3 + +bb2: + %tmp20 = getelementptr inbounds %struct2* %tmp1, i64 %tmp19 + %tmp21 = getelementptr inbounds %struct2* %tmp20, i64 0, i32 0 + store i32 0, i32* %tmp21, align 4 + br label %bb3 + +bb3: + %phi = phi %struct2* [ %tmp10, %bb1 ], [ %tmp20, %bb2 ] + %tmp24 = getelementptr inbounds %struct2* %phi, i64 0, i32 1 + %tmp25 = load i32* %tmp24, align 4 + ret i32 %tmp25 + +; CHECK-LABEL: @test1( +; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 0 +; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp19, i32 0 +; CHECK: %[[PHI:[0-9A-Za-z]+]] = phi i64 [ %tmp9, %bb1 ], [ %tmp19, %bb2 ] +; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %[[PHI]], i32 1 + +} + +define i32 @test2(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) { +bb: + %tmp = getelementptr inbounds %struct1* %dm, i64 0, i32 0 + %tmp1 = load %struct2** %tmp, align 8 + %tmp10 = getelementptr inbounds %struct2* %tmp1, i64 %tmp9 + %tmp11 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 0 + store i32 0, i32* %tmp11, align 4 + %tmp20 = getelementptr inbounds %struct2* %tmp1, i64 %tmp19 + %tmp21 = getelementptr inbounds %struct2* %tmp20, i64 0, i32 0 + store i32 0, i32* %tmp21, align 4 + %tmp24 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 1 + %tmp25 = load i32* %tmp24, align 4 + ret i32 %tmp25 + +; CHECK-LABEL: @test2( +; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 0 +; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp19, i32 0 +; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 1 +} From ade072c1a9cbd06e99862dff90c72af0b1f2edbe Mon Sep 17 00:00:00 2001 From: Filipe Cabecinhas Date: Thu, 29 May 2014 22:04:42 +0000 Subject: [PATCH 247/906] Added tests for shufflevector lowering to blend instrs. These tests ensure that a change I will propose in clang works as expected. Summary: Added tests for the generation of blend+immediate instructions from a shufflevector. These tests were proposed along with a patch that was dropped. I'm committing the tests anyway to protect against possible regressions in codegen. 
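The masks in these tests each pick lane i of the result from lane i of one source or the other, which is exactly the shape a blend-with-immediate can encode: immediate bit i is set when lane i comes from the second operand. A small standalone sketch of that mapping (helper name and asserts are illustrative; the real lowering lives in the X86 backend):

  #include <cassert>

  // For a 4-lane shuffle mask whose element i is either i (first source) or
  // i + 4 (second source), compute a BLENDPS-style immediate: bit i set means
  // lane i is taken from the second source.
  static unsigned blendImmediateFromMask(const int (&Mask)[4]) {
    unsigned Imm = 0;
    for (int I = 0; I != 4; ++I) {
      assert(Mask[I] == I || Mask[I] == I + 4);
      if (Mask[I] == I + 4)
        Imm |= 1u << I;
    }
    return Imm;
  }

  int main() {
    // Mask <0, 5, 2, 7> from blend_shufflevector_4xfloat below: lanes 1 and 3
    // come from %b, giving immediate 0b1010.
    int Mask[4] = {0, 5, 2, 7};
    assert(blendImmediateFromMask(Mask) == 0xA);
    return 0;
  }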
Reviewers: nadav, bkramer Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D3600 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209853 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/avx-blend.ll | 33 ++++++++++++++++++++++++++++++++ test/CodeGen/X86/avx2-shuffle.ll | 18 +++++++++++++++++ test/CodeGen/X86/sse41-blend.ll | 18 +++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll index e21c7a07e8bd..a66dc549b484 100644 --- a/test/CodeGen/X86/avx-blend.ll +++ b/test/CodeGen/X86/avx-blend.ll @@ -158,3 +158,36 @@ define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) + +;; 4 tests for shufflevectors that optimize to blend + immediate +; CHECK-LABEL: @blend_shufflevector_4xfloat +define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) { +; CHECK: vblendps +; CHECK: ret + %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %1 +} + +; CHECK-LABEL: @blend_shufflevector_8xfloat +define <8 x float> @blend_shufflevector_8xfloat(<8 x float> %a, <8 x float> %b) { +; CHECK: vblendps +; CHECK: ret + %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %1 +} + +; CHECK-LABEL: @blend_shufflevector_4xdouble +define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) { +; CHECK: vblendpd +; CHECK: ret + %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %1 +} + +; CHECK-LABEL: @blend_shufflevector_4xi64 +define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) { +; CHECK: vblendpd +; CHECK: ret + %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %1 +} diff --git a/test/CodeGen/X86/avx2-shuffle.ll b/test/CodeGen/X86/avx2-shuffle.ll index 0e6dd297f8df..5b2e7a40bdb9 100644 --- a/test/CodeGen/X86/avx2-shuffle.ll +++ b/test/CodeGen/X86/avx2-shuffle.ll @@ -60,6 +60,24 @@ define <4 x i64> @blend_test4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline ret <4 x i64> %t } +;; 2 tests for shufflevectors that optimize to blend + immediate +; CHECK-LABEL: @blend_test5 +; CHECK: vpblendd +; CHECK: ret +define <4 x i32> @blend_test5(<4 x i32> %a, <4 x i32> %b) { + %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %1 +} + +; CHECK-LABEL: @blend_test6 +; CHECK: vpblendw +; CHECK: ret +define <16 x i16> @blend_test6(<16 x i16> %a, <16 x i16> %b) { + %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %1 +} + ; CHECK: vpshufhw $27, %ymm define <16 x i16> @vpshufhw(<16 x i16> %src1) nounwind uwtable readnone ssp { entry: diff --git a/test/CodeGen/X86/sse41-blend.ll b/test/CodeGen/X86/sse41-blend.ll index 8ad79877c8e6..ca106e158e74 100644 --- a/test/CodeGen/X86/sse41-blend.ll +++ b/test/CodeGen/X86/sse41-blend.ll @@ -117,6 +117,24 @@ define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) { %1 = select <16 x i1> , <16 x i8> %xyzw, <16 x i8> %abcd ret <16 x i8> %1 } + declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) + +;; 2 tests for shufflevectors that optimize to blend + immediate +; 
CHECK-LABEL: @blend_shufflevector_4xfloat +; CHECK: blendps +; CHECK: ret +define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) { + %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %1 +} + +; CHECK-LABEL: @blend_shufflevector_8xi16 +; CHECK: pblendw +; CHECK: ret +define <8 x i16> @blend_shufflevector_8xi16(<8 x i16> %a, <8 x i16> %b) { + %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %1 +} From 06413cd0f0aafb56b86ec8f7ab44328ca49f1aeb Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Thu, 29 May 2014 22:10:01 +0000 Subject: [PATCH 248/906] LoopVectorizer: Add a check that the backedge taken count + 1 does not overflow The loop vectorizer instantiates be-taken-count + 1 as the loop iteration count. If this expression overflows the generated code was invalid. In case of overflow the code now jumps to the scalar loop. Fixes PR17288. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209854 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 129 +++++++++++++----- test/Transforms/LoopVectorize/induction.ll | 27 ++++ .../LoopVectorize/runtime-check-readonly.ll | 1 + 3 files changed, 123 insertions(+), 34 deletions(-) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 34d8a1053fa1..ba2b7eea363f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1909,20 +1909,23 @@ void InnerLoopVectorizer::createEmptyLoop() { the vectorized instructions while the old loop will continue to run the scalar remainder. - [ ] <-- vector loop bypass (may consist of multiple blocks). - / | - / v - | [ ] <-- vector pre header. - | | - | v - | [ ] \ - | [ ]_| <-- vector loop. - | | - \ v - >[ ] <--- middle-block. - / | - / v - | [ ] <--- new preheader. + [ ] <-- Back-edge taken count overflow check. + / | + / v + | [ ] <-- vector loop bypass (may consist of multiple blocks). + | / | + | / v + || [ ] <-- vector pre header. + || | + || v + || [ ] \ + || [ ]_| <-- vector loop. + || | + | \ v + | >[ ] <--- middle-block. + | / | + | / v + -|- >[ ] <--- new preheader. | | | v | [ ] \ @@ -1936,6 +1939,7 @@ void InnerLoopVectorizer::createEmptyLoop() { BasicBlock *OldBasicBlock = OrigLoop->getHeader(); BasicBlock *BypassBlock = OrigLoop->getLoopPreheader(); BasicBlock *ExitBlock = OrigLoop->getExitBlock(); + assert(BypassBlock && "Invalid loop structure"); assert(ExitBlock && "Must have an exit block"); // Some loops have a single integer induction variable, while other loops @@ -1958,15 +1962,31 @@ void InnerLoopVectorizer::createEmptyLoop() { IdxTy->getPrimitiveSizeInBits()) ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy); - ExitCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy); + const SCEV *BackedgeTakeCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy); // Get the total trip count from the count by adding 1. - ExitCount = SE->getAddExpr(ExitCount, - SE->getConstant(ExitCount->getType(), 1)); + ExitCount = SE->getAddExpr(BackedgeTakeCount, + SE->getConstant(BackedgeTakeCount->getType(), 1)); // Expand the trip count and place the new instructions in the preheader. // Notice that the pre-header does not change, only the loop body. SCEVExpander Exp(*SE, "induction"); + // We need to test whether the backedge-taken count is uint##_max. Adding one + // to it will cause overflow and an incorrect loop trip count in the vector + // body. 
In case of overflow we want to directly jump to the scalar remainder + // loop. + Value *BackedgeCount = + Exp.expandCodeFor(BackedgeTakeCount, BackedgeTakeCount->getType(), + BypassBlock->getTerminator()); + if (BackedgeCount->getType()->isPointerTy()) + BackedgeCount = CastInst::CreatePointerCast(BackedgeCount, IdxTy, + "backedge.ptrcnt.to.int", + BypassBlock->getTerminator()); + Instruction *CheckBCOverflow = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, BackedgeCount, + Constant::getAllOnesValue(BackedgeCount->getType()), + "backedge.overflow", BypassBlock->getTerminator()); + // Count holds the overall loop count (N). Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), BypassBlock->getTerminator()); @@ -1980,7 +2000,6 @@ void InnerLoopVectorizer::createEmptyLoop() { IdxTy): ConstantInt::get(IdxTy, 0); - assert(BypassBlock && "Invalid loop structure"); LoopBypassBlocks.push_back(BypassBlock); // Split the single block loop into the two loop structure described above. @@ -2054,24 +2073,39 @@ void InnerLoopVectorizer::createEmptyLoop() { BasicBlock *LastBypassBlock = BypassBlock; + // Generate code to check that the loops trip count that we computed by adding + // one to the backedge-taken count will not overflow. + { + auto PastOverflowCheck = std::next(BasicBlock::iterator(CheckBCOverflow)); + BasicBlock *CheckBlock = + LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked"); + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase()); + LoopBypassBlocks.push_back(CheckBlock); + Instruction *OldTerm = LastBypassBlock->getTerminator(); + BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow, OldTerm); + OldTerm->eraseFromParent(); + LastBypassBlock = CheckBlock; + } + // Generate the code to check that the strides we assumed to be one are really // one. We want the new basic block to start at the first instruction in a // sequence of instructions that form a check. Instruction *StrideCheck; Instruction *FirstCheckInst; std::tie(FirstCheckInst, StrideCheck) = - addStrideCheck(BypassBlock->getTerminator()); + addStrideCheck(LastBypassBlock->getTerminator()); if (StrideCheck) { // Create a new block containing the stride check. BasicBlock *CheckBlock = - BypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck"); + LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck"); if (ParentLoop) ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase()); LoopBypassBlocks.push_back(CheckBlock); // Replace the branch into the memory check block with a conditional branch // for the "few elements case". - Instruction *OldTerm = BypassBlock->getTerminator(); + Instruction *OldTerm = LastBypassBlock->getTerminator(); BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm); OldTerm->eraseFromParent(); @@ -2134,6 +2168,19 @@ void InnerLoopVectorizer::createEmptyLoop() { PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val", MiddleBlock->getTerminator()) : nullptr; + // Create phi nodes to merge from the backedge-taken check block. 
+ PHINode *BCResumeVal = PHINode::Create(ResumeValTy, 3, "bc.resume.val", + ScalarPH->getTerminator()); + BCResumeVal->addIncoming(ResumeVal, MiddleBlock); + + PHINode *BCTruncResumeVal = nullptr; + if (OrigPhi == OldInduction) { + BCTruncResumeVal = + PHINode::Create(OrigPhi->getType(), 2, "bc.trunc.resume.val", + ScalarPH->getTerminator()); + BCTruncResumeVal->addIncoming(TruncResumeVal, MiddleBlock); + } + Value *EndValue = nullptr; switch (II.IK) { case LoopVectorizationLegality::IK_NoInduction: @@ -2150,10 +2197,12 @@ void InnerLoopVectorizer::createEmptyLoop() { BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType()); // The new PHI merges the original incoming value, in case of a bypass, // or the value at the end of the vectorized loop. - for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); TruncResumeVal->addIncoming(EndValue, VecBody); + BCTruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]); + // We know what the end value is. EndValue = IdxEndRoundDown; // We also know which PHI node holds it. @@ -2199,7 +2248,7 @@ void InnerLoopVectorizer::createEmptyLoop() { // The new PHI merges the original incoming value, in case of a bypass, // or the value at the end of the vectorized loop. - for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) { + for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) { if (OrigPhi == OldInduction) ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]); else @@ -2209,11 +2258,16 @@ void InnerLoopVectorizer::createEmptyLoop() { // Fix the scalar body counter (PHI node). unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH); - // The old inductions phi node in the scalar body needs the truncated value. - if (OrigPhi == OldInduction) - OrigPhi->setIncomingValue(BlockIdx, TruncResumeVal); - else - OrigPhi->setIncomingValue(BlockIdx, ResumeVal); + + // The old induction's phi node in the scalar body needs the truncated + // value. + if (OrigPhi == OldInduction) { + BCResumeVal->addIncoming(StartIdx, LoopBypassBlocks[0]); + OrigPhi->setIncomingValue(BlockIdx, BCTruncResumeVal); + } else { + BCResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]); + OrigPhi->setIncomingValue(BlockIdx, BCResumeVal); + } } // If we are generating a new induction variable then we also need to @@ -2224,7 +2278,7 @@ void InnerLoopVectorizer::createEmptyLoop() { assert(!ResumeIndex && "Unexpected resume value found"); ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val", MiddleBlock->getTerminator()); - for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) ResumeIndex->addIncoming(StartIdx, LoopBypassBlocks[I]); ResumeIndex->addIncoming(IdxEndRoundDown, VecBody); } @@ -2494,7 +2548,7 @@ void InnerLoopVectorizer::vectorizeLoop() { // To do so, we need to generate the 'identity' vector and override // one of the elements with the incoming scalar reduction. We need // to do it in the vector-loop preheader. - Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator()); + Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator()); // This is the vector-clone of the value that leaves the loop. 
VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr); @@ -2568,7 +2622,7 @@ void InnerLoopVectorizer::vectorizeLoop() { VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr); PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi"); Value *StartVal = (part == 0) ? VectorStart : Identity; - for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]); NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody.back()); @@ -2626,6 +2680,13 @@ void InnerLoopVectorizer::vectorizeLoop() { Builder.getInt32(0)); } + // Create a phi node that merges control-flow from the backedge-taken check + // block and the middle block. + PHINode *BCBlockPhi = PHINode::Create(RdxPhi->getType(), 2, "bc.merge.rdx", + LoopScalarPreHeader->getTerminator()); + BCBlockPhi->addIncoming(RdxDesc.StartValue, LoopBypassBlocks[0]); + BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock); + // Now, we need to fix the users of the reduction variable // inside and outside of the scalar remainder loop. // We know that the loop is in LCSSA form. We need to update the @@ -2655,7 +2716,7 @@ void InnerLoopVectorizer::vectorizeLoop() { assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); // Pick the other block. int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); - (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx); + (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi); (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr); }// end of for each redux variable. @@ -3112,8 +3173,8 @@ void InnerLoopVectorizer::updateAnalysis() { } } - DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks.front()); - DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock); + DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks[1]); + DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]); DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader); DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock); diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll index ad2c663ce461..3102ec59bd41 100644 --- a/test/Transforms/LoopVectorize/induction.ll +++ b/test/Transforms/LoopVectorize/induction.ll @@ -108,3 +108,30 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ;